Skip to content

Commit

Permalink
Set up structure for two new spectrograms
Browse files Browse the repository at this point in the history
Issue #332: Set up the basic structure for including mel-scale and octave-scale spectrograms among the types of generated standard-scale spectrograms.
  • Loading branch information
towsey committed Jul 30, 2020
1 parent 115b610 commit d9688f9
Show file tree
Hide file tree
Showing 5 changed files with 368 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/AnalysisConfigFiles/Towsey.SpectrogramGenerator.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ Images:
- DecibelSpectrogramNoiseReduced
- Experimental
- DifferenceSpectrogram
- MelScaleSpectrogram
- CepstralSpectrogram
- OctaveScaleSpectrogram
- AmplitudeSpectrogramLocalContrastNormalization


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,15 +190,31 @@ public static AudioToSonogramResult GenerateSpectrogramImages(
}
}

// IMAGE 6) Cepstral Spectrogram
// IMAGE 6) Mel-frequency Spectrogram
if (@do.Contains(MelScaleSpectrogram))
{
images.Add(
MelScaleSpectrogram,
GetMelScaleSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
}

// IMAGE 7) Cepstral Spectrogram
if (@do.Contains(CepstralSpectrogram))
{
images.Add(
CepstralSpectrogram,
GetCepstralSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
}

// IMAGE 7) AmplitudeSpectrogram_LocalContrastNormalization
// IMAGE 8) Octave-frequency scale Spectrogram
if (@do.Contains(OctaveScaleSpectrogram))
{
images.Add(
OctaveScaleSpectrogram,
GetOctaveScaleSpectrogram(sonoConfig, recordingSegment, sourceRecordingName));
}

// IMAGE 9) AmplitudeSpectrogram_LocalContrastNormalization
if (@do.Contains(AmplitudeSpectrogramLocalContrastNormalization))
{
var neighborhoodSeconds = config.NeighborhoodSeconds;
Expand Down Expand Up @@ -340,6 +356,28 @@ public static Image<Rgb24> GetDecibelSpectrogram_Ridges(
return image;
}

/// <summary>
/// Builds a mel-frequency spectrogram image for a recording and frames it with a title bar and time scale.
/// </summary>
/// <remarks>
/// Side effect: the noise-reduction type and parameter of <paramref name="sonoConfig"/> are overwritten,
/// so the caller's configuration object is modified by this call.
/// </remarks>
/// <param name="sonoConfig">Spectrogram configuration; its noise-reduction settings are replaced here.</param>
/// <param name="recording">The recording whose <c>WavReader</c> is handed to the spectrogram constructor.</param>
/// <param name="sourceRecordingName">Name appended to the title-bar text.</param>
/// <returns>The image returned by <c>BaseSonogram.FrameSonogram</c>.</returns>
public static Image<Rgb24> GetMelScaleSpectrogram(
    SonogramConfig sonoConfig,
    AudioRecording recording,
    string sourceRecordingName)
{
    // TODO: for the time being the noise reduction type has to be forced to Standard.
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    sonoConfig.NoiseReductionParameter = 3.0;

    var spectrogram = new SpectrogramMelScale(sonoConfig, recording.WavReader);
    var unframed = spectrogram.GetImage();

    // NOTE(review): the tag looked up here is the one registered for CepstralSpectrogram, which looks
    // like a carry-over from the cepstral method - confirm whether a MelScaleSpectrogram tag exists
    // in ImageTags and should be used instead.
    var title = "MEL-FREQUENCY SPECTROGRAM " + sourceRecordingName;
    var header = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
        title,
        unframed.Width,
        ImageTags[CepstralSpectrogram]);

    // Duration represented by one pixel column: window step divided by sample rate.
    var secondsPerColumn = sonoConfig.WindowStep / (double)sonoConfig.SampleRate;

    // Time scale starts at zero, with a tic every second and a label every five seconds.
    return BaseSonogram.FrameSonogram(
        unframed,
        header,
        TimeSpan.Zero,
        TimeSpan.FromSeconds(1),
        TimeSpan.FromSeconds(secondsPerColumn),
        TimeSpan.FromSeconds(5));
}

public static Image<Rgb24> GetCepstralSpectrogram(
SonogramConfig sonoConfig,
AudioRecording recording,
Expand All @@ -362,6 +400,28 @@ public static Image<Rgb24> GetCepstralSpectrogram(
return image;
}

/// <summary>
/// Builds an octave-scale spectrogram image for a recording and frames it with a title bar and time scale.
/// </summary>
/// <remarks>
/// Side effect: the noise-reduction type and parameter of <paramref name="sonoConfig"/> are overwritten,
/// so the caller's configuration object is modified by this call.
/// </remarks>
/// <param name="sonoConfig">Spectrogram configuration; its noise-reduction settings are replaced here.</param>
/// <param name="recording">The recording whose <c>WavReader</c> is handed to the spectrogram constructor.</param>
/// <param name="sourceRecordingName">Name appended to the title-bar text.</param>
/// <returns>The image returned by <c>BaseSonogram.FrameSonogram</c>.</returns>
public static Image<Rgb24> GetOctaveScaleSpectrogram(
    SonogramConfig sonoConfig,
    AudioRecording recording,
    string sourceRecordingName)
{
    // TODO: for the time being the noise reduction type has to be forced to Standard.
    sonoConfig.NoiseReductionType = NoiseReductionType.Standard;
    sonoConfig.NoiseReductionParameter = 3.0;

    var spectrogram = new SpectrogramOctaveScale(sonoConfig, recording.WavReader);
    var unframed = spectrogram.GetImage();

    // NOTE(review): the tag looked up here is the one registered for CepstralSpectrogram, which looks
    // like a carry-over from the cepstral method - confirm whether an OctaveScaleSpectrogram tag exists
    // in ImageTags and should be used instead.
    var title = "OCTAVE-SCALE SPECTROGRAM " + sourceRecordingName;
    var header = BaseSonogram.DrawTitleBarOfGrayScaleSpectrogram(
        title,
        unframed.Width,
        ImageTags[CepstralSpectrogram]);

    // Duration represented by one pixel column: window step divided by sample rate.
    var secondsPerColumn = sonoConfig.WindowStep / (double)sonoConfig.SampleRate;

    // Time scale starts at zero, with a tic every second and a label every five seconds.
    return BaseSonogram.FrameSonogram(
        unframed,
        header,
        TimeSpan.Zero,
        TimeSpan.FromSeconds(1),
        TimeSpan.FromSeconds(secondsPerColumn),
        TimeSpan.FromSeconds(5));
}

public static Image<Rgb24> GetLcnSpectrogram(
SonogramConfig sonoConfig,
AudioRecording recordingSegment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ public enum SpectrogramImageType
DecibelSpectrogramNoiseReduced = 2,
Experimental = 3,
DifferenceSpectrogram = 4,
CepstralSpectrogram = 5,
AmplitudeSpectrogramLocalContrastNormalization = 6,
MelScaleSpectrogram = 5,
CepstralSpectrogram = 6,
OctaveScaleSpectrogram = 7,
AmplitudeSpectrogramLocalContrastNormalization = 8,
}
}
126 changes: 126 additions & 0 deletions src/AudioAnalysisTools/StandardSpectrograms/SpectrogramMelScale.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// <copyright file="SpectrogramMelScale.cs" company="QutEcoacoustics">
// All code in this file and all associated files are the copyright and property of the QUT Ecoacoustics Research Group (formerly MQUTeR, and formerly QUT Bioacoustics Research Group).
// </copyright>

namespace AudioAnalysisTools.StandardSpectrograms
{
using System;
using Acoustics.Tools.Wav;
using AudioAnalysisTools.DSP;
using AudioAnalysisTools.WavTools;
using TowseyLibrary;

/// <summary>
/// A spectrogram produced by passing an amplitude spectrogram through a filter bank
/// (mel-scale or linear, as selected by the MFCC configuration), converting it to decibels,
/// applying noise reduction and normalising the result.
/// </summary>
public class SpectrogramMelScale : BaseSonogram
{
    /// <summary>
    /// Initializes a new instance of the <see cref="SpectrogramMelScale"/> class
    /// from a configuration file and a wav reader.
    /// </summary>
    /// <param name="configFile">Path handed to <c>SonogramConfig.Load</c>.</param>
    /// <param name="wav">The audio source passed on to the base class.</param>
    public SpectrogramMelScale(string configFile, WavReader wav)
        : this(SonogramConfig.Load(configFile), wav)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SpectrogramMelScale"/> class
    /// from a configuration object and a wav reader. All work is delegated to the base constructor.
    /// </summary>
    /// <param name="config">The spectrogram configuration.</param>
    /// <param name="wav">The audio source passed on to the base class.</param>
    public SpectrogramMelScale(SonogramConfig config, WavReader wav)
        : base(config, wav)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SpectrogramMelScale"/> class
    /// from an existing amplitude spectrogram, using its full bandwidth.
    /// </summary>
    /// <param name="sg">The amplitude spectrogram whose state and data are taken over.</param>
    public SpectrogramMelScale(AmplitudeSonogram sg)
        : base(sg.Configuration)
    {
        this.CopyStateFrom(sg);
        this.Data = sg.Data;

        // converts amplitude matrix to mel-frequency scale spectrogram
        // NOTE(review): Make is an overridable member invoked from a constructor; a derived
        // override would run before the derived constructor body.
        this.Make(this.Data);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="SpectrogramMelScale"/> class
    /// from a frequency sub-band of an existing amplitude spectrogram.
    /// </summary>
    /// <remarks>
    /// This constructor previously chained to the single-argument constructor, which converted the
    /// full-band data and was then immediately overwritten by the sub-band conversion below.
    /// The conversion is now performed once, on the sub-band only.
    /// </remarks>
    /// <param name="sg">The amplitude spectrogram whose state is taken over.</param>
    /// <param name="minHz">Lower bound of the sub-band passed to <c>SpectrogramTools.ExtractFreqSubband</c>.</param>
    /// <param name="maxHz">Upper bound of the sub-band passed to <c>SpectrogramTools.ExtractFreqSubband</c>.</param>
    public SpectrogramMelScale(AmplitudeSonogram sg, int minHz, int maxHz)
        : base(sg.Configuration)
    {
        this.CopyStateFrom(sg);
        this.Data = SpectrogramTools.ExtractFreqSubband(sg.Data, minHz, maxHz, this.Configuration.DoMelScale, sg.Configuration.FreqBinCount, sg.FBinWidth);

        // converts amplitude matrix to mel-frequency scale spectrogram
        this.Make(this.Data);
    }

    /// <summary>
    /// Converts amplitude matrix to mel-frequency scale spectrogram.
    /// Stores the converted matrix in <c>Data</c> and the noise profile in <c>ModalNoiseProfile</c>.
    /// </summary>
    /// <param name="amplitudeM">Matrix of amplitude values.</param>
    public override void Make(double[,] amplitudeM)
    {
        var result = MakeMelScaleSpectrogram(this.Configuration, amplitudeM, this.DecibelsNormalised, this.SampleRate);
        this.Data = result.Item1;
        this.ModalNoiseProfile = result.Item2; //store the full bandwidth modal noise profile
    }

    //##################################################################################################################################

    /// <summary>
    /// Applies the filter bank to an amplitude matrix, converts it to decibels, noise reduces it
    /// and normalises the values.
    /// </summary>
    /// <param name="config">Supplies the filter bank settings, frequency band limits, window power, epsilon and noise reduction settings.</param>
    /// <param name="matrix">Matrix of amplitude values.</param>
    /// <param name="decibels">NOTE!!!! The decibel array has been normalised in 0 - 1. It is not read by the current implementation.</param>
    /// <param name="sampleRate">Sample rate of the signal; half of it is used as the Nyquist frequency.</param>
    /// <returns>Item1 is the processed matrix; Item2 is the full bandwidth modal noise profile returned by the noise reduction step.</returns>
    /// <exception cref="ArgumentException">Thrown when the configured filter bank count exceeds the number of FFT bins.</exception>
    protected static Tuple<double[,], double[]> MakeMelScaleSpectrogram(SonogramConfig config, double[,] matrix, double[] decibels, int sampleRate)
    {
        int nyquist = sampleRate / 2;

        //(i) APPLY FILTER BANK
        int bandCount = config.mfccConfig.FilterbankCount;
        bool doMelScale = config.mfccConfig.DoMelScale;
        int fftBinCount = config.FreqBinCount; //number of Hz bands = 2^N +1. Subtract DC bin
        int minHz = config.MinFreqBand ?? 0;
        int maxHz = config.MaxFreqBand ?? nyquist;

        Log.WriteIfVerbose("ApplyFilterBank(): Dim prior to filter bank =" + matrix.GetLength(1));

        //error check that filterBankCount < Number of FFT bins
        if (bandCount > fftBinCount)
        {
            throw new ArgumentException(
                "## FATAL ERROR in SpectrogramMelScale.MakeMelScaleSpectrogram():- Can't apply filter bank. Filterbank Count > number of FFT bins. (" +
                bandCount + " > " + fftBinCount + ")\n\n",
                nameof(config));
        }

        //this is the filter count for full bandwidth 0-Nyquist. This number is trimmed proportionately to fit the required bandwidth.
        double[,] m = doMelScale
            ? MFCCStuff.MelFilterBank(matrix, bandCount, nyquist, minHz, maxHz)
            : MFCCStuff.LinearFilterBank(matrix, bandCount, nyquist, minHz, maxHz);

        Log.WriteIfVerbose("\tDim after filter bank=" + m.GetLength(1) + " (Max filter bank=" + bandCount + ")");

        //(ii) CONVERT AMPLITUDES TO DECIBELS
        m = MFCCStuff.DecibelSpectra(m, config.WindowPower, sampleRate, config.epsilon); //from spectrogram

        //(iii) NOISE REDUCTION
        var noiseReduced = SNR.NoiseReduce(m, config.NoiseReductionType, config.NoiseReductionParameter);
        m = noiseReduced.Item1;

        //(iv) Normalize Matrix Values
        m = DataTools.normalise(m);

        // return matrix and full bandwidth modal noise profile
        return Tuple.Create(m, noiseReduced.Item2);
    }

    /// <summary>
    /// Copies the configuration and signal state (everything except the data matrix) from an amplitude spectrogram.
    /// </summary>
    /// <param name="sg">The amplitude spectrogram to copy from.</param>
    private void CopyStateFrom(AmplitudeSonogram sg)
    {
        this.Configuration = sg.Configuration;
        this.DecibelsPerFrame = sg.DecibelsPerFrame;
        this.DecibelsNormalised = sg.DecibelsNormalised;
        this.Duration = sg.Duration;
        this.FrameCount = sg.FrameCount;
        this.DecibelReference = sg.DecibelReference;
        this.MaxAmplitude = sg.MaxAmplitude;
        this.SampleRate = sg.SampleRate;
        this.SigState = sg.SigState;
        this.SnrData = sg.SnrData;
    }
} // end class SpectrogramMelScale
}
Loading

0 comments on commit d9688f9

Please sign in to comment.