Public Member Functions
static int	ailiaAudioLog1p (float[] dst, float[] src, int src_n)
	Convert the input values to a logarithmic scale. More...

static int	ailiaAudioConvertPowerToDB (float[] dst, float[] src, int src_n, float top_db)
	Convert non-negative input values to decibel scale. More...

static int	ailiaAudioGetFrameLen (ref Int32 frame_n, int sample_n, int fft_n, int hop_n, int center)
	Get the number of frames generated by the STFT. More...

static int	ailiaAudioGetSampleLen (ref Int32 sample_n, int frame_n, int freq_n, int hop_n, int center)
	Get the number of samples generated by the ISTFT. More...

static int	ailiaAudioGetWindow (float[] dst, int window_n, int win_type)
	Get the window function. More...

static int	ailiaAudioFFT (float[] dst, float[] src, int fft_n)
	Execute the FFT. More...

static int	ailiaAudioIFFT (float[] dst, float[] src, int fft_n)
	Execute the IFFT. More...

static int	ailiaAudioGetSpectrogram (float[] dst, float[] src, int sample_n, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int norm_type)
	Generate the spectrogram from the audio signal. More...

static int	ailiaAudioGetInverseSpectrogram (float[] dst, float[] src, int frame_n, int freq_n, int hop_n, int win_n, int win_type, int max_sample_n, int center, int norm_type)
	Generate an audio signal from a complex spectrogram. More...

static int	ailiaAudioGetFBMatrix (float[] dst, int freq_n, float f_min, float f_max, int mel_n, int sample_rate, int mel_norm, int mel_formula)
	Create a mel filter-bank. More...

static int	ailiaAudioGetMelSpectrogram (float[] dst, float[] src, int sample_n, int sample_rate, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int fft_norm_type, float f_min, float f_max, int mel_n, int mel_norm_type, int mel_formula)
	Generate the mel spectrogram from the audio signal. More...

static int	ailiaAudioMagPhase (float[] dst_mag, float[] dst_phase, float[] src, int freq_n, int frame_n, float power, int phase_form)
	Get the amplitude and the phase from the spectrogram. More...

static int	ailiaAudioStandardize (float[] dst, float[] src, int src_n)
	Standardize a real signal. More...

static int	ailiaAudioComplexNorm (float[] dst, float[] src, int src_n, float power)
	Get the norm of the complex signal. More...

static int	ailiaAudioConvertToMel (float[] dst, float[] src, float[] fb_mtrx, int freq_n, int frame_n, int mel_n)
	Convert the real output of the STFT to the mel scale. More...

static int	ailiaAudioFixFrameLen (float[] dst, float[] src, int freq_n, int dst_frame_n, int src_frame_n, float pad_data)
	Fix the number of time frames of a real-valued spectrogram/mel-spectrogram. More...

static int	ailiaAudioResample (float[] dst, float[] src, int dst_sample_rate, int dst_n, int src_sample_rate, int src_n)
	Resample the signal. More...

static int	ailiaAudioGetResampleLen (ref Int32 dst_sample_n, int dst_sample_rate, int src_sample_n, int src_sample_rate)
	Get the number of samples after the resampling. More...

static int	ailiaAudioLinerFilter (float[] dst, float[] src, float[] n_coef, float[] d_coef, float[] zi, int dst_n, int src_n, int n_coef_n, int d_coef_n, int zi_n)
	Apply a filter to the signal. More...

static int	ailiaAudioGetLinerFilterZiCoef (float[] dst_zi, float[] n_coef, float[] d_coef, int dst_n, int n_coef_n, int d_coef_n)
	Calculate the initial delay coefficients for filtering. More...

static int	ailiaAudioFilterFilter (float[] dst, float[] src, float[] n_coef, float[] d_coef, int dst_n, int src_n, int n_coef_n, int d_coef_n, int pad_type, int pad_len)
	Apply a zero-phase filter to the signal. More...

static int	ailiaAudioGetNonSilentPos (ref Int32 dst_start_pos, ref Int32 dst_length, float[] src, int sample_n, int win_n, int hop_n, float thr_db)
	Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input. More...

Public Attributes
const Int32	AILIA_AUDIO_WIN_TYPE_HANN = (1)

const Int32	AILIA_AUDIO_WIN_TYPE_HAMMING = (2)

const Int32	AILIA_AUDIO_STFT_CENTER_NONE = (0)

const Int32	AILIA_AUDIO_STFT_CENTER_ENABLE = (1)

const Int32	AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT = (2)

const Int32	AILIA_AUDIO_FFT_NORMALIZE_NONE = (0)

const Int32	AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT = (1)

const Int32	AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT = (1)

const Int32	AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT = (2)

const Int32	AILIA_AUDIO_MEL_NORMALIZE_NONE = (0)

const Int32	AILIA_AUDIO_MEL_NORMALIZE_ENABLE = (1)

const Int32	AILIA_AUDIO_MEL_SCALE_FORMULA_HTK = (1)

const Int32	AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE = (0)

const Int32	AILIA_AUDIO_PHASE_FORM_COMPLEX = (1)

const Int32	AILIA_AUDIO_PHASE_FORM_REAL = (0)

const Int32	AILIA_AUDIO_FILTFILT_PAD_NONE = (0)

const Int32	AILIA_AUDIO_FILTFILT_PAD_ODD = (1)

const Int32	AILIA_AUDIO_FILTFILT_PAD_EVEN = (2)

const Int32	AILIA_AUDIO_FILTFILT_PAD_CONSTANT = (3)

const String	LIBRARY_NAME = "ailia_audio"

Member Function Documentation

◆ ailiaAudioComplexNorm()

static int AiliaAudio.ailiaAudioComplexNorm	(	float []	dst,
		float []	src,
		int	src_n,
		float	power
	)

Get the norm of the complex signal.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, an array of length (2 * src_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (src_n, 2))
src_n	length of the input data
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Compute the norm of the input data. For each complex value src_cmp = src[0] + i * src[1]: tmp_dst = pow(src[0],2.0) + pow(src[1],2.0); dst[0] = pow(tmp_dst,0.5*power);

◆ ailiaAudioConvertPowerToDB()

static int AiliaAudio.ailiaAudioConvertPowerToDB	(	float []	dst,
		float []	src,
		int	src_n,
		float	top_db
	)

Convert non-negative input values to decibel scale.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	number of elements to be calculated
top_db	float >= 0.0

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Output compatible with librosa.power_to_db. dst = trimlow( 10 * log10(src / ref) ), where ref is the max of 1e-10 and of the positive values of src. If top_db > 0, trimlow() replaces all values lower than (- top_db) with (- top_db); otherwise, trimlow() does nothing.

◆ ailiaAudioConvertToMel()

static int AiliaAudio.ailiaAudioConvertToMel	(	float []	dst,
		float []	src,
		float []	fb_mtrx,
		int	freq_n,
		int	frame_n,
		int	mel_n
	)

Convert the real output of the STFT to the mel scale.

Parameters

dst	pointer to the output data, of float format, of length (mel_n * frame_n), and of memory layout (in row-major convention) (mel_n, frame_n).
src	pointer to the input data, of float format, of length (freq_n * frame_n), and of memory layout (in row-major convention) (freq_n, frame_n).
fb_mtrx	the mel filter-bank, of float format, of length (mel_n * freq_n), and of memory layout (in row-major convention) (mel_n, freq_n).
freq_n	number of frequency indices
frame_n	number of time frames in the input data
mel_n	number of mel frequency indices

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Converts the real spectrogram given in input to the mel scale. The argument fb_mtrx can take the coefficients outputted by ailiaAudioGetFBMatrix() .

◆ ailiaAudioFFT()

static int AiliaAudio.ailiaAudioFFT	(	float []	dst,
		float []	src,
		int	fft_n
	)

Execute the FFT.

Parameters

dst	pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
src	pointer to the input data, of float format, and of length fft_n
fft_n	count of FFT values (i.e. of frequency bins)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioFilterFilter()

static int AiliaAudio.ailiaAudioFilterFilter	(	float []	dst,
		float []	src,
		float []	n_coef,
		float []	d_coef,
		int	dst_n,
		int	src_n,
		int	n_coef_n,
		int	d_coef_n,
		int	pad_type,
		int	pad_len
	)

Apply a zero-phase filter to the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_n	length (in number of samples) reserved in the output buffer (dst_n >= src_n)
src_n	number of samples in the input signal
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter
pad_type	type of padding to apply at the start and at the end of the input signal: any of the AILIA_AUDIO_FILTFILT_PAD_* constants
pad_len	length of the padding applied to the start and to the end of the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of values written to the output dst is min(dst_n,src_n). The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioFixFrameLen()

static int AiliaAudio.ailiaAudioFixFrameLen	(	float []	dst,
		float []	src,
		int	freq_n,
		int	dst_frame_n,
		int	src_frame_n,
		float	pad_data
	)

Fix the number of time frames of a real-valued spectrogram/mel-spectrogram.

Parameters

dst	pointer to the output data, of length (freq_n * dst_frame_n), and of memory layout (in row-major convention) (freq_n, dst_frame_n).
src	pointer to the input data, of length (freq_n * src_frame_n), and of memory layout (in row-major convention) (freq_n, src_frame_n).
freq_n	number of frequency indices
dst_frame_n	number of time frames in the output data
src_frame_n	number of time frames in the input data
pad_data	value inserted for padding (used when dst_frame_n > src_frame_n)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst_frame_n > src_frame_n : missing time frames are added and filled with the value pad_data. dst_frame_n <= src_frame_n : only keeps the first dst_frame_n data.

◆ ailiaAudioGetFBMatrix()

static int AiliaAudio.ailiaAudioGetFBMatrix	(	float []	dst,
		int	freq_n,
		float	f_min,
		float	f_max,
		int	mel_n,
		int	sample_rate,
		int	mel_norm,
		int	mel_formula
	)

Create a mel filter-bank.

Parameters

dst	pointer to the output data, of float format, and of length (mel_n * freq_n). (memory layout, using the row-major convention: (mel_n, freq_n))
freq_n	number of frequency indices for the FFT (1+fft_n/2)
f_min	lowest frequency
f_max	highest frequency
mel_n	number of mel frequency bins in the output (< freq_n)
sample_rate	sampling rate for the signal that will be inputted to this filter
mel_norm	whether to normalize the output (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formula	mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetFrameLen()

static int AiliaAudio.ailiaAudioGetFrameLen	(	ref Int32	frame_n,
		int	sample_n,
		int	fft_n,
		int	hop_n,
		int	center
	)

Get the number of frames generated by the STFT.

Parameters

frame_n	pointer to the destination where to write the output (the number of frames)
sample_n	count of samples on which the STFT is performed
fft_n	size of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_n	stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
center	any of the AILIA_AUDIO_STFT_CENTER_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the STFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, the sample_n samples are cut in packets of size hop_n, and no padding occurs before the first sample nor after the last sample. If AILIA_AUDIO_STFT_CENTER_ENABLE is used, a reflection padding of length fft_n/2 is performed before the first sample and after the last sample. If AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT is used, a zero padding of length fft_n/2 is performed before the first sample and after the last sample, and moreover an additional zero padding is performed to ensure that the total length is a multiple of hop_n.

◆ ailiaAudioGetInverseSpectrogram()

static int AiliaAudio.ailiaAudioGetInverseSpectrogram	(	float []	dst,
		float []	src,
		int	frame_n,
		int	freq_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_sample_n,
		int	center,
		int	norm_type
	)

Generate an audio signal from a complex spectrogram.

Parameters

dst	pointer to the output data, of float format, and of length sample_n
src	pointer to the input data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. Memory layout, using the row-major convention: (freq_n, frame_n, 2).
frame_n	number of time frames in the input data
freq_n	number of frequencies bins for each time frame (freq_n = fft_n/2+1)
hop_n	step size of the time frame increment (expressed in number of samples) for the inputted spectrogram.
win_n	size of the window function
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_sample_n	maximum value of the sample index in the outputted data
center	whether padding (before and after) was used or not (and its type) during the generation of the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
norm_type	normalization type that was used during the generation of the input data: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame the normalization is executed at the end of the IFFT. Only accepts a complex spectrogram in input.

◆ ailiaAudioGetLinerFilterZiCoef()

static int AiliaAudio.ailiaAudioGetLinerFilterZiCoef	(	float []	dst_zi,
		float []	n_coef,
		float []	d_coef,
		int	dst_n,
		int	n_coef_n,
		int	d_coef_n
	)

Calculate the initial delay coefficients for filtering.

Parameters

dst_zi	pointer to the output (initial delay coefficients), of float format, and of length dst_n (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_n	size, in number of samples, reserved in the output buffer (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

These initial delay coefficients dst_zi, once multiplied with the early values of the signal, can be passed as initial delayed values, the zi argument, to ailiaAudioLinerFilter() . Of the dst_n reserved length of the output buffer, the length used is max(n_coef_n,d_coef_n)-1. If dst_n is less than that, only the corresponding first values are output. If dst_n is larger, the remaining is filled with 0. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioGetMelSpectrogram()

static int AiliaAudio.ailiaAudioGetMelSpectrogram	(	float []	dst,
		float []	src,
		int	sample_n,
		int	sample_rate,
		int	fft_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_frame_n,
		int	center,
		float	power,
		int	fft_norm_type,
		float	f_min,
		float	f_max,
		int	mel_n,
		int	mel_norm_type,
		int	mel_formula
	)

Generate the mel spectrogram from the audio signal.

Parameters

dst	pointer to the output data, of float format, and of length (mel_n * frame_n) (with frame_n the number of time frames outputted). (memory layout, using the row-major convention: (mel_n, frame_n))
src	pointer to the input data, of float format, monaural PCM audio data.
sample_n	count of samples in the input data
sample_rate	sampling rate of the input signal
fft_n	number of FFT components
hop_n	stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_n	size of the window function (in number of samples)
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_n	maximum value of the time frame index in the outputted data
center	whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
fft_norm_type	normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
f_min	lowest frequency
f_max	highest frequency
mel_n	number of mel frequency bins in the output (< freq_n)
mel_norm_type	whether to normalize the mel spectrogram (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formula	mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT(STFT) -> normalization -> power exponentiation -> get the mel filter-bank coefficients -> convert to the mel scale. The output is real values, and its length is mel_n*frame_n (with frame_n the number of time frames outputted).

◆ ailiaAudioGetNonSilentPos()

static int AiliaAudio.ailiaAudioGetNonSilentPos	(	ref Int32	dst_start_pos,
		ref Int32	dst_length,
		float []	src,
		int	sample_n,
		int	win_n,
		int	hop_n,
		float	thr_db
	)

Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input.

Parameters

dst_start_pos	pointer to the destination where to write the outputted start position of the non-silence area, of int format
dst_length	pointer to the destination where to write the outputted length of the non-silence area, of int format
src	pointer to the input data, of float format, and of length sample_n
sample_n	count of samples in the input data
win_n	size of the window function
hop_n	stride of each window shift (in number of samples)
thr_db	threshold (in dB) above which the signal is considered non-silence (thr_db > 0)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

In case the whole signal is considered silence, the following happens: *dst_start_pos = -1, *dst_length = 0

◆ ailiaAudioGetResampleLen()

static int AiliaAudio.ailiaAudioGetResampleLen	(	ref Int32	dst_sample_n,
		int	dst_sample_rate,
		int	src_sample_n,
		int	src_sample_rate
	)

Get the number of samples after the resampling.

Parameters

dst_sample_n	pointer to the destination where to write the output (the number of samples after resampling)
dst_sample_rate	sampling rate after the resampling
src_sample_n	number of samples in the input signal
src_sample_rate	sampling rate of the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetSampleLen()

static int AiliaAudio.ailiaAudioGetSampleLen	(	ref Int32	sample_n,
		int	frame_n,
		int	freq_n,
		int	hop_n,
		int	center
	)

Get the number of samples generated by the ISTFT.

Parameters

sample_n	pointer to the destination where to write the output (the number of samples)
frame_n	length of the STFT data, expressed in number of frames
freq_n	number of frequency bins at each frame (freq_n = fft_n/2+1)
hop_n	stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
center	any of the AILIA_AUDIO_STFT_CENTER_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the ISTFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, no truncation is performed at the beginning nor at the end. If AILIA_AUDIO_STFT_CENTER_NONE is not used, a truncation is performed at the beginning and at the end.

◆ ailiaAudioGetSpectrogram()

static int AiliaAudio.ailiaAudioGetSpectrogram	(	float []	dst,
		float []	src,
		int	sample_n,
		int	fft_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_frame_n,
		int	center,
		float	power,
		int	norm_type
	)

Generate the spectrogram from the audio signal.

Parameters

dst	pointer to the output data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. (where freq_n = fft_n/2+1). Memory layout, using the row-major convention: (freq_n, frame_n, 2).
src	pointer to the input data, of float format, and of length sample_n
sample_n	count of samples in the input data
fft_n	size of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_n	stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_n	size of the window function
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_n	maximum value of the time frame index in the outputted data
center	whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
power	exponent to apply to the spectrogram (>= 0.0). A special case is for 0.0: complex spectrogram. For other cases the amplitude is just exponentiated accordingly: 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
norm_type	normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT -> normalization -> power exponentiation. As the output data alternates real and imaginary parts, its length is 2*(fft_n/2+1)*frame_n (where frame_n is the number of time frames outputted). When the power argument is a non-zero value, all the imaginary parts are set to 0 in the output.

◆ ailiaAudioGetWindow()

static int AiliaAudio.ailiaAudioGetWindow	(	float []	dst,
		int	window_n,
		int	win_type
	)

Get the window function.

Parameters

dst	pointer to the output data, of float format, and of length window_n
window_n	length of the window (in number of samples)
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Only the Hann and the Hamming window functions are supported.

◆ ailiaAudioIFFT()

static int AiliaAudio.ailiaAudioIFFT	(	float []	dst,
		float []	src,
		int	fft_n
	)

Execute the IFFT.

Parameters

dst	pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
src	pointer to the input data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
fft_n	count of FFT values (i.e. of frequency bins)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioLinerFilter()

static int AiliaAudio.ailiaAudioLinerFilter	(	float []	dst,
		float []	src,
		float []	n_coef,
		float []	d_coef,
		float []	zi,
		int	dst_n,
		int	src_n,
		int	n_coef_n,
		int	d_coef_n,
		int	zi_n
	)

Apply a filter to the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
zi	pointer to the initial delayed values to be used, of float format, and of length zi_n (zi_n = max(n_coef_n,d_coef_n)-1). nullptr is allowed.
dst_n	size, in number of samples, reserved in the output buffer (dst_n >= src_n)
src_n	number of samples in the input signal
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter
zi_n	number of initial delayed values provided (zi_n >= max(n_coef_n,d_coef_n)-1)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of samples outputted to dst is min(dst_n,src_n). Use zi to provide the initial delayed values. During processing, this array is overwritten with the new delayed values. Out of the zi_n, the number of delayed values used is max(n_coef_n,d_coef_n)-1. If there are less than that, the remaining is assumed to be zeros, and the array zi is not updated with the new values. When zi is nullptr, zi_n is ignored, all the delayed values are assumed to be zero, and the new delayed values are not returned. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioLog1p()

static int AiliaAudio.ailiaAudioLog1p	(	float []	dst,
		float []	src,
		int	src_n
	)

Convert the input values to a logarithmic scale.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	number of elements to be calculated

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst = log_e(1.0 + src)

◆ ailiaAudioMagPhase()

static int AiliaAudio.ailiaAudioMagPhase	(	float []	dst_mag,
		float []	dst_phase,
		float []	src,
		int	freq_n,
		int	frame_n,
		float	power,
		int	phase_form
	)

Get the amplitude and the phase from the spectrogram.

Parameters

dst_mag	pointer to the outputted amplitudes, an array of length (freq_n * frame_n). (memory layout, using the row-major convention: (freq_n, frame_n))
dst_phase	pointer to the outputted phases, an array of length (2 * freq_n * frame_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (freq_n, frame_n, 2))
src	pointer to the input data, of length (2 * frame_n * freq_n) (a sequence of complex pairs [real, imaginary]). (memory layout, using the row-major convention: (frame_n, freq_n, 2))
freq_n	number of frequency indices
frame_n	number of time frames
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
phase_form	format of the outputted phase: any of the AILIA_AUDIO_PHASE_FORM_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

To be compatible with librosa, use: phase_form = AILIA_AUDIO_PHASE_FORM_COMPLEX , power = 1.0. To be compatible with PyTorch, use: phase_form = AILIA_AUDIO_PHASE_FORM_REAL , power = 1.0. The dst_phase output depends on phase_form:

AILIA_AUDIO_PHASE_FORM_COMPLEX : signal with real and imaginary parts, of size (freq_n * frame_n * 2)
AILIA_AUDIO_PHASE_FORM_REAL : real signal, of size (freq_n * frame_n)

◆ ailiaAudioResample()

static int AiliaAudio.ailiaAudioResample	(	float []	dst,
		float []	src,
		int	dst_sample_rate,
		int	dst_n,
		int	src_sample_rate,
		int	src_n
	)

Resample the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
dst_sample_rate	sampling rate after the resampling
dst_n	length (in number of samples) reserved in the output buffer (dst_n >= max_resample_n)
src_sample_rate	sampling rate of the input signal
src_n	number of samples in the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The max number of samples in the output, max_resample_n, can be obtained from ailiaAudioGetResampleLen() . If dst_n < max_resample_n, only the first dst_n samples are outputted. If dst_n >= max_resample_n, max_resample_n samples are outputted.

◆ ailiaAudioStandardize()

static int AiliaAudio.ailiaAudioStandardize	(	float []	dst,
		float []	src,
		int	src_n
	)

Standardize a real signal.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	length of the input data

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Standardize the input data so that its average value becomes 0 and its variance 1. dst = (src - mean(src)) / std(src)

Member Data Documentation

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT = (1)

Normalize the FFT output in a way compatible with librosa

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_NONE = (0)

Do not normalize the FFT output

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT = (1)

Normalize the FFT output in a way compatible with PyTorch

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT = (2)

Normalize the FFT output in a way compatible with SciPy

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_CONSTANT = (3)

During zero-phase filtering, pad using the edge value

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_EVEN = (2)

During zero-phase filtering, pad with an even reflection (normal reflection)

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_NONE = (0)

During zero-phase filtering, do not pad

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_ODD = (1)

During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value)

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_NORMALIZE_ENABLE = (1)

Normalize the output of the mel spectrogram

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_NORMALIZE_NONE = (0)

Do not normalize the output of the mel spectrogram

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

const Int32 AiliaAudio.AILIA_AUDIO_MEL_SCALE_FORMULA_HTK = (1)

Get the mel scale from the HTK formula (PyTorch compatible)

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE = (0)

Get the mel scale from Slaney's formula (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

const Int32 AiliaAudio.AILIA_AUDIO_PHASE_FORM_COMPLEX = (1)

Output the phase in complex format (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_REAL

const Int32 AiliaAudio.AILIA_AUDIO_PHASE_FORM_REAL = (0)

Output the phase in real format (compatible with the default of PyTorch)

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_ENABLE = (1)

for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples

◆ AILIA_AUDIO_STFT_CENTER_NONE

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_NONE = (0)

for the STFT, do not insert padding before and after

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT = (2)

for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

const Int32 AiliaAudio.AILIA_AUDIO_WIN_TYPE_HAMMING = (2)

use a Hamming window function

◆ AILIA_AUDIO_WIN_TYPE_HANN

const Int32 AiliaAudio.AILIA_AUDIO_WIN_TYPE_HANN = (1)

use a Hann window function

◆ LIBRARY_NAME

const String AiliaAudio.LIBRARY_NAME = "ailia_audio"

The documentation for this class was generated from the following file:

AiliaAudio.cs

Public Member Functions

Public Attributes

Member Function Documentation

◆ ailiaAudioComplexNorm()

◆ ailiaAudioConvertPowerToDB()

◆ ailiaAudioConvertToMel()

◆ ailiaAudioFFT()

◆ ailiaAudioFilterFilter()

◆ ailiaAudioFixFrameLen()

◆ ailiaAudioGetFBMatrix()

◆ ailiaAudioGetFrameLen()

◆ ailiaAudioGetInverseSpectrogram()

◆ ailiaAudioGetLinerFilterZiCoef()

◆ ailiaAudioGetMelSpectrogram()

◆ ailiaAudioGetNonSilentPos()

◆ ailiaAudioGetResampleLen()

◆ ailiaAudioGetSampleLen()

◆ ailiaAudioGetSpectrogram()

◆ ailiaAudioGetWindow()

◆ ailiaAudioIFFT()

◆ ailiaAudioLinerFilter()

◆ ailiaAudioLog1p()

◆ ailiaAudioMagPhase()

◆ ailiaAudioResample()

◆ ailiaAudioStandardize()

Member Data Documentation

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

◆ AILIA_AUDIO_PHASE_FORM_REAL

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

◆ AILIA_AUDIO_STFT_CENTER_NONE

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

◆ AILIA_AUDIO_WIN_TYPE_HANN

◆ LIBRARY_NAME