audio processing library More...

Macros
#define	AILIA_API

#define	AILIA_AUDIO_WIN_TYPE_HANN (1)
	use a Hann window function More...

#define	AILIA_AUDIO_WIN_TYPE_HAMMING (2)
	use a Hamming window function More...

#define	AILIA_AUDIO_STFT_CENTER_NONE (0)
	for the STFT, do not insert padding before and after More...

#define	AILIA_AUDIO_STFT_CENTER_ENABLE (1)
	for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples More...

#define	AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT (2)
	for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n More...

#define	AILIA_AUDIO_FFT_NORMALIZE_NONE (0)
	Do not normalize the FFT output. More...

#define	AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT (1)
	Normalize the FFT output in a way compatible with librosa. More...

#define	AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT (1)
	Normalize the FFT output in a way compatible with PyTorch. More...

#define	AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT (2)
	Normalize the FFT output in a way compatible with SciPy. More...

#define	AILIA_AUDIO_MEL_NORMALIZE_NONE (0)
	Do not normalize the output of the mel spectrogram. More...

#define	AILIA_AUDIO_MEL_NORMALIZE_ENABLE (1)
	Normalize the output of the mel spectrogram. More...

#define	AILIA_AUDIO_MEL_SCALE_FORMULA_HTK (1)
	Get the mel scale from the HTK formula (PyTorch compatible) More...

#define	AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE (0)
	Get the mel scale from Slaney's formula (compatible with the default of librosa) More...

#define	AILIA_AUDIO_PHASE_FORM_COMPLEX (1)
	Output the phase in complex format (compatible with the default of librosa) More...

#define	AILIA_AUDIO_PHASE_FORM_REAL (0)
	Output the phase in real format (compatible with the default of PyTorch) More...

#define	AILIA_AUDIO_FILTFILT_PAD_NONE (0)
	During zero-phase filtering, do not pad. More...

#define	AILIA_AUDIO_FILTFILT_PAD_ODD (1)
	During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value) More...

#define	AILIA_AUDIO_FILTFILT_PAD_EVEN (2)
	During zero-phase filtering, pad with an even reflection (normal reflection) More...

#define	AILIA_AUDIO_FILTFILT_PAD_CONSTANT (3)
	During zero-phase filtering, pad using the edge value. More...

Functions
int AILIA_API	ailiaAudioLog1p (void *dst, const void *src, int src_n)
	Convert the input values to a logarithmic scale. More...

int AILIA_API	ailiaAudioConvertPowerToDB (void *dst, const void *src, int src_n, float top_db)
	Convert non-negative input values to decibel scale. More...

int AILIA_API	ailiaAudioGetFrameLen (int *frame_n, int sample_n, int fft_n, int hop_n, int center)
	Get the number of frames generated by the STFT. More...

int AILIA_API	ailiaAudioGetSampleLen (int *sample_n, int frame_n, int freq_n, int hop_n, int center)
	Get the number of samples generated by the ISTFT. More...

int AILIA_API	ailiaAudioGetWindow (void *dst, int window_n, int win_type)
	Get the window function. More...

int AILIA_API	ailiaAudioFFT (void *dst, const void *src, int fft_n)
	Execute the FFT. More...

int AILIA_API	ailiaAudioIFFT (void *dst, const void *src, int fft_n)
	Execute the IFFT. More...

int AILIA_API	ailiaAudioGetSpectrogram (void *dst, const void *src, int sample_n, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int norm_type)
	Generate the spectrogram from the audio signal. More...

int AILIA_API	ailiaAudioGetInverseSpectrogram (void *dst, const void *src, int frame_n, int freq_n, int hop_n, int win_n, int win_type, int max_sample_n, int center, int norm_type)
	Generate an audio signal from a complex spectrogram. More...

int AILIA_API	ailiaAudioGetFBMatrix (void *dst, const int freq_n, float f_min, float f_max, int mel_n, int sample_rate, int mel_norm, int mel_formula)
	Create a mel filter-bank. More...

int AILIA_API	ailiaAudioGetMelSpectrogram (void *dst, const void *src, int sample_n, int sample_rate, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int fft_norm_type, float f_min, float f_max, int mel_n, int mel_norm_type, int mel_formula)
	Generate the mel spectrogram from the audio signal. More...

int AILIA_API	ailiaAudioMagPhase (void *dst_mag, void *dst_phase, const void *src, int freq_n, int frame_n, float power, int phase_form)
	Get the amplitude and the phase from the spectrogram. More...

int AILIA_API	ailiaAudioStandardize (void *dst, const void *src, const int src_n)
	Standardize a real signal. More...

int AILIA_API	ailiaAudioComplexNorm (void *dst, const void *src, const int src_n, float power)
	Get the norm of the complex signal. More...

int AILIA_API	ailiaAudioConvertToMel (void *dst, const void *src, const void *fb_mtrx, int freq_n, int frame_n, int mel_n)
	Convert the real output of the STFT to the mel scale. More...

int AILIA_API	ailiaAudioFixFrameLen (void *dst, const void *src, int freq_n, int dst_frame_n, int src_frame_n, float pad_data)
	Fix the number of time frames of a real-valued spectrogram/mel-spectrogram. More...

int AILIA_API	ailiaAudioResample (void *dst, const void *src, int dst_sample_rate, int dst_n, int src_sample_rate, int src_n)
	Resample the signal. More...

int AILIA_API	ailiaAudioGetResampleLen (int *dst_sample_n, int dst_sample_rate, int src_sample_n, int src_sample_rate)
	Get the number of samples after the resampling. More...

int AILIA_API	ailiaAudioLinerFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, void *zi, int dst_n, int src_n, int n_coef_n, int d_coef_n, int zi_n)
	Apply a filter to the signal. More...

int AILIA_API	ailiaAudioGetLinerFilterZiCoef (void *dst_zi, const void *n_coef, const void *d_coef, int dst_n, int n_coef_n, int d_coef_n)
	Calculate the initial delay coefficients for filtering. More...

int AILIA_API	ailiaAudioFilterFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, int dst_n, int src_n, int n_coef_n, int d_coef_n, int pad_type, int pad_len)
	Apply a zero-phase filter to the signal. More...

int AILIA_API	ailiaAudioGetNonSilentPos (int *dst_start_pos, int *dst_length, const void *src, int sample_n, int win_n, int hop_n, float thr_db)
	Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input. More...

Detailed Description

audio processing library

Copyright: AXELL CORPORATION, ax Inc.

Date: 2021/07/28

Macro Definition Documentation

◆ AILIA_API

#define AILIA_API

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT (1)

Normalize the FFT output in a way compatible with librosa.

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

#define AILIA_AUDIO_FFT_NORMALIZE_NONE (0)

Do not normalize the FFT output.

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT (1)

Normalize the FFT output in a way compatible with PyTorch.

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT (2)

Normalize the FFT output in a way compatible with SciPy.

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

#define AILIA_AUDIO_FILTFILT_PAD_CONSTANT (3)

During zero-phase filtering, pad using the edge value.

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

#define AILIA_AUDIO_FILTFILT_PAD_EVEN (2)

During zero-phase filtering, pad with an even reflection (normal reflection)

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

#define AILIA_AUDIO_FILTFILT_PAD_NONE (0)

During zero-phase filtering, do not pad.

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

#define AILIA_AUDIO_FILTFILT_PAD_ODD (1)

During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value)

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

#define AILIA_AUDIO_MEL_NORMALIZE_ENABLE (1)

Normalize the output of the mel spectrogram.

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

#define AILIA_AUDIO_MEL_NORMALIZE_NONE (0)

Do not normalize the output of the mel spectrogram.

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

#define AILIA_AUDIO_MEL_SCALE_FORMULA_HTK (1)

Get the mel scale from the HTK formula (PyTorch compatible)

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

#define AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE (0)

Get the mel scale from Slaney's formula (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

#define AILIA_AUDIO_PHASE_FORM_COMPLEX (1)

Output the phase in complex format (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_REAL

#define AILIA_AUDIO_PHASE_FORM_REAL (0)

Output the phase in real format (compatible with the default of PyTorch)

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

#define AILIA_AUDIO_STFT_CENTER_ENABLE (1)

for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples

◆ AILIA_AUDIO_STFT_CENTER_NONE

#define AILIA_AUDIO_STFT_CENTER_NONE (0)

for the STFT, do not insert padding before and after

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

#define AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT (2)

for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

#define AILIA_AUDIO_WIN_TYPE_HAMMING (2)

use a Hamming window function

◆ AILIA_AUDIO_WIN_TYPE_HANN

#define AILIA_AUDIO_WIN_TYPE_HANN (1)

use a Hann window function

Function Documentation

◆ ailiaAudioComplexNorm()

int AILIA_API ailiaAudioComplexNorm	(	void *	dst,
		const void *	src,
		const int	src_n,
		float	power
	)

Get the norm of the complex signal.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, an array of length (2 * src_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (src_n, 2))
src_n	length of the input data
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Compute the norm of the input data. For each complex element src_cmp = src[0] + i * src[1]: tmp_dst = pow(src[0],2.0) + pow(src[1],2.0); dst[0] = pow(tmp_dst,0.5*power);

◆ ailiaAudioConvertPowerToDB()

int AILIA_API ailiaAudioConvertPowerToDB	(	void *	dst,
		const void *	src,
		int	src_n,
		float	top_db
	)

Convert non-negative input values to decibel scale.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	number of elements to be calculated
top_db	float >= 0.0

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Output compatible with librosa.power_to_db. dst = trimlow( 10 * log10(src / ref) ) where ref is the max of 1e-10 and of positive values of src, and trimlow(), if top_db > 0, trims all values lower than (- top_db) and replaces them by (- top_db), else, trimlow() does nothing.

◆ ailiaAudioConvertToMel()

int AILIA_API ailiaAudioConvertToMel	(	void *	dst,
		const void *	src,
		const void *	fb_mtrx,
		int	freq_n,
		int	frame_n,
		int	mel_n
	)

Convert the real output of the STFT to the mel scale.

Parameters

dst	pointer to the output data, of float format, of length (mel_n * frame_n), and of memory layout (in row-major convention) (mel_n, frame_n).
src	pointer to the input data, of float format, of length (freq_n * frame_n), and of memory layout (in row-major convention) (freq_n, frame_n).
fb_mtrx	the mel filter-bank, of float format, of length (mel_n * freq_n), and of memory layout (in row-major convention) (mel_n, freq_n).
freq_n	number of frequency indices
frame_n	number of time frames in the input data
mel_n	number of mel frequency indices

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Converts the real spectrogram given in input to the mel scale. The argument fb_mtrx can take the coefficients outputted by ailiaAudioGetFBMatrix() .

◆ ailiaAudioFFT()

int AILIA_API ailiaAudioFFT	(	void *	dst,
		const void *	src,
		int	fft_n
	)

Execute the FFT.

Parameters

dst	pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
src	pointer to the input data, of float format, and of length fft_n
fft_n	count of FFT values (i.e. of frequency bins)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioFilterFilter()

int AILIA_API ailiaAudioFilterFilter	(	void *	dst,
		const void *	src,
		const void *	n_coef,
		const void *	d_coef,
		int	dst_n,
		int	src_n,
		int	n_coef_n,
		int	d_coef_n,
		int	pad_type,
		int	pad_len
	)

Apply a zero-phase filter to the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_n	length (in number of samples) reserved in the output buffer (dst_n >= src_n)
src_n	number of samples in the input signal
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter
pad_type	type of padding to apply at the start and at the end of the input signal: any of the AILIA_AUDIO_FILTFILT_PAD_* constants
pad_len	length of the padding applied to the start and to the end of the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of values written to the output dst is min(dst_n,src_n). The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioFixFrameLen()

int AILIA_API ailiaAudioFixFrameLen	(	void *	dst,
		const void *	src,
		int	freq_n,
		int	dst_frame_n,
		int	src_frame_n,
		float	pad_data
	)

Fix the number of time frames of a real-valued spectrogram/mel-spectrogram.

Parameters

dst	pointer to the output data, of length (freq_n * dst_frame_n), and of memory layout (in row-major convention) (freq_n, dst_frame_n).
src	pointer to the input data, of length (freq_n * src_frame_n), and of memory layout (in row-major convention) (freq_n, src_frame_n).
freq_n	number of frequency indices
dst_frame_n	number of time frames in the output data
src_frame_n	number of time frames in the input data
pad_data	value inserted for padding (used when dst_frame_n > src_frame_n)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst_frame_n > src_frame_n : missing time frames are added and filled with the value pad_data. dst_frame_n <= src_frame_n : only keeps the first dst_frame_n data.

◆ ailiaAudioGetFBMatrix()

int AILIA_API ailiaAudioGetFBMatrix	(	void *	dst,
		const int	freq_n,
		float	f_min,
		float	f_max,
		int	mel_n,
		int	sample_rate,
		int	mel_norm,
		int	mel_formula
	)

Create a mel filter-bank.

Parameters

dst	pointer to the output data, of float format, and of length (mel_n * freq_n). (memory layout, using the row-major convention: (mel_n, freq_n))
freq_n	number of frequency indices for the FFT (1+fft_n/2)
f_min	lowest frequency
f_max	highest frequency
mel_n	number of mel frequency bins in the output (< freq_n)
sample_rate	sampling rate for the signal that will be inputted to this filter
mel_norm	whether to normalize the output (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formula	mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetFrameLen()

int AILIA_API ailiaAudioGetFrameLen	(	int *	frame_n,
		int	sample_n,
		int	fft_n,
		int	hop_n,
		int	center
	)

Get the number of frames generated by the STFT.

Parameters

frame_n	pointer to the destination where to write the output (the number of frames)
sample_n	count of samples on which the STFT is performed
fft_n	size of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_n	stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
center	any of the AILIA_AUDIO_STFT_CENTER_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the STFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, the sample_n samples are cut in packets of size hop_n, and no padding occurs before the first sample nor after the last sample. If AILIA_AUDIO_STFT_CENTER_ENABLE is used, a reflection padding of length fft_n/2 is performed before the first sample and after the last sample. If AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT is used, a zero padding of length fft_n/2 is performed before the first sample and after the last sample, and moreover an additional zero padding is performed to ensure that the total length is a multiple of hop_n.

◆ ailiaAudioGetInverseSpectrogram()

int AILIA_API ailiaAudioGetInverseSpectrogram	(	void *	dst,
		const void *	src,
		int	frame_n,
		int	freq_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_sample_n,
		int	center,
		int	norm_type
	)

Generate an audio signal from a complex spectrogram.

Parameters

dst	pointer to the output data, of float format, and of length sample_n
src	pointer to the input data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. Memory layout, using the row-major convention: (freq_n, frame_n, 2).
frame_n	number of time frames in the input data
freq_n	number of frequencies bins for each time frame (freq_n = fft_n/2+1)
hop_n	step size of the time frame increment (expressed in number of samples) for the inputted spectrogram.
win_n	size of the window function
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_sample_n	maximum value of the sample index in the outputted data
center	whether padding (before and after) was used or not (and its type) during the generation of the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
norm_type	normalization type that was used during the generation of the input data: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame the normalization is executed at the end of the IFFT. Only accepts a complex spectrogram in input.

◆ ailiaAudioGetLinerFilterZiCoef()

int AILIA_API ailiaAudioGetLinerFilterZiCoef	(	void *	dst_zi,
		const void *	n_coef,
		const void *	d_coef,
		int	dst_n,
		int	n_coef_n,
		int	d_coef_n
	)

Calculate the initial delay coefficients for filtering.

Parameters

dst_zi	pointer to the output (initial delay coefficients), of float format, and of length dst_n (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_n	size, in number of samples, reserved in the output buffer (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

These initial delay coefficients dst_zi, once multiplied with the early values of the signal, can be passed as initial delayed values, the zi argument, to ailiaAudioLinerFilter() . Of the dst_n reserved length of the output buffer, the length used is max(n_coef_n,d_coef_n)-1. If dst_n is less than that, only the corresponding first values are output. If dst_n is larger, the remaining is filled with 0. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioGetMelSpectrogram()

int AILIA_API ailiaAudioGetMelSpectrogram	(	void *	dst,
		const void *	src,
		int	sample_n,
		int	sample_rate,
		int	fft_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_frame_n,
		int	center,
		float	power,
		int	fft_norm_type,
		float	f_min,
		float	f_max,
		int	mel_n,
		int	mel_norm_type,
		int	mel_formula
	)

Generate the mel spectrogram from the audio signal.

Parameters

dst	pointer to the output data, of float format, and of length (mel_n * frame_n) (with frame_n the number of time frames outputted). (memory layout, using the row-major convention: (mel_n, frame_n))
src	pointer to the input data, of float format, monaural PCM audio data.
sample_n	count of samples in the input data
sample_rate	sampling rate of the input signal
fft_n	number of FFT components
hop_n	stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_n	size of the window function (in number of samples)
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_n	maximum value of the time frame index in the outputted data
center	whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
fft_norm_type	normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
f_min	lowest frequency
f_max	highest frequency
mel_n	number of mel frequency bins in the output (< freq_n)
mel_norm_type	whether to normalize the mel spectrogram (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formula	mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT(STFT) -> normalization -> power exponentiation -> get the mel filter-bank coefficients -> convert to the mel scale. The output is real values, and its length is mel_n*frame_n (with frame_n the number of time frames outputted).

◆ ailiaAudioGetNonSilentPos()

int AILIA_API ailiaAudioGetNonSilentPos	(	int *	dst_start_pos,
		int *	dst_length,
		const void *	src,
		int	sample_n,
		int	win_n,
		int	hop_n,
		float	thr_db
	)

Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input.

Parameters

dst_start_pos	pointer to the destination where to write the outputted start position of the non-silence area, of int format
dst_length	pointer to the destination where to write the outputted length of the non-silence area, of int format
src	pointer to the input data, of float format, and of length sample_n
sample_n	count of samples in the input data
win_n	size of the window function
hop_n	stride of each window shift (in number of samples)
thr_db	threshold (in dB) above which the signal is considered non-silence (thr_db > 0)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

In case the whole signal is considered silence, the following happens: *dst_start_pos = -1, *dst_length = 0

◆ ailiaAudioGetResampleLen()

int AILIA_API ailiaAudioGetResampleLen	(	int *	dst_sample_n,
		int	dst_sample_rate,
		int	src_sample_n,
		int	src_sample_rate
	)

Get the number of samples after the resampling.

Parameters

dst_sample_n	pointer to the destination where to write the output (the number of samples after resampling)
dst_sample_rate	sampling rate after the resampling
src_sample_n	number of samples in the input signal
src_sample_rate	sampling rate of the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetSampleLen()

int AILIA_API ailiaAudioGetSampleLen	(	int *	sample_n,
		int	frame_n,
		int	freq_n,
		int	hop_n,
		int	center
	)

Get the number of samples generated by the ISTFT.

Parameters

sample_n	pointer to the destination where to write the output (the number of samples)
frame_n	length of the STFT data, expressed in number of frames
freq_n	number of frequency bins for each time frame (freq_n = fft_n/2+1)
hop_n	stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
center	any of the AILIA_AUDIO_STFT_CENTER_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the ISTFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, no truncation is performed at the beginning nor at the end. If AILIA_AUDIO_STFT_CENTER_NONE is not used, a truncation is performed at the beginning and at the end.

◆ ailiaAudioGetSpectrogram()

int AILIA_API ailiaAudioGetSpectrogram	(	void *	dst,
		const void *	src,
		int	sample_n,
		int	fft_n,
		int	hop_n,
		int	win_n,
		int	win_type,
		int	max_frame_n,
		int	center,
		float	power,
		int	norm_type
	)

Generate the spectrogram from the audio signal.

Parameters

dst	pointer to the output data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. (where freq_n = fft_n/2+1). Memory layout, using the row-major convention: (freq_n, frame_n, 2).
src	pointer to the input data, of float format, and of length sample_n
sample_n	count of samples in the input data
fft_n	size of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_n	stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_n	size of the window function
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_n	maximum value of the time frame index in the outputted data
center	whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
power	exponent to apply to the spectrogram (>= 0.0). A special case is for 0.0: complex spectrogram. For other cases the amplitude is just exponentiated accordingly: 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
norm_type	normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT -> normalization -> power exponentiation. As the output data alternates real and imaginary parts, its length is 2*(fft_n/2+1)*frame_n (where frame_n is the number of time frames outputted). When the power argument is a non-zero value, all the imaginary parts are set to 0 in the output.

◆ ailiaAudioGetWindow()

int AILIA_API ailiaAudioGetWindow	(	void *	dst,
		int	window_n,
		int	win_type
	)

Get the window function.

Parameters

dst	pointer to the output data, of float format, and of length window_n
window_n	length of the window (in number of samples)
win_type	type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Only the Hann and the Hamming window functions are supported.

◆ ailiaAudioIFFT()

int AILIA_API ailiaAudioIFFT	(	void *	dst,
		const void *	src,
		int	fft_n
	)

Execute the IFFT.

Parameters

dst	pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
src	pointer to the input data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
fft_n	count of FFT values (i.e. of frequency bins)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioLinerFilter()

int AILIA_API ailiaAudioLinerFilter	(	void *	dst,
		const void *	src,
		const void *	n_coef,
		const void *	d_coef,
		void *	zi,
		int	dst_n,
		int	src_n,
		int	n_coef_n,
		int	d_coef_n,
		int	zi_n
	)

Apply a filter to the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
n_coef	pointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coef	pointer to the denominator coefficients of the filter, of float format, and length d_coef_n
zi	pointer to the initial delayed values to be used, of float format, and of length zi_n (zi_n = max(n_coef_n,d_coef_n)-1). nullptr is allowed.
dst_n	size, in number of samples, reserved in the output buffer (dst_n >= src_n)
src_n	number of samples in the input signal
n_coef_n	number of numerator coefficients of the filter
d_coef_n	number of denominator coefficients of the filter
zi_n	number of initial delayed values provided (zi_n >= max(n_coef_n,d_coef_n)-1)

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of samples outputted to dst is min(dst_n,src_n). Use zi to provide the initial delayed values. During processing, this array is overwritten with the new delayed values. Out of the zi_n, the number of delayed values used is max(n_coef_n,d_coef_n)-1. If there are less than that, the remaining is assumed to be zeros, and the array zi is not updated with the new values. When zi is nullptr, zi_n is ignored, all the delayed values are assumed to be zero, and the new delayed values are not returned. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioLog1p()

int AILIA_API ailiaAudioLog1p	(	void *	dst,
		const void *	src,
		int	src_n
	)

Convert the input values to a logarithmic scale.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	number of elements to be calculated

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst = log_e(1.0 + src)

◆ ailiaAudioMagPhase()

int AILIA_API ailiaAudioMagPhase	(	void *	dst_mag,
		void *	dst_phase,
		const void *	src,
		int	freq_n,
		int	frame_n,
		float	power,
		int	phase_form
	)

Get the amplitude and the phase from the spectrogram.

Parameters

dst_mag	pointer to the outputted amplitudes, an array of length (freq_n * frame_n). (memory layout, using the row-major convention: (freq_n, frame_n))
dst_phase	pointer to the outputted phases, an array of length (2 * freq_n * frame_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (freq_n, frame_n, 2))
src	pointer to the input data, of length (2 * frame_n * freq_n) (a sequence of complex pairs [real, imaginary]). (memory layout, using the row-major convention: (frame_n, freq_n, 2))
freq_n	number of frequency indices
frame_n	number of time frames
power	exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
phase_form	format of the outputted phase: any of the AILIA_AUDIO_PHASE_FORM_* constants

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

To be compatible with librosa, use: phase_form = AILIA_AUDIO_PHASE_FORM_COMPLEX , power = 1.0. To be compatible with PyTorch, use: phase_form = AILIA_AUDIO_PHASE_FORM_REAL , power = 1.0. The dst_phase output depends on phase_form:

AILIA_AUDIO_PHASE_FORM_COMPLEX : signal with real and imaginary parts, of size (freq_n * frame_n * 2)
AILIA_AUDIO_PHASE_FORM_REAL : real signal, of size (freq_n * frame_n)

◆ ailiaAudioResample()

int AILIA_API ailiaAudioResample	(	void *	dst,
		const void *	src,
		int	dst_sample_rate,
		int	dst_n,
		int	src_sample_rate,
		int	src_n
	)

Resample the signal.

Parameters

dst	pointer to the output data, of float format, and of length dst_n
src	pointer to the input data, of float format, and of length src_n
dst_sample_rate	sampling rate after the resampling
dst_n	length (in number of samples) reserved in the output buffer (dst_n >= max_resample_n)
src_sample_rate	sampling rate of the input signal
src_n	number of samples in the input signal

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The max number of samples in the output, max_resample_n, can be obtained from ailiaAudioGetResampleLen() . If dst_n < max_resample_n, only the first dst_n samples are outputted; if dst_n >= max_resample_n, max_resample_n samples are outputted.

◆ ailiaAudioStandardize()

int AILIA_API ailiaAudioStandardize	(	void *	dst,
		const void *	src,
		const int	src_n
	)

Standardize a real signal.

Parameters

dst	pointer to the output data, of float format, and of length src_n
src	pointer to the input data, of float format, and of length src_n
src_n	length of the input data

Returns: In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Standardize the input data so that its average value becomes 0 and its variance 1. dst = (src - mean(src)) / std(src)

Macros

Functions

Detailed Description

Macro Definition Documentation

◆ AILIA_API

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

◆ AILIA_AUDIO_PHASE_FORM_REAL

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

◆ AILIA_AUDIO_STFT_CENTER_NONE

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

◆ AILIA_AUDIO_WIN_TYPE_HANN

Function Documentation

◆ ailiaAudioComplexNorm()

◆ ailiaAudioConvertPowerToDB()

◆ ailiaAudioConvertToMel()

◆ ailiaAudioFFT()

◆ ailiaAudioFilterFilter()

◆ ailiaAudioFixFrameLen()

◆ ailiaAudioGetFBMatrix()

◆ ailiaAudioGetFrameLen()

◆ ailiaAudioGetInverseSpectrogram()

◆ ailiaAudioGetLinerFilterZiCoef()

◆ ailiaAudioGetMelSpectrogram()

◆ ailiaAudioGetNonSilentPos()

◆ ailiaAudioGetResampleLen()

◆ ailiaAudioGetSampleLen()

◆ ailiaAudioGetSpectrogram()

◆ ailiaAudioGetWindow()

◆ ailiaAudioIFFT()

◆ ailiaAudioLinerFilter()

◆ ailiaAudioLog1p()

◆ ailiaAudioMagPhase()

◆ ailiaAudioResample()

◆ ailiaAudioStandardize()