ailia  1.5.0.0
Macros | Functions
ailia_audio.h File Reference

audio processing library More...

Go to the source code of this file.

Macros

#define AILIA_API
 
#define AILIA_AUDIO_WIN_TYPE_HANN   (1)
 use a Hann window function More...
 
#define AILIA_AUDIO_WIN_TYPE_HAMMING   (2)
 use a Hamming window function More...
 
#define AILIA_AUDIO_STFT_CENTER_NONE   (0)
 for the STFT, do not insert padding before and after More...
 
#define AILIA_AUDIO_STFT_CENTER_ENABLE   (1)
 for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples More...
 
#define AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT   (2)
 for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n More...
 
#define AILIA_AUDIO_FFT_NORMALIZE_NONE   (0)
 Do not normalize the FFT output. More...
 
#define AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT   (1)
 Normalize the FFT output in a way compatible with librosa. More...
 
#define AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT   (1)
 Normalize the FFT output in a way compatible with PyTorch. More...
 
#define AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT   (2)
 Normalize the FFT output in a way compatible with SciPy. More...
 
#define AILIA_AUDIO_MEL_NORMALIZE_NONE   (0)
 Do not normalize the output of the mel spectrogram. More...
 
#define AILIA_AUDIO_MEL_NORMALIZE_ENABLE   (1)
 Normalize the output of the mel spectrogram. More...
 
#define AILIA_AUDIO_MEL_SCALE_FORMULA_HTK   (1)
 Get the mel scale from the HTK formula (PyTorch compatible) More...
 
#define AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE   (0)
 Get the mel scale from Slaney's formula (compatible with the default of librosa) More...
 
#define AILIA_AUDIO_PHASE_FORM_COMPLEX   (1)
 Output the phase in complex format (compatible with the default of librosa) More...
 
#define AILIA_AUDIO_PHASE_FORM_REAL   (0)
 Output the phase in real format (compatible with the default of PyTorch) More...
 
#define AILIA_AUDIO_FILTFILT_PAD_NONE   (0)
 During zero-phase filtering, do not pad. More...
 
#define AILIA_AUDIO_FILTFILT_PAD_ODD   (1)
 During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value) More...
 
#define AILIA_AUDIO_FILTFILT_PAD_EVEN   (2)
 During zero-phase filtering, pad with an even reflection (normal reflection) More...
 
#define AILIA_AUDIO_FILTFILT_PAD_CONSTANT   (3)
 During zero-phase filtering, pad using the edge value. More...
 

Functions

int AILIA_API ailiaAudioLog1p (void *dst, const void *src, int src_n)
 Convert the input values to a logarithmic scale. More...
 
int AILIA_API ailiaAudioConvertPowerToDB (void *dst, const void *src, int src_n, float top_db)
 Convert non-negative input values to decibel scale. More...
 
int AILIA_API ailiaAudioGetFrameLen (int *frame_n, int sample_n, int fft_n, int hop_n, int center)
 Get the number of frames generated by the STFT. More...
 
int AILIA_API ailiaAudioGetSampleLen (int *sample_n, int frame_n, int freq_n, int hop_n, int center)
 Get the number of samples generated by the ISTFT. More...
 
int AILIA_API ailiaAudioGetWindow (void *dst, int window_n, int win_type)
 Get the window function. More...
 
int AILIA_API ailiaAudioFFT (void *dst, const void *src, int fft_n)
 Execute the FFT. More...
 
int AILIA_API ailiaAudioIFFT (void *dst, const void *src, int fft_n)
 Execute the IFFT. More...
 
int AILIA_API ailiaAudioGetSpectrogram (void *dst, const void *src, int sample_n, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int norm_type)
 Generate the spectrogram from the audio signal. More...
 
int AILIA_API ailiaAudioGetInverseSpectrogram (void *dst, const void *src, int frame_n, int freq_n, int hop_n, int win_n, int win_type, int max_sample_n, int center, int norm_type)
 Generate an audio signal from a complex spectrogram. More...
 
int AILIA_API ailiaAudioGetFBMatrix (void *dst, const int freq_n, float f_min, float f_max, int mel_n, int sample_rate, int mel_norm, int mel_formula)
 Create a mel filter-bank. More...
 
int AILIA_API ailiaAudioGetMelSpectrogram (void *dst, const void *src, int sample_n, int sample_rate, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int fft_norm_type, float f_min, float f_max, int mel_n, int mel_norm_type, int mel_formula)
 Generate the mel spectrogram from the audio signal. More...
 
int AILIA_API ailiaAudioMagPhase (void *dst_mag, void *dst_phase, const void *src, int freq_n, int frame_n, float power, int phase_form)
 Get the amplitude and the phase from the spectrogram. More...
 
int AILIA_API ailiaAudioStandardize (void *dst, const void *src, const int src_n)
 Standardize a real signal. More...
 
int AILIA_API ailiaAudioComplexNorm (void *dst, const void *src, const int src_n, float power)
 Get the norm of the complex signal. More...
 
int AILIA_API ailiaAudioConvertToMel (void *dst, const void *src, const void *fb_mtrx, int freq_n, int frame_n, int mel_n)
 Convert the real output of the STFT to the mel scale. More...
 
int AILIA_API ailiaAudioFixFrameLen (void *dst, const void *src, int freq_n, int dst_frame_n, int src_frame_n, float pad_data)
 Fix the number of time frames of a real-valued spectrogram/mel-spectrogram. More...
 
int AILIA_API ailiaAudioResample (void *dst, const void *src, int dst_sample_rate, int dst_n, int src_sample_rate, int src_n)
 Resample the signal. More...
 
int AILIA_API ailiaAudioGetResampleLen (int *dst_sample_n, int dst_sample_rate, int src_sample_n, int src_sample_rate)
 Get the number of samples after the resampling. More...
 
int AILIA_API ailiaAudioLinerFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, void *zi, int dst_n, int src_n, int n_coef_n, int d_coef_n, int zi_n)
 Apply a filter to the signal. More...
 
int AILIA_API ailiaAudioGetLinerFilterZiCoef (void *dst_zi, const void *n_coef, const void *d_coef, int dst_n, int n_coef_n, int d_coef_n)
 Calculate the initial delay coefficients for filtering. More...
 
int AILIA_API ailiaAudioFilterFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, int dst_n, int src_n, int n_coef_n, int d_coef_n, int pad_type, int pad_len)
 Apply a zero-phase filter to the signal. More...
 
int AILIA_API ailiaAudioGetNonSilentPos (int *dst_start_pos, int *dst_length, const void *src, int sample_n, int win_n, int hop_n, float thr_db)
 Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input. More...
 

Detailed Description

audio processing library

Date
2021/07/28

Macro Definition Documentation

◆ AILIA_API

#define AILIA_API

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT   (1)

Normalize the FFT output in a way compatible with librosa.

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

#define AILIA_AUDIO_FFT_NORMALIZE_NONE   (0)

Do not normalize the FFT output.

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT   (1)

Normalize the FFT output in a way compatible with PyTorch.

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

#define AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT   (2)

Normalize the FFT output in a way compatible with SciPy.

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

#define AILIA_AUDIO_FILTFILT_PAD_CONSTANT   (3)

During zero-phase filtering, pad using the edge value.

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

#define AILIA_AUDIO_FILTFILT_PAD_EVEN   (2)

During zero-phase filtering, pad with an even reflection (normal reflection)

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

#define AILIA_AUDIO_FILTFILT_PAD_NONE   (0)

During zero-phase filtering, do not pad.

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

#define AILIA_AUDIO_FILTFILT_PAD_ODD   (1)

During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value)

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

#define AILIA_AUDIO_MEL_NORMALIZE_ENABLE   (1)

Normalize the output of the mel spectrogram.

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

#define AILIA_AUDIO_MEL_NORMALIZE_NONE   (0)

Do not normalize the output of the mel spectrogram.

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

#define AILIA_AUDIO_MEL_SCALE_FORMULA_HTK   (1)

Get the mel scale from the HTK formula (PyTorch compatible)

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

#define AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE   (0)

Get the mel scale from Slaney's formula (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

#define AILIA_AUDIO_PHASE_FORM_COMPLEX   (1)

Output the phase in complex format (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_REAL

#define AILIA_AUDIO_PHASE_FORM_REAL   (0)

Output the phase in real format (compatible with the default of PyTorch)

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

#define AILIA_AUDIO_STFT_CENTER_ENABLE   (1)

for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples

◆ AILIA_AUDIO_STFT_CENTER_NONE

#define AILIA_AUDIO_STFT_CENTER_NONE   (0)

for the STFT, do not insert padding before and after

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

#define AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT   (2)

for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

#define AILIA_AUDIO_WIN_TYPE_HAMMING   (2)

use a Hamming window function

◆ AILIA_AUDIO_WIN_TYPE_HANN

#define AILIA_AUDIO_WIN_TYPE_HANN   (1)

use a Hann window function

Function Documentation

◆ ailiaAudioComplexNorm()

int AILIA_API ailiaAudioComplexNorm ( void *  dst,
const void *  src,
const int  src_n,
float  power 
)

Get the norm of the complex signal.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, an array of length (2 * src_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (src_n, 2))
src_nlength of the input data
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Compute the norm of the input data. For each complex element src_cmp = src[0] + i * src[1]: tmp_dst = pow(src[0],2.0) + pow(src[1],2.0); dst[0] = pow(tmp_dst,0.5*power);

◆ ailiaAudioConvertPowerToDB()

int AILIA_API ailiaAudioConvertPowerToDB ( void *  dst,
const void *  src,
int  src_n,
float  top_db 
)

Convert non-negative input values to decibel scale.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nnumber of elements to be calculated
top_dbfloat >= 0.0
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Output compatible with librosa.power_to_db. dst = trimlow( 10 * log10(src / ref) ), where ref is the max of 1e-10 and of positive values of src. If top_db > 0, trimlow() replaces all values below (-top_db) by (-top_db); otherwise, trimlow() does nothing.

◆ ailiaAudioConvertToMel()

int AILIA_API ailiaAudioConvertToMel ( void *  dst,
const void *  src,
const void *  fb_mtrx,
int  freq_n,
int  frame_n,
int  mel_n 
)

Convert the real output of the STFT to the mel scale.

Parameters
dstpointer to the output data, of float format, of length (mel_n * frame_n), and of memory layout (in row-major convention) (mel_n, frame_n).
srcpointer to the input data, of float format, of length (freq_n * frame_n), and of memory layout (in row-major convention) (freq_n, frame_n).
fb_mtrxthe mel filter-bank, of float format, of length (mel_n * freq_n), and of memory layout (in row-major convention) (mel_n, freq_n).
freq_nnumber of frequency indices
frame_nnumber of time frames in the input data
mel_nnumber of mel frequency indices
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Converts the real spectrogram given in input to the mel scale. The argument fb_mtrx can take the coefficients outputted by ailiaAudioGetFBMatrix() .

◆ ailiaAudioFFT()

int AILIA_API ailiaAudioFFT ( void *  dst,
const void *  src,
int  fft_n 
)

Execute the FFT.

Parameters
dstpointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
srcpointer to the input data, of float format, and of length fft_n
fft_ncount of FFT values (i.e. of frequency bins)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioFilterFilter()

int AILIA_API ailiaAudioFilterFilter ( void *  dst,
const void *  src,
const void *  n_coef,
const void *  d_coef,
int  dst_n,
int  src_n,
int  n_coef_n,
int  d_coef_n,
int  pad_type,
int  pad_len 
)

Apply a zero-phase filter to the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_nlength (in number of samples) reserved in the output buffer (dst_n >= src_n)
src_nnumber of samples in the input signal
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
pad_typetype of padding to apply at the start and at the end of the input signal: any of the AILIA_AUDIO_FILTFILT_PAD_* constants
pad_lenlength of the padding applied to the start and to the end of the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of values written to the output dst is min(dst_n,src_n). The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioFixFrameLen()

int AILIA_API ailiaAudioFixFrameLen ( void *  dst,
const void *  src,
int  freq_n,
int  dst_frame_n,
int  src_frame_n,
float  pad_data 
)

Fix the number of time frames of a real-valued spectrogram/mel-spectrogram.

Parameters
dstpointer to the output data, of length (freq_n * dst_frame_n), and of memory layout (in row-major convention) (freq_n, dst_frame_n).
srcpointer to the input data, of length (freq_n * src_frame_n), and of memory layout (in row-major convention) (freq_n, src_frame_n).
freq_nnumber of frequency indices
dst_frame_nnumber of time frames in the output data
src_frame_nnumber of time frames in the input data
pad_datavalue inserted for padding (used when dst_frame_n > src_frame_n)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst_frame_n > src_frame_n : missing time frames are added and filled with the value pad_data. dst_frame_n <= src_frame_n : only keeps the first dst_frame_n data.

◆ ailiaAudioGetFBMatrix()

int AILIA_API ailiaAudioGetFBMatrix ( void *  dst,
const int  freq_n,
float  f_min,
float  f_max,
int  mel_n,
int  sample_rate,
int  mel_norm,
int  mel_formula 
)

Create a mel filter-bank.

Parameters
dstpointer to the output data, of float format, and of length (mel_n * freq_n). (memory layout, using the row-major convention: (mel_n, freq_n))
freq_nnumber of frequency indices for the FFT (1+fft_n/2)
f_minlowest frequency
f_maxhighest frequency
mel_nnumber of mel frequency bins in the output (< freq_n)
sample_ratesampling rate for the signal that will be inputted to this filter
mel_normwhether to normalize the output (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formulamel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetFrameLen()

int AILIA_API ailiaAudioGetFrameLen ( int *  frame_n,
int  sample_n,
int  fft_n,
int  hop_n,
int  center 
)

Get the number of frames generated by the STFT.

Parameters
frame_npointer to the destination where to write the output (the number of frames)
sample_ncount of samples on which the STFT is performed
fft_nsize of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_nstride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
centerany of the AILIA_AUDIO_STFT_CENTER_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the STFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, the sample_n samples are cut in packets of size hop_n, and no padding occurs before the first sample nor after the last sample. If AILIA_AUDIO_STFT_CENTER_ENABLE is used, a reflection padding of length fft_n/2 is performed before the first sample and after the last sample. If AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT is used, a zero padding of length fft_n/2 is performed before the first sample and after the last sample, and moreover an additional zero padding is performed to ensure that the total length is a multiple of hop_n.

◆ ailiaAudioGetInverseSpectrogram()

int AILIA_API ailiaAudioGetInverseSpectrogram ( void *  dst,
const void *  src,
int  frame_n,
int  freq_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_sample_n,
int  center,
int  norm_type 
)

Generate an audio signal from a complex spectrogram.

Parameters
dstpointer to the output data, of float format, and of length sample_n
srcpointer to the input data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. Memory layout, using the row-major convention: (freq_n, frame_n, 2).
frame_nnumber of time frames in the input data
freq_nnumber of frequencies bins for each time frame (freq_n = fft_n/2+1)
hop_nstep size of the time frame increment (expressed in number of samples) for the inputted spectrogram.
win_nsize of the window function
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_sample_nmaximum value of the sample index in the outputted data
centerwhether padding (before and after) was used or not (and its type) during the generation of the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
norm_typenormalization type that was used during the generation of the input data: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame the normalization is executed at the end of the IFFT. Only accepts a complex spectrogram in input.

◆ ailiaAudioGetLinerFilterZiCoef()

int AILIA_API ailiaAudioGetLinerFilterZiCoef ( void *  dst_zi,
const void *  n_coef,
const void *  d_coef,
int  dst_n,
int  n_coef_n,
int  d_coef_n 
)

Calculate the initial delay coefficients for filtering.

Parameters
dst_zipointer to the output (initial delay coefficients), of float format, and of length dst_n (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_nsize, in number of samples, reserved in the output buffer (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

These initial delay coefficients dst_zi, once multiplied with the early values of the signal, can be passed as initial delayed values, the zi argument, to ailiaAudioLinerFilter() . Of the dst_n reserved length of the output buffer, the length used is max(n_coef_n,d_coef_n)-1. If dst_n is less than that, only the corresponding first values are output. If dst_n is larger, the remaining is filled with 0. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioGetMelSpectrogram()

int AILIA_API ailiaAudioGetMelSpectrogram ( void *  dst,
const void *  src,
int  sample_n,
int  sample_rate,
int  fft_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_frame_n,
int  center,
float  power,
int  fft_norm_type,
float  f_min,
float  f_max,
int  mel_n,
int  mel_norm_type,
int  mel_formula 
)

Generate the mel spectrogram from the audio signal.

Parameters
dstpointer to the output data, of float format, and of length (mel_n * frame_n) (with frame_n the number of time frames outputted). (memory layout, using the row-major convention: (mel_n, frame_n))
srcpointer to the input data, of float format, monaural PCM audio data.
sample_ncount of samples in the input data
sample_ratesampling rate of the input signal
fft_nnumber of FFT components
hop_nstride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_nsize of the window function (in number of samples)
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_nmaximum value of the time frame index in the outputted data
centerwhether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
fft_norm_typenormalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
f_minlowest frequency
f_maxhighest frequency
mel_nnumber of mel frequency bins in the output (< freq_n)
mel_norm_typewhether to normalize the mel spectrogram (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formulamel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT(STFT) -> normalization -> power exponentiation -> get the mel filter-bank coefficients -> convert to the mel scale. The output is real values, and its length is mel_n*frame_n (with frame_n the number of time frames outputted).

◆ ailiaAudioGetNonSilentPos()

int AILIA_API ailiaAudioGetNonSilentPos ( int *  dst_start_pos,
int *  dst_length,
const void *  src,
int  sample_n,
int  win_n,
int  hop_n,
float  thr_db 
)

Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input.

Parameters
dst_start_pospointer to the destination where to write the outputted start position of the non-silence area, of int format
dst_lengthpointer to the destination where to write the outputted length of the non-silence area, of int format
srcpointer to the input data, of float format, and of length sample_n
sample_ncount of samples in the input data
win_nsize of the window function
hop_nstride of each window shift (in number of samples)
thr_dbthreshold (in dB) above which the signal is considered non-silence (thr_db > 0)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

In case the whole signal is considered silence, the following happens: *dst_start_pos = -1, *dst_length = 0

◆ ailiaAudioGetResampleLen()

int AILIA_API ailiaAudioGetResampleLen ( int *  dst_sample_n,
int  dst_sample_rate,
int  src_sample_n,
int  src_sample_rate 
)

Get the number of samples after the resampling.

Parameters
dst_sample_npointer to the destination where to write the output (the number of samples after resampling)
dst_sample_ratesampling rate after the resampling
src_sample_nnumber of samples in the input signal
src_sample_ratesampling rate of the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetSampleLen()

int AILIA_API ailiaAudioGetSampleLen ( int *  sample_n,
int  frame_n,
int  freq_n,
int  hop_n,
int  center 
)

Get the number of samples generated by the ISTFT.

Parameters
sample_npointer to the destination where to write the output (the number of samples)
frame_nlength of the STFT data, expressed in number of frames
freq_nnumber of frequency bins for each time frame (freq_n = fft_n/2+1)
hop_nstride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
centerany of the AILIA_AUDIO_STFT_CENTER_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the ISTFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, no truncation is performed at the beginning nor at the end. If AILIA_AUDIO_STFT_CENTER_NONE is not used, a truncation is performed at the beginning and at the end.

◆ ailiaAudioGetSpectrogram()

int AILIA_API ailiaAudioGetSpectrogram ( void *  dst,
const void *  src,
int  sample_n,
int  fft_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_frame_n,
int  center,
float  power,
int  norm_type 
)

Generate the spectrogram from the audio signal.

Parameters
dstpointer to the output data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. (where freq_n = fft_n/2+1). Memory layout, using the row-major convention: (freq_n, frame_n, 2).
srcpointer to the input data, of float format, and of length sample_n
sample_ncount of samples in the input data
fft_nsize of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_nstride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_nsize of the window function
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_nmaximum value of the time frame index in the outputted data
centerwhether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
powerexponent to apply to the spectrogram (>= 0.0). A special case is for 0.0: complex spectrogram. For other cases the amplitude is just exponentiated accordingly: 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
norm_typenormalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT -> normalization -> power exponentiation. As the output data alternates real and imaginary parts, its length is 2*(fft_n/2+1)*frame_n (where frame_n is the number of time frames outputted). When the power argument is a non-zero value, all the imaginary parts are set to 0 in the output.

◆ ailiaAudioGetWindow()

int AILIA_API ailiaAudioGetWindow ( void *  dst,
int  window_n,
int  win_type 
)

Get the window function.

Parameters
dstpointer to the output data, of float format, and of length window_n
window_nlength of the window (in number of samples)
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Only the Hann and the Hamming window functions are supported.

◆ ailiaAudioIFFT()

int AILIA_API ailiaAudioIFFT ( void *  dst,
const void *  src,
int  fft_n 
)

Execute the IFFT.

Parameters
dstpointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
srcpointer to the input data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
fft_ncount of FFT values (i.e. of frequency bins)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioLinerFilter()

int AILIA_API ailiaAudioLinerFilter ( void *  dst,
const void *  src,
const void *  n_coef,
const void *  d_coef,
void *  zi,
int  dst_n,
int  src_n,
int  n_coef_n,
int  d_coef_n,
int  zi_n 
)

Apply a filter to the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
zipointer to the initial delayed values to be used, of float format, and of length zi_n (zi_n = max(n_coef_n,d_coef_n)-1). nullptr is allowed.
dst_nsize, in number of samples, reserved in the output buffer (dst_n >= src_n)
src_nnumber of samples in the input signal
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
zi_nnumber of initial delayed values provided (zi_n >= max(n_coef_n,d_coef_n)-1)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of samples outputted to dst is min(dst_n,src_n). Use zi to provide the initial delayed values. During processing, this array is overwritten with the new delayed values. Out of the zi_n, the number of delayed values used is max(n_coef_n,d_coef_n)-1. If there are fewer than that, the remaining ones are assumed to be zeros, and the array zi is not updated with the new values. When zi is nullptr, zi_n is ignored, all the delayed values are assumed to be zero, and the new delayed values are not returned. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioLog1p()

int AILIA_API ailiaAudioLog1p ( void *  dst,
const void *  src,
int  src_n 
)

Convert the input values to a logarithmic scale.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nnumber of elements to be calculated
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst = log_e(1.0 + src)

◆ ailiaAudioMagPhase()

int AILIA_API ailiaAudioMagPhase ( void *  dst_mag,
void *  dst_phase,
const void *  src,
int  freq_n,
int  frame_n,
float  power,
int  phase_form 
)

Get the amplitude and the phase from the spectrogram.

Parameters
dst_magpointer to the outputted amplitudes, an array of length (freq_n * frame_n). (memory layout, using the row-major convention: (freq_n, frame_n))
dst_phasepointer to the outputted phases, an array of length (2 * freq_n * frame_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (freq_n, frame_n, 2))
srcpointer to the input data, of length (2 * frame_n * freq_n) (a sequence of complex pairs [real, imaginary]). (memory layout, using the row-major convention: (frame_n, freq_n, 2))
freq_nnumber of frequency indices
frame_nnumber of time frames
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
phase_formformat of the outputted phase: any of the AILIA_AUDIO_PHASE_FORM_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

To be compatible with librosa, use: phase_form = AILIA_AUDIO_PHASE_FORM_COMPLEX , power = 1.0. To be compatible with PyTorch, use: phase_form = AILIA_AUDIO_PHASE_FORM_REAL , power = 1.0. The dst_phase output depends on phase_form:

◆ ailiaAudioResample()

int AILIA_API ailiaAudioResample ( void *  dst,
const void *  src,
int  dst_sample_rate,
int  dst_n,
int  src_sample_rate,
int  src_n 
)

Resample the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
dst_sample_ratesampling rate after the resampling
dst_nlength (in number of samples) reserved in the output buffer(dst_n >= max_resample_n)
src_sample_ratesampling rate of the input signal
src_nnumber of samples in the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The max number of samples in the output, max_resample_n, can be obtained from ailiaAudioGetResampleLen() . dst_n < max_resample_n : only the first dst_n samples are outputted. dst_n >= max_resample_n : max_resample_n samples are outputted.

◆ ailiaAudioStandardize()

int AILIA_API ailiaAudioStandardize ( void *  dst,
const void *  src,
const int  src_n 
)

Standardize a real signal.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nlength of the input data
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Standardize the input data so that its average value becomes 0 and its variance 1. dst = (src - mean(src)) / std(src)