ailia  1.3.0.0
Public Member Functions | Public Attributes | List of all members
AiliaAudio Class Reference

Public Member Functions

static int ailiaAudioLog1p (float[] dst, float[] src, int src_n)
 Convert the input values to a logarithmic scale. More...
 
static int ailiaAudioConvertPowerToDB (float[] dst, float[] src, int src_n, float top_db)
 Convert non-negative input values to decibel scale. More...
 
static int ailiaAudioGetFrameLen (ref Int32 frame_n, int sample_n, int fft_n, int hop_n, int center)
 Get the number of frames generated by the STFT. More...
 
static int ailiaAudioGetSampleLen (ref Int32 sample_n, int frame_n, int freq_n, int hop_n, int center)
 Get the number of samples generated by the ISTFT. More...
 
static int ailiaAudioGetWindow (float[] dst, int window_n, int win_type)
 Get the window function. More...
 
static int ailiaAudioFFT (float[] dst, float[] src, int fft_n)
 Execute the FFT. More...
 
static int ailiaAudioIFFT (float[] dst, float[] src, int fft_n)
 Execute the IFFT. More...
 
static int ailiaAudioGetSpectrogram (float[] dst, float[] src, int sample_n, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int norm_type)
 Generate the spectrogram from the audio signal. More...
 
static int ailiaAudioGetInverseSpectrogram (float[] dst, float[] src, int frame_n, int freq_n, int hop_n, int win_n, int win_type, int max_sample_n, int center, int norm_type)
 Generate an audio signal from a complex spectrogram. More...
 
static int ailiaAudioGetFBMatrix (float[] dst, int freq_n, float f_min, float f_max, int mel_n, int sample_rate, int mel_norm, int mel_formula)
 Create a mel filter-bank. More...
 
static int ailiaAudioGetMelSpectrogram (float[] dst, float[] src, int sample_n, int sample_rate, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int fft_norm_type, float f_min, float f_max, int mel_n, int mel_norm_type, int mel_formula)
 Generate the mel spectrogram from the audio signal. More...
 
static int ailiaAudioMagPhase (float[] dst_mag, float[] dst_phase, float[] src, int freq_n, int frame_n, float power, int phase_form)
 Get the amplitude and the phase from the spectrogram. More...
 
static int ailiaAudioStandardize (float[] dst, float[] src, int src_n)
 Standardize a real signal. More...
 
static int ailiaAudioComplexNorm (float[] dst, float[] src, int src_n, float power)
 Get the norm of the complex signal. More...
 
static int ailiaAudioConvertToMel (float[] dst, float[] src, float[] fb_mtrx, int freq_n, int frame_n, int mel_n)
 Convert the real output of the STFT to the mel scale. More...
 
static int ailiaAudioFixFrameLen (float[] dst, float[] src, int freq_n, int dst_frame_n, int src_frame_n, float pad_data)
 Fix the number of time frames of a real-valued spectrogram/mel-spectrogram. More...
 
static int ailiaAudioResample (float[] dst, float[] src, int dst_sample_rate, int dst_n, int src_sample_rate, int src_n)
 Resample the signal. More...
 
static int ailiaAudioGetResampleLen (ref Int32 dst_sample_n, int dst_sample_rate, int src_sample_n, int src_sample_rate)
 Get the number of samples after the resampling. More...
 
static int ailiaAudioLinerFilter (float[] dst, float[] src, float[] n_coef, float[] d_coef, float[] zi, int dst_n, int src_n, int n_coef_n, int d_coef_n, int zi_n)
 Apply a filter to the signal. More...
 
static int ailiaAudioGetLinerFilterZiCoef (float[] dst_zi, float[] n_coef, float[] d_coef, int dst_n, int n_coef_n, int d_coef_n)
 Calculate the initial delay coefficients for filtering. More...
 
static int ailiaAudioFilterFilter (float[] dst, float[] src, float[] n_coef, float[] d_coef, int dst_n, int src_n, int n_coef_n, int d_coef_n, int pad_type, int pad_len)
 Apply a zero-phase filter to the signal. More...
 
static int ailiaAudioGetNonSilentPos (ref Int32 dst_start_pos, ref Int32 dst_length, float[] src, int sample_n, int win_n, int hop_n, float thr_db)
 Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input. More...
 

Public Attributes

const Int32 AILIA_AUDIO_WIN_TYPE_HANN = (1)
 
const Int32 AILIA_AUDIO_WIN_TYPE_HAMMING = (2)
 
const Int32 AILIA_AUDIO_STFT_CENTER_NONE = (0)
 
const Int32 AILIA_AUDIO_STFT_CENTER_ENABLE = (1)
 
const Int32 AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT = (2)
 
const Int32 AILIA_AUDIO_FFT_NORMALIZE_NONE = (0)
 
const Int32 AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT = (1)
 
const Int32 AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT = (1)
 
const Int32 AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT = (2)
 
const Int32 AILIA_AUDIO_MEL_NORMALIZE_NONE = (0)
 
const Int32 AILIA_AUDIO_MEL_NORMALIZE_ENABLE = (1)
 
const Int32 AILIA_AUDIO_MEL_SCALE_FORMULA_HTK = (1)
 
const Int32 AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE = (0)
 
const Int32 AILIA_AUDIO_PHASE_FORM_COMPLEX = (1)
 
const Int32 AILIA_AUDIO_PHASE_FORM_REAL = (0)
 
const Int32 AILIA_AUDIO_FILTFILT_PAD_NONE = (0)
 
const Int32 AILIA_AUDIO_FILTFILT_PAD_ODD = (1)
 
const Int32 AILIA_AUDIO_FILTFILT_PAD_EVEN = (2)
 
const Int32 AILIA_AUDIO_FILTFILT_PAD_CONSTANT = (3)
 
const String LIBRARY_NAME = "ailia_audio"
 

Member Function Documentation

◆ ailiaAudioComplexNorm()

static int AiliaAudio.ailiaAudioComplexNorm ( float []  dst,
float []  src,
int  src_n,
float  power 
)

Get the norm of the complex signal.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, an array of length (2 * src_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (src_n, 2))
src_nlength of the input data
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Compute the norm of the input data. For each complex value src_cmp = src[0] + i * src[1]: tmp_dst = pow(src[0],2.0) + pow(src[1],2.0); dst[0] = pow(tmp_dst,0.5*power);

◆ ailiaAudioConvertPowerToDB()

static int AiliaAudio.ailiaAudioConvertPowerToDB ( float []  dst,
float []  src,
int  src_n,
float  top_db 
)

Convert non-negative input values to decibel scale.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nnumber of elements to be calculated
top_dbfloat >= 0.0
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Output compatible with librosa.power_to_db. dst = trimlow( 10 * log10(src / ref) ) where ref is the max of 1e-10 and of positive values of src, and trimlow(), if top_db > 0, trims all values lower than (- top_db) and replaces them by (- top_db); otherwise, trimlow() does nothing.

◆ ailiaAudioConvertToMel()

static int AiliaAudio.ailiaAudioConvertToMel ( float []  dst,
float []  src,
float []  fb_mtrx,
int  freq_n,
int  frame_n,
int  mel_n 
)

Convert the real output of the STFT to the mel scale.

Parameters
dstpointer to the output data, of float format, of length (mel_n * frame_n), and of memory layout (in row-major convention) (mel_n, frame_n).
srcpointer to the input data, of float format, of length (freq_n * frame_n), and of memory layout (in row-major convention) (freq_n, frame_n).
fb_mtrxthe mel filter-bank, of float format, of length (mel_n * freq_n), and of memory layout (in row-major convention) (mel_n, freq_n).
freq_nnumber of frequency indices
frame_nnumber of time frames in the input data
mel_nnumber of mel frequency indices
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Converts the real spectrogram given in input to the mel scale. The argument fb_mtrx can take the coefficients outputted by ailiaAudioGetFBMatrix() .

◆ ailiaAudioFFT()

static int AiliaAudio.ailiaAudioFFT ( float []  dst,
float []  src,
int  fft_n 
)

Execute the FFT.

Parameters
dstpointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
srcpointer to the input data, of float format, and of length fft_n
fft_ncount of FFT values (i.e. of frequency bins)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioFilterFilter()

static int AiliaAudio.ailiaAudioFilterFilter ( float []  dst,
float []  src,
float []  n_coef,
float []  d_coef,
int  dst_n,
int  src_n,
int  n_coef_n,
int  d_coef_n,
int  pad_type,
int  pad_len 
)

Apply a zero-phase filter to the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_nlength (in number of samples) reserved in the output buffer (dst_n >= src_n)
src_nnumber of samples in the input signal
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
pad_typetype of padding to apply at the start and at the end of the input signal: any of the AILIA_AUDIO_FILTFILT_PAD_* constants
pad_lenlength of the padding applied to the start and to the end of the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of values written to the output dst is min(dst_n,src_n). The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioFixFrameLen()

static int AiliaAudio.ailiaAudioFixFrameLen ( float []  dst,
float []  src,
int  freq_n,
int  dst_frame_n,
int  src_frame_n,
float  pad_data 
)

Fix the number of time frames of a real-valued spectrogram/mel-spectrogram.

Parameters
dstpointer to the output data, of length (freq_n * dst_frame_n), and of memory layout (in row-major convention) (freq_n, dst_frame_n).
srcpointer to the input data, of length (freq_n * src_frame_n), and of memory layout (in row-major convention) (freq_n, src_frame_n).
freq_nnumber of frequency indices
dst_frame_nnumber of time frames in the output data
src_frame_nnumber of time frames in the input data
pad_datavalue inserted for padding (used when dst_frame_n > src_frame_n)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst_frame_n > src_frame_n : missing time frames are added and filled with the value pad_data. dst_frame_n <= src_frame_n : only keeps the first dst_frame_n data.

◆ ailiaAudioGetFBMatrix()

static int AiliaAudio.ailiaAudioGetFBMatrix ( float []  dst,
int  freq_n,
float  f_min,
float  f_max,
int  mel_n,
int  sample_rate,
int  mel_norm,
int  mel_formula 
)

Create a mel filter-bank.

Parameters
dstpointer to the output data, of float format, and of length (mel_n * freq_n). (memory layout, using the row-major convention: (mel_n, freq_n))
freq_nnumber of frequency indices for the FFT (1+fft_n/2)
f_minlowest frequency
f_maxhighest frequency
mel_nnumber of mel frequency bins in the output (< freq_n)
sample_ratesampling rate for the signal that will be inputted to this filter
mel_normwhether to normalize the output (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formulamel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetFrameLen()

static int AiliaAudio.ailiaAudioGetFrameLen ( ref Int32  frame_n,
int  sample_n,
int  fft_n,
int  hop_n,
int  center 
)

Get the number of frames generated by the STFT.

Parameters
frame_npointer to the destination where to write the output (the number of frames)
sample_ncount of samples on which the STFT is performed
fft_nsize of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_nstride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
centerany of the AILIA_AUDIO_STFT_CENTER_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the STFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, the sample_n samples are cut in packets of size hop_n, and no padding occurs before the first sample nor after the last sample. If AILIA_AUDIO_STFT_CENTER_ENABLE is used, a reflection padding of length fft_n/2 is performed before the first sample and after the last sample. If AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT is used, a zero padding of length fft_n/2 is performed before the first sample and after the last sample, and moreover an additional zero padding is performed to ensure that the total length is a multiple of hop_n.

◆ ailiaAudioGetInverseSpectrogram()

static int AiliaAudio.ailiaAudioGetInverseSpectrogram ( float []  dst,
float []  src,
int  frame_n,
int  freq_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_sample_n,
int  center,
int  norm_type 
)

Generate an audio signal from a complex spectrogram.

Parameters
dstpointer to the output data, of float format, and of length sample_n
srcpointer to the input data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. Memory layout, using the row-major convention: (freq_n, frame_n, 2).
frame_nnumber of time frames in the input data
freq_nnumber of frequencies bins for each time frame (freq_n = fft_n/2+1)
hop_nstep size of the time frame increment (expressed in number of samples) for the inputted spectrogram.
win_nsize of the window function
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_sample_nmaximum value of the sample index in the outputted data
centerwhether padding (before and after) was used or not (and its type) during the generation of the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
norm_typenormalization type that was used during the generation of the input data: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame the normalization is executed at the end of the IFFT. Only accepts a complex spectrogram in input.

◆ ailiaAudioGetLinerFilterZiCoef()

static int AiliaAudio.ailiaAudioGetLinerFilterZiCoef ( float []  dst_zi,
float []  n_coef,
float []  d_coef,
int  dst_n,
int  n_coef_n,
int  d_coef_n 
)

Calculate the initial delay coefficients for filtering.

Parameters
dst_zipointer to the output (initial delay coefficients), of float format, and of length dst_n (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
dst_nsize, in number of samples, reserved in the output buffer (dst_n >= max(n_coef_n,d_coef_n)-1)
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

These initial delay coefficients dst_zi, once multiplied with the early values of the signal, can be passed as initial delayed values, the zi argument, to ailiaAudioLinerFilter() . Of the dst_n reserved length of the output buffer, the length used is max(n_coef_n,d_coef_n)-1. If dst_n is less than that, only the corresponding first values are output. If dst_n is larger, the remaining is filled with 0. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioGetMelSpectrogram()

static int AiliaAudio.ailiaAudioGetMelSpectrogram ( float []  dst,
float []  src,
int  sample_n,
int  sample_rate,
int  fft_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_frame_n,
int  center,
float  power,
int  fft_norm_type,
float  f_min,
float  f_max,
int  mel_n,
int  mel_norm_type,
int  mel_formula 
)

Generate the mel spectrogram from the audio signal.

Parameters
dstpointer to the output data, of float format, and of length (mel_n * frame_n) (with frame_n the number of time frames outputted). (memory layout, using the row-major convention: (mel_n, frame_n))
srcpointer to the input data, of float format, monaural PCM audio data.
sample_ncount of samples in the input data
sample_ratesampling rate of the input signal
fft_nnumber of FFT components
hop_nstride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_nsize of the window function (in number of samples)
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_nmaximum value of the time frame index in the outputted data
centerwhether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
fft_norm_typenormalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
f_minlowest frequency
f_maxhighest frequency
mel_nnumber of mel frequency bins in the output (< freq_n)
mel_norm_typewhether to normalize the mel spectrogram (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants
mel_formulamel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT(STFT) -> normalization -> power exponentiation -> get the mel filter-bank coefficients -> convert to the mel scale. The output is real values, and its length is mel_n*frame_n (with frame_n the number of time frames outputted).

◆ ailiaAudioGetNonSilentPos()

static int AiliaAudio.ailiaAudioGetNonSilentPos ( ref Int32  dst_start_pos,
ref Int32  dst_length,
float []  src,
int  sample_n,
int  win_n,
int  hop_n,
float  thr_db 
)

Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input.

Parameters
dst_start_pospointer to the destination where to write the outputted start position of the non-silence area, of int format
dst_lengthpointer to the destination where to write the outputted length of the non-silence area, of int format
srcpointer to the input data, of float format, and of length sample_n
sample_ncount of samples in the input data
win_nsize of the window function
hop_nstride of each window shift (in number of samples)
thr_dbthreshold (in dB) above which the signal is considered non-silence (thr_db > 0)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

In case the whole signal is considered silence, the following happens: *dst_start_pos = -1, *dst_length = 0

◆ ailiaAudioGetResampleLen()

static int AiliaAudio.ailiaAudioGetResampleLen ( ref Int32  dst_sample_n,
int  dst_sample_rate,
int  src_sample_n,
int  src_sample_rate 
)

Get the number of samples after the resampling.

Parameters
dst_sample_npointer to the destination where to write the output (the number of samples after resampling)
dst_sample_ratesampling rate after the resampling
src_sample_nnumber of samples in the input signal
src_sample_ratesampling rate of the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

◆ ailiaAudioGetSampleLen()

static int AiliaAudio.ailiaAudioGetSampleLen ( ref Int32  sample_n,
int  frame_n,
int  freq_n,
int  hop_n,
int  center 
)

Get the number of samples generated by the ISTFT.

Parameters
sample_npointer to the destination where to write the output (the number of samples)
frame_nlength of the STFT data, expressed in number of frames
freq_nnumber of frequency bins at each frame (freq_n = fft_n/2+1)
hop_nstride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output.
centerany of the AILIA_AUDIO_STFT_CENTER_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Before executing the ISTFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, no truncation is performed at the beginning nor at the end. If AILIA_AUDIO_STFT_CENTER_NONE is not used, a truncation is performed at the beginning and at the end.

◆ ailiaAudioGetSpectrogram()

static int AiliaAudio.ailiaAudioGetSpectrogram ( float []  dst,
float []  src,
int  sample_n,
int  fft_n,
int  hop_n,
int  win_n,
int  win_type,
int  max_frame_n,
int  center,
float  power,
int  norm_type 
)

Generate the spectrogram from the audio signal.

Parameters
dstpointer to the output data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. (where freq_n = fft_n/2+1). Memory layout, using the row-major convention: (freq_n, frame_n, 2).
srcpointer to the input data, of float format, and of length sample_n
sample_ncount of samples in the input data
fft_nsize of the FFT at each frame (i.e. number of frequency bins at each frame)
hop_nstride of each window shift (in number of samples). This is the size of the time increment for the spectrogram.
win_nsize of the window function
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
max_frame_nmaximum value of the time frame index in the outputted data
centerwhether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants
powerexponent to apply to the spectrogram (>= 0.0). A special case is for 0.0: complex spectrogram. For other cases the amplitude is just exponentiated accordingly: 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
norm_typenormalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

For each time frame, the operations are processed in this order: FFT -> normalization -> power exponentiation. As the output data alternates real and imaginary parts, its length is 2*(fft_n/2+1)*frame_n (where frame_n is the number of time frames outputted). When the power argument is a non-zero value, all the imaginary parts are set to 0 in the output.

◆ ailiaAudioGetWindow()

static int AiliaAudio.ailiaAudioGetWindow ( float []  dst,
int  window_n,
int  win_type 
)

Get the window function.

Parameters
dstpointer to the output data, of float format, and of length window_n
window_nlength of the window (in number of samples)
win_typetype of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Only the Hann and the Hamming window functions are supported.

◆ ailiaAudioIFFT()

static int AiliaAudio.ailiaAudioIFFT ( float []  dst,
float []  src,
int  fft_n 
)

Execute the IFFT.

Parameters
dstpointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
srcpointer to the input data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2).
fft_ncount of FFT values (i.e. of frequency bins)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.

◆ ailiaAudioLinerFilter()

static int AiliaAudio.ailiaAudioLinerFilter ( float []  dst,
float []  src,
float []  n_coef,
float []  d_coef,
float []  zi,
int  dst_n,
int  src_n,
int  n_coef_n,
int  d_coef_n,
int  zi_n 
)

Apply a filter to the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
n_coefpointer to the numerator coefficients of the filter, of float format, and length n_coef_n
d_coefpointer to the denominator coefficients of the filter, of float format, and length d_coef_n
zipointer to the initial delayed values to be used, of float format, and of length zi_n (zi_n = max(n_coef_n,d_coef_n)-1). nullptr is allowed.
dst_nsize, in number of samples, reserved in the output buffer (dst_n >= src_n)
src_nnumber of samples in the input signal
n_coef_nnumber of numerator coefficients of the filter
d_coef_nnumber of denominator coefficients of the filter
zi_nnumber of initial delayed values provided (zi_n >= max(n_coef_n,d_coef_n)-1)
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The number of samples outputted to dst is min(dst_n,src_n). Use zi to provide the initial delayed values. During processing, this array is overwritten with the new delayed values. Out of the zi_n, the number of delayed values used is max(n_coef_n,d_coef_n)-1. If there are fewer than that, the remaining ones are assumed to be zeros, and the array zi is not updated with the new values. When zi is nullptr, zi_n is ignored, all the delayed values are assumed to be zero, and the new delayed values are not returned. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.

◆ ailiaAudioLog1p()

static int AiliaAudio.ailiaAudioLog1p ( float []  dst,
float []  src,
int  src_n 
)

Convert the input values to a logarithmic scale.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nnumber of elements to be calculated
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

dst = log_e(1.0 + src)

◆ ailiaAudioMagPhase()

static int AiliaAudio.ailiaAudioMagPhase ( float []  dst_mag,
float []  dst_phase,
float []  src,
int  freq_n,
int  frame_n,
float  power,
int  phase_form 
)

Get the amplitude and the phase from the spectrogram.

Parameters
dst_magpointer to the outputted amplitudes, an array of length (freq_n * frame_n). (memory layout, using the row-major convention: (freq_n, frame_n))
dst_phasepointer to the outputted phases, an array of length (2 * freq_n * frame_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (freq_n, frame_n, 2))
srcpointer to the input data, of length (2 * frame_n * freq_n) (a sequence of complex pairs [real, imaginary]). (memory layout, using the row-major convention: (frame_n, freq_n, 2))
freq_nnumber of frequency indices
frame_nnumber of time frames
powerexponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed.
phase_formformat of the outputted phase: any of the AILIA_AUDIO_PHASE_FORM_* constants
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

To be compatible with librosa, use: phase_form = AILIA_AUDIO_PHASE_FORM_COMPLEX, power = 1.0. To be compatible with PyTorch, use: phase_form = AILIA_AUDIO_PHASE_FORM_REAL, power = 1.0. The dst_phase output depends on phase_form (see the AILIA_AUDIO_PHASE_FORM_* constants).

◆ ailiaAudioResample()

static int AiliaAudio.ailiaAudioResample ( float []  dst,
float []  src,
int  dst_sample_rate,
int  dst_n,
int  src_sample_rate,
int  src_n 
)

Resample the signal.

Parameters
dstpointer to the output data, of float format, and of length dst_n
srcpointer to the input data, of float format, and of length src_n
dst_sample_ratesampling rate after the resampling
dst_nlength (in number of samples) reserved in the output buffer (dst_n >= max_resample_n)
src_sample_ratesampling rate of the input signal
src_nnumber of samples in the input signal
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

The max number of samples in the output, max_resample_n, can be obtained from ailiaAudioGetResampleLen() . If dst_n < max_resample_n : only the first dst_n samples are outputted. If dst_n >= max_resample_n : max_resample_n samples are outputted.

◆ ailiaAudioStandardize()

static int AiliaAudio.ailiaAudioStandardize ( float []  dst,
float []  src,
int  src_n 
)

Standardize a real signal.

Parameters
dstpointer to the output data, of float format, and of length src_n
srcpointer to the input data, of float format, and of length src_n
src_nlength of the input data
Returns
In case of success, AILIA_STATUS_SUCCESS , and else an error code is returned.

Standardize the input data so that its average value becomes 0 and its variance 1. dst = (src - mean(src)) / std(src)

Member Data Documentation

◆ AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT = (1)

Normalize the FFT output in a way compatible with librosa

◆ AILIA_AUDIO_FFT_NORMALIZE_NONE

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_NONE = (0)

Do not normalize the FFT output

◆ AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT = (1)

Normalize the FFT output in a way compatible with PyTorch

◆ AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT

const Int32 AiliaAudio.AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT = (2)

Normalize the FFT output in a way compatible with SciPy

◆ AILIA_AUDIO_FILTFILT_PAD_CONSTANT

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_CONSTANT = (3)

During zero-phase filtering, pad using the edge value

◆ AILIA_AUDIO_FILTFILT_PAD_EVEN

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_EVEN = (2)

During zero-phase filtering, pad with an even reflection (normal reflection)

◆ AILIA_AUDIO_FILTFILT_PAD_NONE

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_NONE = (0)

During zero-phase filtering, do not pad

◆ AILIA_AUDIO_FILTFILT_PAD_ODD

const Int32 AiliaAudio.AILIA_AUDIO_FILTFILT_PAD_ODD = (1)

During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value)

◆ AILIA_AUDIO_MEL_NORMALIZE_ENABLE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_NORMALIZE_ENABLE = (1)

Normalize the output of the mel spectrogram

◆ AILIA_AUDIO_MEL_NORMALIZE_NONE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_NORMALIZE_NONE = (0)

Do not normalize the output of the mel spectrogram

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_HTK

const Int32 AiliaAudio.AILIA_AUDIO_MEL_SCALE_FORMULA_HTK = (1)

Get the mel scale from the HTK formula (PyTorch compatible)

◆ AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE

const Int32 AiliaAudio.AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE = (0)

Get the mel scale from Slaney's formula (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_COMPLEX

const Int32 AiliaAudio.AILIA_AUDIO_PHASE_FORM_COMPLEX = (1)

Output the phase in complex format (compatible with the default of librosa)

◆ AILIA_AUDIO_PHASE_FORM_REAL

const Int32 AiliaAudio.AILIA_AUDIO_PHASE_FORM_REAL = (0)

Output the phase in real format (compatible with the default of PyTorch)

◆ AILIA_AUDIO_STFT_CENTER_ENABLE

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_ENABLE = (1)

for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples

◆ AILIA_AUDIO_STFT_CENTER_NONE

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_NONE = (0)

for the STFT, do not insert padding before and after

◆ AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT

const Int32 AiliaAudio.AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT = (2)

for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n

◆ AILIA_AUDIO_WIN_TYPE_HAMMING

const Int32 AiliaAudio.AILIA_AUDIO_WIN_TYPE_HAMMING = (2)

use a Hamming window function

◆ AILIA_AUDIO_WIN_TYPE_HANN

const Int32 AiliaAudio.AILIA_AUDIO_WIN_TYPE_HANN = (1)

use a Hann window function

◆ LIBRARY_NAME

const String AiliaAudio.LIBRARY_NAME = "ailia_audio"

The documentation for this class was generated from the following file: