ailia
1.5.0.0
|
audio processing library More...
Go to the source code of this file.
Macros | |
#define | AILIA_API |
#define | AILIA_AUDIO_WIN_TYPE_HANN (1) |
use a Hann window function More... | |
#define | AILIA_AUDIO_WIN_TYPE_HAMMING (2) |
use a Hamming window function More... | |
#define | AILIA_AUDIO_STFT_CENTER_NONE (0) |
for the STFT, do not insert padding before and after More... | |
#define | AILIA_AUDIO_STFT_CENTER_ENABLE (1) |
for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples More... | |
#define | AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT (2) |
for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n More... | |
#define | AILIA_AUDIO_FFT_NORMALIZE_NONE (0) |
Do not normalize the FFT output. More... | |
#define | AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT (1) |
Normalize the FFT output in a way compatible with librosa. More... | |
#define | AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT (1) |
Normalize the FFT output in a way compatible with PyTorch. More... | |
#define | AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT (2) |
Normalize the FFT output in a way compatible with SciPy. More... | |
#define | AILIA_AUDIO_MEL_NORMALIZE_NONE (0) |
Do not normalize the output of the mel spectrogram. More... | |
#define | AILIA_AUDIO_MEL_NORMALIZE_ENABLE (1) |
Normalize the output of the mel spectrogram. More... | |
#define | AILIA_AUDIO_MEL_SCALE_FORMULA_HTK (1) |
Get the mel scale from the HTK formula (PyTorch compatible) More... | |
#define | AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE (0) |
Get the mel scale from Slaney's formula (compatible with the default of librosa) More... | |
#define | AILIA_AUDIO_PHASE_FORM_COMPLEX (1) |
Output the phase in complex format (compatible with the default of librosa) More... | |
#define | AILIA_AUDIO_PHASE_FORM_REAL (0) |
Output the phase in real format (compatible with the default of PyTorch) More... | |
#define | AILIA_AUDIO_FILTFILT_PAD_NONE (0) |
During zero-phase filtering, do not pad. More... | |
#define | AILIA_AUDIO_FILTFILT_PAD_ODD (1) |
During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value) More... | |
#define | AILIA_AUDIO_FILTFILT_PAD_EVEN (2) |
During zero-phase filtering, pad with an even reflection (normal reflection) More... | |
#define | AILIA_AUDIO_FILTFILT_PAD_CONSTANT (3) |
During zero-phase filtering, pad using the edge value. More... | |
Functions | |
int AILIA_API | ailiaAudioLog1p (void *dst, const void *src, int src_n) |
Convert the input values to a logarithmic scale. More... | |
int AILIA_API | ailiaAudioConvertPowerToDB (void *dst, const void *src, int src_n, float top_db) |
Convert non-negative input values to decibel scale. More... | |
int AILIA_API | ailiaAudioGetFrameLen (int *frame_n, int sample_n, int fft_n, int hop_n, int center) |
Get the number of frames generated by the STFT. More... | |
int AILIA_API | ailiaAudioGetSampleLen (int *sample_n, int frame_n, int freq_n, int hop_n, int center) |
Get the number of samples generated by the ISTFT. More... | |
int AILIA_API | ailiaAudioGetWindow (void *dst, int window_n, int win_type) |
Get the window function. More... | |
int AILIA_API | ailiaAudioFFT (void *dst, const void *src, int fft_n) |
Execute the FFT. More... | |
int AILIA_API | ailiaAudioIFFT (void *dst, const void *src, int fft_n) |
Execute the IFFT. More... | |
int AILIA_API | ailiaAudioGetSpectrogram (void *dst, const void *src, int sample_n, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int norm_type) |
Generate the spectrogram from the audio signal. More... | |
int AILIA_API | ailiaAudioGetInverseSpectrogram (void *dst, const void *src, int frame_n, int freq_n, int hop_n, int win_n, int win_type, int max_sample_n, int center, int norm_type) |
Generate an audio signal from a complex spectrogram. More... | |
int AILIA_API | ailiaAudioGetFBMatrix (void *dst, const int freq_n, float f_min, float f_max, int mel_n, int sample_rate, int mel_norm, int mel_formula) |
Create a mel filter-bank. More... | |
int AILIA_API | ailiaAudioGetMelSpectrogram (void *dst, const void *src, int sample_n, int sample_rate, int fft_n, int hop_n, int win_n, int win_type, int max_frame_n, int center, float power, int fft_norm_type, float f_min, float f_max, int mel_n, int mel_norm_type, int mel_formula) |
Generate the mel spectrogram from the audio signal. More... | |
int AILIA_API | ailiaAudioMagPhase (void *dst_mag, void *dst_phase, const void *src, int freq_n, int frame_n, float power, int phase_form) |
Get the amplitude and the phase from the spectrogram. More... | |
int AILIA_API | ailiaAudioStandardize (void *dst, const void *src, const int src_n) |
Standardize a real signal. More... | |
int AILIA_API | ailiaAudioComplexNorm (void *dst, const void *src, const int src_n, float power) |
Get the norm of the complex signal. More... | |
int AILIA_API | ailiaAudioConvertToMel (void *dst, const void *src, const void *fb_mtrx, int freq_n, int frame_n, int mel_n) |
Convert the real output of the STFT to the mel scale. More... | |
int AILIA_API | ailiaAudioFixFrameLen (void *dst, const void *src, int freq_n, int dst_frame_n, int src_frame_n, float pad_data) |
Fix the number of time frames of a real-valued spectrogram/mel-spectrogram. More... | |
int AILIA_API | ailiaAudioResample (void *dst, const void *src, int dst_sample_rate, int dst_n, int src_sample_rate, int src_n) |
Resample the signal. More... | |
int AILIA_API | ailiaAudioGetResampleLen (int *dst_sample_n, int dst_sample_rate, int src_sample_n, int src_sample_rate) |
Get the number of samples after the resampling. More... | |
int AILIA_API | ailiaAudioLinerFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, void *zi, int dst_n, int src_n, int n_coef_n, int d_coef_n, int zi_n) |
Apply a filter to the signal. More... | |
int AILIA_API | ailiaAudioGetLinerFilterZiCoef (void *dst_zi, const void *n_coef, const void *d_coef, int dst_n, int n_coef_n, int d_coef_n) |
Calculate the initial delay coefficients for filtering. More... | |
int AILIA_API | ailiaAudioFilterFilter (void *dst, const void *src, const void *n_coef, const void *d_coef, int dst_n, int src_n, int n_coef_n, int d_coef_n, int pad_type, int pad_len) |
Apply a zero-phase filter to the signal. More... | |
int AILIA_API | ailiaAudioGetNonSilentPos (int *dst_start_pos, int *dst_length, const void *src, int sample_n, int win_n, int hop_n, float thr_db) |
Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input. More... | |
audio processing library
#define AILIA_API |
#define AILIA_AUDIO_FFT_NORMALIZE_LIBROSA_COMPAT (1) |
Normalize the FFT output in a way compatible with librosa.
#define AILIA_AUDIO_FFT_NORMALIZE_NONE (0) |
Do not normalize the FFT output.
#define AILIA_AUDIO_FFT_NORMALIZE_PYTORCH_COMPAT (1) |
Normalize the FFT output in a way compatible with PyTorch.
#define AILIA_AUDIO_FFT_NORMALIZE_SCIPY_COMPAT (2) |
Normalize the FFT output in a way compatible with SciPy.
#define AILIA_AUDIO_FILTFILT_PAD_CONSTANT (3) |
During zero-phase filtering, pad using the edge value.
#define AILIA_AUDIO_FILTFILT_PAD_EVEN (2) |
During zero-phase filtering, pad with an even reflection (normal reflection)
#define AILIA_AUDIO_FILTFILT_PAD_NONE (0) |
During zero-phase filtering, do not pad.
#define AILIA_AUDIO_FILTFILT_PAD_ODD (1) |
During zero-phase filtering, pad with an odd reflection (subtract the reflected values from two times the edge value)
#define AILIA_AUDIO_MEL_NORMALIZE_ENABLE (1) |
Normalize the output of the mel spectrogram.
#define AILIA_AUDIO_MEL_NORMALIZE_NONE (0) |
Do not normalize the output of the mel spectrogram.
#define AILIA_AUDIO_MEL_SCALE_FORMULA_HTK (1) |
Get the mel scale from the HTK formula (PyTorch compatible)
#define AILIA_AUDIO_MEL_SCALE_FORMULA_SLANYE (0) |
Get the mel scale from Slaney's formula (compatible with the default of librosa)
#define AILIA_AUDIO_PHASE_FORM_COMPLEX (1) |
Output the phase in complex format (compatible with the default of librosa)
#define AILIA_AUDIO_PHASE_FORM_REAL (0) |
Output the phase in real format (compatible with the default of PyTorch)
#define AILIA_AUDIO_STFT_CENTER_ENABLE (1) |
for the STFT, insert a padding (reflect) of fft_n/2 before and after the sample_n samples
#define AILIA_AUDIO_STFT_CENTER_NONE (0) |
for the STFT, do not insert padding before and after
#define AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT (2) |
for the STFT, insert a padding (zeros) of fft_n/2 before and after the sample_n samples, and also pad at the end with zeros to process in units of hop_n
#define AILIA_AUDIO_WIN_TYPE_HAMMING (2) |
use a Hamming window function
#define AILIA_AUDIO_WIN_TYPE_HANN (1) |
use a Hann window function
int AILIA_API ailiaAudioComplexNorm | ( | void * | dst, |
const void * | src, | ||
const int | src_n, | ||
float | power | ||
) |
Get the norm of the complex signal.
dst | pointer to the output data, of float format, and of length src_n |
src | pointer to the input data, of float format, an array of length (2 * src_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (src_n, 2)) |
src_n | length of the input data |
power | exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram |
Compute the norm of the input data. For each complex input value src_cmp = src[0] + i * src[1]: tmp_dst = pow(src[0],2.0) + pow(src[1],2.0); dst[0] = pow(tmp_dst,0.5*power);
int AILIA_API ailiaAudioConvertPowerToDB | ( | void * | dst, |
const void * | src, | ||
int | src_n, | ||
float | top_db | ||
) |
Convert non-negative input values to decibel scale.
dst | pointer to the output data, of float format, and of length src_n |
src | pointer to the input data, of float format, and of length src_n |
src_n | number of elements to be calculated |
top_db | float >= 0.0 |
Output compatible with librosa.power_to_db. dst = trimlow( 10 * log10(src / ref) ), where ref is the max of 1e-10 and of positive values of src. If top_db > 0, trimlow() replaces all values lower than (- top_db) by (- top_db); otherwise, trimlow() does nothing.
int AILIA_API ailiaAudioConvertToMel | ( | void * | dst, |
const void * | src, | ||
const void * | fb_mtrx, | ||
int | freq_n, | ||
int | frame_n, | ||
int | mel_n | ||
) |
Convert the real output of the STFT to the mel scale.
dst | pointer to the output data, of float format, of length (mel_n * frame_n), and of memory layout (in row-major convention) (mel_n, frame_n). |
src | pointer to the input data, of float format, of length (freq_n * frame_n), and of memory layout (in row-major convention) (freq_n, frame_n). |
fb_mtrx | the mel filter-bank, of float format, of length (mel_n * freq_n), and of memory layout (in row-major convention) (mel_n, freq_n). |
freq_n | number of frequency indices |
frame_n | number of time frames in the input data |
mel_n | number of mel frequency indices |
Converts the real spectrogram given in input to the mel scale. The argument fb_mtrx can take the coefficients outputted by ailiaAudioGetFBMatrix() .
int AILIA_API ailiaAudioFFT | ( | void * | dst, |
const void * | src, | ||
int | fft_n | ||
) |
Execute the FFT.
dst | pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2). |
src | pointer to the input data, of float format, and of length fft_n |
fft_n | count of FFT values (i.e. of frequency bins) |
If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.
int AILIA_API ailiaAudioFilterFilter | ( | void * | dst, |
const void * | src, | ||
const void * | n_coef, | ||
const void * | d_coef, | ||
int | dst_n, | ||
int | src_n, | ||
int | n_coef_n, | ||
int | d_coef_n, | ||
int | pad_type, | ||
int | pad_len | ||
) |
Apply a zero-phase filter to the signal.
dst | pointer to the output data, of float format, and of length dst_n |
src | pointer to the input data, of float format, and of length src_n |
n_coef | pointer to the numerator coefficients of the filter, of float format, and length n_coef_n |
d_coef | pointer to the denominator coefficients of the filter, of float format, and length d_coef_n |
dst_n | length (in number of samples) reserved in the output buffer (dst_n >= src_n) |
src_n | number of samples in the input signal |
n_coef_n | number of numerator coefficients of the filter |
d_coef_n | number of denominator coefficients of the filter |
pad_type | type of padding to apply at the start and at the end of the input signal: any of the AILIA_AUDIO_FILTFILT_PAD_* constants |
pad_len | length of the padding applied to the start and to the end of the input signal |
The number of values written to the output dst is min(dst_n,src_n). The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.
int AILIA_API ailiaAudioFixFrameLen | ( | void * | dst, |
const void * | src, | ||
int | freq_n, | ||
int | dst_frame_n, | ||
int | src_frame_n, | ||
float | pad_data | ||
) |
Fix the number of time frames of a real-valued spectrogram/mel-spectrogram.
dst | pointer to the output data, of length (freq_n * dst_frame_n), and of memory layout (in row-major convention) (freq_n, dst_frame_n). |
src | pointer to the input data, of length (freq_n * src_frame_n), and of memory layout (in row-major convention) (freq_n, src_frame_n). |
freq_n | number of frequency indices |
dst_frame_n | number of time frames in the output data |
src_frame_n | number of time frames in the input data |
pad_data | value inserted for padding (used when dst_frame_n > src_frame_n) |
dst_frame_n > src_frame_n : missing time frames are added and filled with the value pad_data. dst_frame_n <= src_frame_n : only keeps the first dst_frame_n data.
int AILIA_API ailiaAudioGetFBMatrix | ( | void * | dst, |
const int | freq_n, | ||
float | f_min, | ||
float | f_max, | ||
int | mel_n, | ||
int | sample_rate, | ||
int | mel_norm, | ||
int | mel_formula | ||
) |
Create a mel filter-bank.
dst | pointer to the output data, of float format, and of length (mel_n * freq_n). (memory layout, using the row-major convention: (mel_n, freq_n)) |
freq_n | number of frequency indices for the FFT (1+fft_n/2) |
f_min | lowest frequency |
f_max | highest frequency |
mel_n | number of mel frequency bins in the output (< freq_n) |
sample_rate | sampling rate for the signal that will be inputted to this filter |
mel_norm | whether to normalize the output (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants |
mel_formula | mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants |
int AILIA_API ailiaAudioGetFrameLen | ( | int * | frame_n, |
int | sample_n, | ||
int | fft_n, | ||
int | hop_n, | ||
int | center | ||
) |
Get the number of frames generated by the STFT.
frame_n | pointer to the destination where to write the output (the number of frames) |
sample_n | count of samples on which the STFT is performed |
fft_n | size of the FFT at each frame (i.e. number of frequency bins at each frame) |
hop_n | stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output. |
center | any of the AILIA_AUDIO_STFT_CENTER_* constants |
Before executing the STFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, the sample_n samples are cut in packets of size hop_n, and no padding occurs before the first sample nor after the last sample. If AILIA_AUDIO_STFT_CENTER_ENABLE is used, a reflection padding of length fft_n/2 is performed before the first sample and after the last sample. If AILIA_AUDIO_STFT_CENTER_SCIPY_DEFAULT is used, a zero padding of length fft_n/2 is performed before the first sample and after the last sample, and moreover an additional zero padding is performed to ensure that the total length is a multiple of hop_n.
int AILIA_API ailiaAudioGetInverseSpectrogram | ( | void * | dst, |
const void * | src, | ||
int | frame_n, | ||
int | freq_n, | ||
int | hop_n, | ||
int | win_n, | ||
int | win_type, | ||
int | max_sample_n, | ||
int | center, | ||
int | norm_type | ||
) |
Generate an audio signal from a complex spectrogram.
dst | pointer to the output data, of float format, and of length sample_n |
src | pointer to the input data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. Memory layout, using the row-major convention: (freq_n, frame_n, 2). |
frame_n | number of time frames in the input data |
freq_n | number of frequencies bins for each time frame (freq_n = fft_n/2+1) |
hop_n | step size of the time frame increment (expressed in number of samples) for the inputted spectrogram. |
win_n | size of the window function |
win_type | type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants |
max_sample_n | maximum value of the sample index in the outputted data |
center | whether padding (before and after) was used or not (and its type) during the generation of the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants |
norm_type | normalization type that was used during the generation of the input data: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants |
For each time frame the normalization is executed at the end of the IFFT. Only accepts a complex spectrogram in input.
int AILIA_API ailiaAudioGetLinerFilterZiCoef | ( | void * | dst_zi, |
const void * | n_coef, | ||
const void * | d_coef, | ||
int | dst_n, | ||
int | n_coef_n, | ||
int | d_coef_n | ||
) |
Calculate the initial delay coefficients for filtering.
dst_zi | pointer to the output (initial delay coefficients), of float format, and of length dst_n (dst_n >= max(n_coef_n,d_coef_n)-1) |
n_coef | pointer to the numerator coefficients of the filter, of float format, and length n_coef_n |
d_coef | pointer to the denominator coefficients of the filter, of float format, and length d_coef_n |
dst_n | size, in number of samples, reserved in the output buffer (dst_n >= max(n_coef_n,d_coef_n)-1) |
n_coef_n | number of numerator coefficients of the filter |
d_coef_n | number of denominator coefficients of the filter |
These initial delay coefficients dst_zi, once multiplied with the early values of the signal, can be passed as initial delayed values, the zi argument, to ailiaAudioLinerFilter() . Of the dst_n reserved length of the output buffer, the length used is max(n_coef_n,d_coef_n)-1. If dst_n is less than that, only the corresponding first values are output. If dst_n is larger, the remaining is filled with 0. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.
int AILIA_API ailiaAudioGetMelSpectrogram | ( | void * | dst, |
const void * | src, | ||
int | sample_n, | ||
int | sample_rate, | ||
int | fft_n, | ||
int | hop_n, | ||
int | win_n, | ||
int | win_type, | ||
int | max_frame_n, | ||
int | center, | ||
float | power, | ||
int | fft_norm_type, | ||
float | f_min, | ||
float | f_max, | ||
int | mel_n, | ||
int | mel_norm_type, | ||
int | mel_formula | ||
) |
Generate the mel spectrogram from the audio signal.
dst | pointer to the output data, of float format, and of length (mel_n * frame_n) (with frame_n the number of time frames outputted). (memory layout, using the row-major convention: (mel_n, frame_n)) |
src | pointer to the input data, of float format, monaural PCM audio data. |
sample_n | count of samples in the input data |
sample_rate | sampling rate of the input signal |
fft_n | number of FFT components |
hop_n | stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram. |
win_n | size of the window function (in number of samples) |
win_type | type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants |
max_frame_n | maximum value of the time frame index in the outputted data |
center | whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants |
power | exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed. |
fft_norm_type | normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants |
f_min | lowest frequency |
f_max | highest frequency |
mel_n | number of mel frequency bins in the output (< freq_n) |
mel_norm_type | whether to normalize the mel spectrogram (and the type of the normalization): any of the AILIA_AUDIO_MEL_NORMALIZE_* constants |
mel_formula | mel scale format: any of the AILIA_AUDIO_MEL_SCALE_FORMULA_* constants |
For each time frame, the operations are processed in this order: FFT(STFT) -> normalization -> power exponentiation -> get the mel filter-bank coefficients -> convert to the mel scale. The output is real values, and its length is mel_n*frame_n (with frame_n the number of time frames outputted).
int AILIA_API ailiaAudioGetNonSilentPos | ( | int * | dst_start_pos, |
int * | dst_length, | ||
const void * | src, | ||
int | sample_n, | ||
int | win_n, | ||
int | hop_n, | ||
float | thr_db | ||
) |
Find the region of the signal between the first and the last non-silence samples. Detects the area excluding the silent range before and after the signal input.
dst_start_pos | pointer to the destination where to write the outputted start position of the non-silence area, of int format |
dst_length | pointer to the destination where to write the outputted length of the non-silence area, of int format |
src | pointer to the input data, of float format, and of length sample_n |
sample_n | count of samples in the input data |
win_n | size of the window function |
hop_n | stride of each window shift (in number of samples) |
thr_db | threshold (in dB) above which the signal is considered non-silence (thr_db > 0) |
In case the whole signal is considered silence, the following happens: *dst_start_pos = -1, *dst_length = 0
int AILIA_API ailiaAudioGetResampleLen | ( | int * | dst_sample_n, |
int | dst_sample_rate, | ||
int | src_sample_n, | ||
int | src_sample_rate | ||
) |
Get the number of samples after the resampling.
dst_sample_n | pointer to the destination where to write the output (the number of samples after resampling) |
dst_sample_rate | sampling rate after the resampling |
src_sample_n | number of samples in the input signal |
src_sample_rate | sampling rate of the input signal |
int AILIA_API ailiaAudioGetSampleLen | ( | int * | sample_n, |
int | frame_n, | ||
int | freq_n, | ||
int | hop_n, | ||
int | center | ||
) |
Get the number of samples generated by the ISTFT.
sample_n | pointer to the destination where to write the output (the number of samples) |
frame_n | length of the STFT data, expressed in number of frames |
freq_n | number of frequency bins at each frame (freq_n = fft_n/2+1) |
hop_n | stride of each window shift (in number of samples). This is the quantum of time for the time axis of the STFT output. |
center | any of the AILIA_AUDIO_STFT_CENTER_* constants |
Before executing the ISTFT, use this function to determine the space required for the output buffer. If AILIA_AUDIO_STFT_CENTER_NONE is used, no truncation is performed at the beginning nor at the end. If AILIA_AUDIO_STFT_CENTER_NONE is not used, a truncation is performed at the beginning and at the end.
int AILIA_API ailiaAudioGetSpectrogram | ( | void * | dst, |
const void * | src, | ||
int | sample_n, | ||
int | fft_n, | ||
int | hop_n, | ||
int | win_n, | ||
int | win_type, | ||
int | max_frame_n, | ||
int | center, | ||
float | power, | ||
int | norm_type | ||
) |
Generate the spectrogram from the audio signal.
dst | pointer to the output data, of float format, of length (2 * freq_n * frame_n), and which memory layout is a sequence of pairs [real part, imaginary part]. (where freq_n = fft_n/2+1). Memory layout, using the row-major convention: (freq_n, frame_n, 2). |
src | pointer to the input data, of float format, and of length sample_n |
sample_n | count of samples in the input data |
fft_n | size of the FFT at each frame (i.e. number of frequency bins at each frame) |
hop_n | stride of each window shift (in number of samples). This is the size of the time increment for the spectrogram. |
win_n | size of the window function |
win_type | type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants |
max_frame_n | maximum value of the time frame index in the outputted data |
center | whether to pad or not (and the type of padding) before and after the input data: any of the AILIA_AUDIO_STFT_CENTER_* constants |
power | exponent to apply to the spectrogram (> = 0.0). A special case is for 0.0: complex spectrogram. For other cases the amplitude is just exponentiated accordingly: 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed. |
norm_type | normalization after the FFT: any of the AILIA_AUDIO_FFT_NORMALIZE_* constants |
For each time frame, the operations are processed in this order: FFT -> normalization -> power exponentiation. As the output data alternates real and imaginary parts, its length is 2*(fft_n/2+1)*frame_n (where frame_n is the number of time frames outputted). When the power argument is a non-zero value, all the imaginary parts are set to 0 in the output.
int AILIA_API ailiaAudioGetWindow | ( | void * | dst, |
int | window_n, | ||
int | win_type | ||
) |
Get the window function.
dst | pointer to the output data, of float format, and of length window_n |
window_n | length of the window (in number of samples) |
win_type | type of the window function: any of the AILIA_AUDIO_WIN_TYPE_* constants |
Only the Hann and the Hamming window functions are supported.
int AILIA_API ailiaAudioIFFT | ( | void * | dst, |
const void * | src, | ||
int | fft_n | ||
) |
Execute the IFFT.
dst | pointer to the output data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2). |
src | pointer to the input data, of float format, of length 2*fft_n, and which memory layout is a sequence of fft_n pairs [real part, imaginary part]. Memory layout, using the row-major convention: (fft_n, 2). |
fft_n | count of FFT values (i.e. of frequency bins) |
If fft_n is a power of 2, this function uses a faster algorithm. As the output data alternates real and imaginary parts, its length is 2*fft_n.
int AILIA_API ailiaAudioLinerFilter | ( | void * | dst, |
const void * | src, | ||
const void * | n_coef, | ||
const void * | d_coef, | ||
void * | zi, | ||
int | dst_n, | ||
int | src_n, | ||
int | n_coef_n, | ||
int | d_coef_n, | ||
int | zi_n | ||
) |
Apply a filter to the signal.
dst | pointer to the output data, of float format, and of length dst_n |
src | pointer to the input data, of float format, and of length src_n |
n_coef | pointer to the numerator coefficients of the filter, of float format, and length n_coef_n |
d_coef | pointer to the denominator coefficients of the filter, of float format, and length d_coef_n |
zi | pointer to the initial delayed values to be used, of float format, and of length zi_n (zi_n = max(n_coef_n,d_coef_n)-1). nullptr is allowed. |
dst_n | size, in number of samples, reserved in the output buffer (dst_n >= src_n) |
src_n | number of samples in the input signal |
n_coef_n | number of numerator coefficients of the filter |
d_coef_n | number of denominator coefficients of the filter |
zi_n | number of initial delayed values provided (zi_n >= max(n_coef_n,d_coef_n)-1) |
The number of samples outputted to dst is min(dst_n,src_n). Use zi to provide the initial delayed values. During processing, this array is overwritten with the new delayed values. Out of the zi_n, the number of delayed values used is max(n_coef_n,d_coef_n)-1. If there are fewer than that, the remaining is assumed to be zeros, and the array zi is not updated with the new values. When zi is nullptr, zi_n is ignored, all the delayed values are assumed to be zero, and the new delayed values are not returned. The largest of n_coef_n and d_coef_n is taken as reference and zeros are added for padding where necessary.
int AILIA_API ailiaAudioLog1p | ( | void * | dst, |
const void * | src, | ||
int | src_n | ||
) |
Convert the input values to a logarithmic scale.
dst | pointer to the output data, of float format, and of length src_n |
src | pointer to the input data, of float format, and of length src_n |
src_n | number of elements to be calculated |
dst = log_e(1.0 + src)
int AILIA_API ailiaAudioMagPhase | ( | void * | dst_mag, |
void * | dst_phase, | ||
const void * | src, | ||
int | freq_n, | ||
int | frame_n, | ||
float | power, | ||
int | phase_form | ||
) |
Get the amplitude and the phase from the spectrogram.
dst_mag | pointer to the outputted amplitudes, an array of length (freq_n * frame_n). (memory layout, using the row-major convention: (freq_n, frame_n)) |
dst_phase | pointer to the outputted phases, an array of length (2 * freq_n * frame_n) (sequence of complex pairs [real part, imaginary part]). (memory layout, using the row-major convention: (freq_n, frame_n, 2)) |
src | pointer to the input data, of length (2 * frame_n * freq_n) (a sequence of complex pairs [real, imaginary]). (memory layout, using the row-major convention: (frame_n, freq_n, 2)) |
freq_n | number of frequency indices |
frame_n | number of time frames |
power | exponent to apply to the spectrogram (> 0.0). 1.0: amplitude spectrogram, 2.0: power spectrogram, etc, any other positive exponent value is allowed. |
phase_form | format of the outputted phase: any of the AILIA_AUDIO_PHASE_FORM_* constants |
To be compatible with librosa, use: phase_form = AILIA_AUDIO_PHASE_FORM_COMPLEX , power = 1.0 To be compatible with PyTorch, use: phase_form = AILIA_AUDIO_PHASE_FORM_REAL , power = 1.0 The dst_phase output depends on phase_form:
int AILIA_API ailiaAudioResample | ( | void * | dst, |
const void * | src, | ||
int | dst_sample_rate, | ||
int | dst_n, | ||
int | src_sample_rate, | ||
int | src_n | ||
) |
Resample the signal.
dst | pointer to the output data, of float format, and of length dst_n |
src | pointer to the input data, of float format, and of length src_n |
dst_sample_rate | sampling rate after the resampling |
dst_n | length (in number of samples) reserved in the output buffer (dst_n >= max_resample_n) |
src_sample_rate | sampling rate of the input signal |
src_n | number of samples in the input signal |
The max number of samples in the output, max_resample_n, can be obtained from ailiaAudioGetResampleLen() . dst_n < max_resample_n : only the first dst_n samples are outputted. dst_n >= max_resample_n : max_resample_n samples are outputted.
int AILIA_API ailiaAudioStandardize | ( | void * | dst, |
const void * | src, | ||
const int | src_n | ||
) |
Standardize a real signal.
dst | pointer to the output data, of float format, and of length src_n |
src | pointer to the input data, of float format, and of length src_n |
src_n | length of the input data |
Standardize the input data so that its average value becomes 0 and its variance 1. dst = (src - mean(src)) / std(src)