ailia_speech  1.3.0.0
Classes | Macros | Typedefs | Functions
ailia_speech.h File Reference
#include "ailia.h"
#include "ailia_tokenizer.h"
Include dependency graph for ailia_speech.h:

Go to the source code of this file.

Classes

struct  _AILIASpeechApiCallback
 
struct  _AILIASpeechText
 

Macros

#define AILIA_API   __stdcall
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY   (0)
 Whisper Tiny model. More...
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_BASE   (1)
 Whisper Base model. More...
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL   (2)
 Whisper Small model. More...
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_MEDIUM   (3)
 Whisper Medium model. More...
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE   (4)
 Whisper Large model. More...
 
#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3   (5)
 Whisper Large V3 model. More...
 
#define AILIA_SPEECH_TASK_TRANSCRIBE   (0)
 Transcribe mode. More...
 
#define AILIA_SPEECH_TASK_TRANSLATE   (1)
 Translate mode. More...
 
#define AILIA_SPEECH_CONSTRAINT_CHARACTERS   (0)
 Constraint by characters. More...
 
#define AILIA_SPEECH_CONSTRAINT_WORDS   (1)
 Constraint by words. Separate words with commas. More...
 
#define AILIA_SPEECH_FLAG_NONE   (0)
 Default flag. More...
 
#define AILIA_SPEECH_FLAG_LIVE   (1)
 Enable live transcribe mode. More...
 
#define AILIA_SPEECH_VAD_TYPE_SILERO   (0)
 SileroVAD. More...
 
#define AILIA_SPEECH_DICTIONARY_TYPE_REPLACE   (0)
 Dictionary for replace. More...
 
#define AILIA_SPEECH_POST_PROCESS_TYPE_T5   (0)
 T5. More...
 
#define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_EN_JA   (1)
 FuguMT EN JA. More...
 
#define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_JA_EN   (2)
 FuguMT JA EN. More...
 
#define AILIA_SPEECH_USER_API
 
#define AILIA_SPEECH_API_CALLBACK_VERSION   (6)
 Struct version. More...
 
#define AILIA_SPEECH_TEXT_VERSION   (2)
 Struct version. More...
 
#define ailiaSpeechOpenModelFile   ailiaSpeechOpenModelFileA
 
#define ailiaSpeechOpenVadFile   ailiaSpeechOpenVadFileA
 
#define ailiaSpeechOpenDictionaryFile   ailiaSpeechOpenDictionaryFileA
 
#define ailiaSpeechOpenPostProcessFile   ailiaSpeechOpenPostProcessFileA
 

Typedefs

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_FRAME_LEN) (int *, int, int, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM) (void *, const void *, int, int, int, int, int, int, int, int, float, int, float, float, int, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE) (void *, const void *, int, int, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_RESAMPLE_LEN) (int *, int, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE) (struct AILIATokenizer **, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A) (struct AILIATokenizer *, const char *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W) (struct AILIATokenizer *, const wchar_t *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE) (struct AILIATokenizer *, const char *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT) (struct AILIATokenizer *, unsigned int *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS) (struct AILIATokenizer *, int *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE) (struct AILIATokenizer *, const int *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH) (struct AILIATokenizer *, unsigned int *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT) (struct AILIATokenizer *, char *, unsigned int)
 
typedef void(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DESTROY) (struct AILIATokenizer *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32) (unsigned int *, unsigned int *, const char *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8) (char *, unsigned int *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_CREATE) (struct AILIANetwork **, int, int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A) (struct AILIANetwork *, const char *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W) (struct AILIANetwork *, const wchar_t *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM) (struct AILIANetwork *, const void *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE) (struct AILIANetwork *, unsigned int)
 
typedef void(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_DESTROY) (struct AILIANetwork *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_UPDATE) (struct AILIANetwork *)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX) (struct AILIANetwork *, unsigned int *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX) (struct AILIANetwork *, unsigned int *, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA) (struct AILIANetwork *, void *, unsigned int, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA) (struct AILIANetwork *, const void *, unsigned int, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE) (struct AILIANetwork *, const AILIAShape *, unsigned int, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE) (struct AILIANetwork *, AILIAShape *, unsigned int, unsigned int)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA) (struct AILIANetwork *dst_net, unsigned int dst_blob_idx, struct AILIANetwork *src_net, unsigned int src_blob_idx)
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT) (AILIAEnvironment **env, unsigned int env_idx, unsigned int version)
 
typedef struct _AILIASpeechApiCallback AILIASpeechApiCallback
 
typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK) (void *handle, const char *text)
 Notify the status during the inference. More...
 
typedef struct _AILIASpeechText AILIASpeechText
 

Functions

typedef const char *(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL) (struct AILIANetwork *)
 
int AILIA_API ailiaSpeechCreate (struct AILIASpeech **net, int env_id, int num_thread, int memory_mode, int task, int flags, AILIASpeechApiCallback callback, int version)
 Creates a network instance. More...
 
int AILIA_API ailiaSpeechOpenModelFileA (struct AILIASpeech *net, const char *encoder_path, const char *decoder_path, int model_type)
 Set models into a network instance. More...
 
int AILIA_API ailiaSpeechOpenModelFileW (struct AILIASpeech *net, const wchar_t *encoder_path, const wchar_t *decoder_path, int model_type)
 Set models into a network instance. More...
 
int AILIA_API ailiaSpeechOpenVadFileA (struct AILIASpeech *net, const char *vad_path, int vad_type)
 Set vad model for voice activity detection. More...
 
int AILIA_API ailiaSpeechOpenVadFileW (struct AILIASpeech *net, const wchar_t *vad_path, int vad_type)
 Set vad model for voice activity detection. More...
 
int AILIA_API ailiaSpeechOpenDictionaryFileA (struct AILIASpeech *net, const char *dictionary_path, int dictionary_type)
 Set dictionary for error correction. More...
 
int AILIA_API ailiaSpeechOpenDictionaryFileW (struct AILIASpeech *net, const wchar_t *dictionary_path, int dictionary_type)
 Set dictionary for error correction. More...
 
int AILIA_API ailiaSpeechOpenPostProcessFileA (struct AILIASpeech *net, const char *encoder_path, const char *decoder_path, const char *source_path, const char *target_path, const char *prefix, int post_process_type)
 Set AI model for post process (MBCS) More...
 
int AILIA_API ailiaSpeechOpenPostProcessFileW (struct AILIASpeech *net, const wchar_t *encoder_path, const wchar_t *decoder_path, const wchar_t *source_path, const wchar_t *target_path, const char *prefix, int post_process_type)
 Set AI model for post process (UTF16) More...
 
int AILIA_API ailiaSpeechPushInputData (struct AILIASpeech *net, const float *src, unsigned int channels, unsigned int samples, unsigned int sampling_rate)
 Push PCM data to queue. More...
 
int AILIA_API ailiaSpeechFinalizeInputData (struct AILIASpeech *net)
 Finalize input PCM data to queue. More...
 
int AILIA_API ailiaSpeechBuffered (struct AILIASpeech *net, unsigned int *buffered)
 Determines if there is enough data to perform speech recognition. More...
 
int AILIA_API ailiaSpeechComplete (struct AILIASpeech *net, unsigned int *complete)
 Determines whether all data has been processed. More...
 
int AILIA_API ailiaSpeechSetPrompt (struct AILIASpeech *net, const char *prompt)
 Set prompt. More...
 
int AILIA_API ailiaSpeechSetConstraint (struct AILIASpeech *net, const char *constraint, int type)
 Set constraint. More...
 
int AILIA_API ailiaSpeechSetLanguage (struct AILIASpeech *net, const char *language)
 Set language. More...
 
int AILIA_API ailiaSpeechSetSilentThreshold (struct AILIASpeech *net, float silent_threshold, float speech_sec, float no_speech_sec)
 Set silent threshold. More...
 
int AILIA_API ailiaSpeechSetIntermediateCallback (struct AILIASpeech *net, AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK callback, void *handle)
 Set a callback to get intermediate results of recognition. More...
 
int AILIA_API ailiaSpeechTranscribe (struct AILIASpeech *net)
 Speech recognition. More...
 
int AILIA_API ailiaSpeechPostProcess (struct AILIASpeech *net)
 Execute post process. More...
 
int AILIA_API ailiaSpeechGetTextCount (struct AILIASpeech *net, unsigned int *count)
 Get recognized text count. More...
 
int AILIA_API ailiaSpeechGetText (struct AILIASpeech *net, AILIASpeechText *text, unsigned int version, unsigned int idx)
 Get recognized text. More...
 
int AILIA_API ailiaSpeechSetText (struct AILIASpeech *net, const AILIASpeechText *text, unsigned int version, unsigned int idx)
 Set postprocess text. More...
 
void AILIA_API ailiaSpeechDestroy (struct AILIASpeech *net)
 It destroys the network instance. More...
 
int AILIA_API ailiaSpeechResetTranscribeState (struct AILIASpeech *net)
 It resets the network instance. More...
 
const char *AILIA_API ailiaSpeechGetErrorDetail (struct AILIASpeech *net)
 Returns the details of errors. More...
 

Macro Definition Documentation

◆ AILIA_API

#define AILIA_API   __stdcall

◆ AILIA_SPEECH_API_CALLBACK_VERSION

#define AILIA_SPEECH_API_CALLBACK_VERSION   (6)

Struct version.

◆ AILIA_SPEECH_CONSTRAINT_CHARACTERS

#define AILIA_SPEECH_CONSTRAINT_CHARACTERS   (0)

Constraint by characters.

◆ AILIA_SPEECH_CONSTRAINT_WORDS

#define AILIA_SPEECH_CONSTRAINT_WORDS   (1)

Constraint by words. Separate words with commas.

◆ AILIA_SPEECH_DICTIONARY_TYPE_REPLACE

#define AILIA_SPEECH_DICTIONARY_TYPE_REPLACE   (0)

Dictionary for replace.

◆ AILIA_SPEECH_FLAG_LIVE

#define AILIA_SPEECH_FLAG_LIVE   (1)

Enable live transcribe mode.

◆ AILIA_SPEECH_FLAG_NONE

#define AILIA_SPEECH_FLAG_NONE   (0)

Default flag.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_BASE

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_BASE   (1)

Whisper Base model.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE   (4)

Whisper Large model.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_LARGE_V3   (5)

Whisper Large V3 model.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_MEDIUM

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_MEDIUM   (3)

Whisper Medium model.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_SMALL   (2)

Whisper Small model.

◆ AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY

#define AILIA_SPEECH_MODEL_TYPE_WHISPER_MULTILINGUAL_TINY   (0)

Whisper Tiny model.

◆ AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_EN_JA

#define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_EN_JA   (1)

FuguMT EN JA.

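FuguMT JA EN. (This brief and one of the two token limits below appear to belong to AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_JA_EN and were merged into this entry by the documentation generator; confirm against the header.)

The maximum token length is 384. If exceeded, the excess is truncated.

The maximum token length is 512. If exceeded, the excess is truncated.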

◆ AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_JA_EN

#define AILIA_SPEECH_POST_PROCESS_TYPE_FUGUMT_JA_EN   (2)

◆ AILIA_SPEECH_POST_PROCESS_TYPE_T5

#define AILIA_SPEECH_POST_PROCESS_TYPE_T5   (0)

T5.

◆ AILIA_SPEECH_TASK_TRANSCRIBE

#define AILIA_SPEECH_TASK_TRANSCRIBE   (0)

Transcribe mode.

◆ AILIA_SPEECH_TASK_TRANSLATE

#define AILIA_SPEECH_TASK_TRANSLATE   (1)

Translate mode.

◆ AILIA_SPEECH_TEXT_VERSION

#define AILIA_SPEECH_TEXT_VERSION   (2)

Struct version.

◆ AILIA_SPEECH_USER_API

#define AILIA_SPEECH_USER_API

◆ AILIA_SPEECH_VAD_TYPE_SILERO

#define AILIA_SPEECH_VAD_TYPE_SILERO   (0)

SileroVAD.

◆ ailiaSpeechOpenDictionaryFile

#define ailiaSpeechOpenDictionaryFile   ailiaSpeechOpenDictionaryFileA

◆ ailiaSpeechOpenModelFile

#define ailiaSpeechOpenModelFile   ailiaSpeechOpenModelFileA

◆ ailiaSpeechOpenPostProcessFile

#define ailiaSpeechOpenPostProcessFile   ailiaSpeechOpenPostProcessFileA

◆ ailiaSpeechOpenVadFile

#define ailiaSpeechOpenVadFile   ailiaSpeechOpenVadFileA

Typedef Documentation

◆ AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_FRAME_LEN

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_FRAME_LEN) (int *, int, int, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_MEL_SPECTROGRAM) (void *, const void *, int, int, int, int, int, int, int, int, float, int, float, float, int, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_RESAMPLE_LEN

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_GET_RESAMPLE_LEN) (int *, int, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_AUDIO_RESAMPLE) (void *, const void *, int, int, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_COPY_BLOB_DATA) (struct AILIANetwork *dst_net, unsigned int dst_blob_idx, struct AILIANetwork *src_net, unsigned int src_blob_idx)

◆ AILIA_SPEECH_USER_API_AILIA_CREATE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_CREATE) (struct AILIANetwork **, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_DESTROY

typedef void(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_DESTROY) (struct AILIANetwork *)

◆ AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_DATA) (struct AILIANetwork *, void *, unsigned int, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_INPUT_INDEX) (struct AILIANetwork *, unsigned int *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_INDEX_BY_OUTPUT_INDEX) (struct AILIANetwork *, unsigned int *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_BLOB_SHAPE) (struct AILIANetwork *, AILIAShape *, unsigned int, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ENVIRONMENT) (AILIAEnvironment **env, unsigned int env_idx, unsigned int version)

◆ AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_A) (struct AILIANetwork *, const char *)

◆ AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_FILE_W) (struct AILIANetwork *, const wchar_t *)

◆ AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_OPEN_WEIGHT_MEM) (struct AILIANetwork *, const void *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_DATA) (struct AILIANetwork *, const void *, unsigned int, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_INPUT_BLOB_SHAPE) (struct AILIANetwork *, const AILIAShape *, unsigned int, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_SET_MEMORY_MODE) (struct AILIANetwork *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_CREATE) (struct AILIATokenizer **, int, int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DECODE) (struct AILIATokenizer *, const int *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DESTROY

typedef void(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_DESTROY) (struct AILIATokenizer *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_ENCODE) (struct AILIATokenizer *, const char *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT) (struct AILIATokenizer *, char *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TEXT_LENGTH) (struct AILIATokenizer *, unsigned int *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKEN_COUNT) (struct AILIATokenizer *, unsigned int *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_GET_TOKENS) (struct AILIATokenizer *, int *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_A) (struct AILIATokenizer *, const char *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_OPEN_MODEL_FILE_W) (struct AILIATokenizer *, const wchar_t *)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF32_TO_UTF8) (char *, unsigned int *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_TOKENIZER_UTF8_TO_UTF32) (unsigned int *, unsigned int *, const char *, unsigned int)

◆ AILIA_SPEECH_USER_API_AILIA_UPDATE

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_UPDATE) (struct AILIANetwork *)

◆ AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK

typedef int(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK) (void *handle, const char *text)

Notify the status during the inference.

Parameters
handle: The handle specified in ailiaSpeechSetIntermediateCallback
text: The intermediate text produced partway through inference
Returns
Return 0 to continue, non-zero to abort inference.

◆ AILIASpeechApiCallback

◆ AILIASpeechText

Function Documentation

◆ AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL()

typedef const char *(AILIA_SPEECH_USER_API* AILIA_SPEECH_USER_API_AILIA_GET_ERROR_DETAIL) (struct AILIANetwork *)

◆ ailiaSpeechBuffered()

int AILIA_API ailiaSpeechBuffered ( struct AILIASpeech *  net,
unsigned int *  buffered 
)

Determines if there is enough data to perform speech recognition.

Parameters
net: A network instance pointer
buffered: Whether data exists (1: exists, 0: does not exist)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechComplete()

int AILIA_API ailiaSpeechComplete ( struct AILIASpeech *  net,
unsigned int *  complete 
)

Determines whether all data has been processed.

Parameters
net: A network instance pointer
complete: Whether processing is complete (1: complete, 0: not complete)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechCreate()

int AILIA_API ailiaSpeechCreate ( struct AILIASpeech **  net,
int  env_id,
int  num_thread,
int  memory_mode,
int  task,
int  flags,
AILIASpeechApiCallback  callback,
int  version 
)

Creates a network instance.

Parameters
net: A pointer to the network instance pointer
env_id: The ID of the inference backend used for computation (obtained by ailiaGetEnvironment()). It is selected automatically if AILIA_ENVIRONMENT_ID_AUTO is specified.
num_thread: The upper limit on the number of threads (it is set automatically if AILIA_MULTITHREAD_AUTO is specified)
memory_mode: The memory mode (AILIA_MEMORY_MODE_*)
task: AILIA_SPEECH_TASK_*
flags: OR of AILIA_SPEECH_FLAG_*
callback: The callback for ailia API
version: AILIA_SPEECH_API_CALLBACK_VERSION
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

Creates a network instance.

◆ ailiaSpeechDestroy()

void AILIA_API ailiaSpeechDestroy ( struct AILIASpeech *  net)

It destroys the network instance.

Parameters
net: A network instance pointer

◆ ailiaSpeechFinalizeInputData()

int AILIA_API ailiaSpeechFinalizeInputData ( struct AILIASpeech *  net)

Finalize input PCM data to queue.

Parameters
net: A network instance pointer
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

By signaling the end of the audio file, ailiaSpeechBuffered will return 1 even if 30 seconds worth of data does not exist. You must call ailiaSpeechResetTranscribeState after executing ailiaSpeechFinalizeInputData and before executing ailiaSpeechPushInputData.

◆ ailiaSpeechGetErrorDetail()

const char* AILIA_API ailiaSpeechGetErrorDetail ( struct AILIASpeech *  net)

Returns the details of errors.

Parameters
net: The network instance pointer
Returns
Error details

The return value does not have to be released. The string is valid until the next ailiaSpeech API function is called. If the model is encrypted, this function returns an empty string.

◆ ailiaSpeechGetText()

int AILIA_API ailiaSpeechGetText ( struct AILIASpeech *  net,
AILIASpeechText *  text,
unsigned int  version,
unsigned int  idx 
)

Get recognized text.

Parameters
net: A network instance pointer
text: Text
version: AILIA_SPEECH_TEXT_VERSION
idx: Text index
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

The string returned in text does not have to be released. The string is valid until the next ailiaSpeech API function is called.

◆ ailiaSpeechGetTextCount()

int AILIA_API ailiaSpeechGetTextCount ( struct AILIASpeech *  net,
unsigned int *  count 
)

Get recognized text count.

Parameters
net: A network instance pointer
count: Text count
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenDictionaryFileA()

int AILIA_API ailiaSpeechOpenDictionaryFileA ( struct AILIASpeech *  net,
const char *  dictionary_path,
int  dictionary_type 
)

Set dictionary for error correction.

Parameters
net: A network instance pointer
dictionary_path: The path name to the dictionary file (MBCS)
dictionary_type: AILIA_SPEECH_DICTIONARY_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenDictionaryFileW()

int AILIA_API ailiaSpeechOpenDictionaryFileW ( struct AILIASpeech *  net,
const wchar_t *  dictionary_path,
int  dictionary_type 
)

Set dictionary for error correction.

Parameters
net: A network instance pointer
dictionary_path: The path name to the dictionary file (UTF16)
dictionary_type: AILIA_SPEECH_DICTIONARY_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenModelFileA()

int AILIA_API ailiaSpeechOpenModelFileA ( struct AILIASpeech *  net,
const char *  encoder_path,
const char *  decoder_path,
int  model_type 
)

Set models into a network instance.

Parameters
net: A network instance pointer
encoder_path: The path name to the onnx file (MBCS)
decoder_path: The path name to the onnx file (MBCS)
model_type: AILIA_SPEECH_MODEL_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenModelFileW()

int AILIA_API ailiaSpeechOpenModelFileW ( struct AILIASpeech *  net,
const wchar_t *  encoder_path,
const wchar_t *  decoder_path,
int  model_type 
)

Set models into a network instance.

Parameters
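net: A network instance pointer
encoder_path: The path name to the onnx file (UTF16)
decoder_path: The path name to the onnx file (UTF16)
model_type: AILIA_SPEECH_MODEL_TYPE_*
task: AILIA_SPEECH_TASK_* (not in this function's signature; task is an argument of ailiaSpeechCreate)
flag: OR of AILIA_SPEECH_FLAG_* (not in this function's signature; flags is an argument of ailiaSpeechCreate)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.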

◆ ailiaSpeechOpenPostProcessFileA()

int AILIA_API ailiaSpeechOpenPostProcessFileA ( struct AILIASpeech *  net,
const char *  encoder_path,
const char *  decoder_path,
const char *  source_path,
const char *  target_path,
const char *  prefix,
int  post_process_type 
)

Set AI model for post process (MBCS)

Parameters
net: A network instance pointer
encoder_path: The path name to the onnx file (MBCS)
decoder_path: The path name to the onnx file (MBCS)
source_path: The path name to the tokenizer model file (MBCS)
target_path: The path name to the tokenizer model file (MBCS)
prefix: The prefix of T5 (UTF8), NULL for FuguMT
post_process_type: AILIA_SPEECH_POST_PROCESS_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenPostProcessFileW()

int AILIA_API ailiaSpeechOpenPostProcessFileW ( struct AILIASpeech *  net,
const wchar_t *  encoder_path,
const wchar_t *  decoder_path,
const wchar_t *  source_path,
const wchar_t *  target_path,
const char *  prefix,
int  post_process_type 
)

Set AI model for post process (UTF16)

Parameters
net: A network instance pointer
encoder_path: The path name to the onnx file (UTF16)
decoder_path: The path name to the onnx file (UTF16)
source_path: The path name to the tokenizer model file (UTF16)
target_path: The path name to the tokenizer model file (UTF16)
prefix: The prefix of T5 (UTF8), NULL for FuguMT
post_process_type: AILIA_SPEECH_POST_PROCESS_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenVadFileA()

int AILIA_API ailiaSpeechOpenVadFileA ( struct AILIASpeech *  net,
const char *  vad_path,
int  vad_type 
)

Set vad model for voice activity detection.

Parameters
net: A network instance pointer
vad_path: The path name to the onnx file (MBCS)
vad_type: AILIA_SPEECH_VAD_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechOpenVadFileW()

int AILIA_API ailiaSpeechOpenVadFileW ( struct AILIASpeech *  net,
const wchar_t *  vad_path,
int  vad_type 
)

Set vad model for voice activity detection.

Parameters
net: A network instance pointer
vad_path: The path name to the onnx file (UTF16)
vad_type: AILIA_SPEECH_VAD_TYPE_*
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechPostProcess()

int AILIA_API ailiaSpeechPostProcess ( struct AILIASpeech *  net)

Execute post process.

Parameters
net: A network instance pointer
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

Get the recognition result with ailiaSpeechGetText API.

◆ ailiaSpeechPushInputData()

int AILIA_API ailiaSpeechPushInputData ( struct AILIASpeech *  net,
const float *  src,
unsigned int  channels,
unsigned int  samples,
unsigned int  sampling_rate 
)

Push PCM data to queue.

Parameters
net: A network instance pointer
src: The input PCM data (channel-interleaved, LRLR order for stereo, sample range -1.0 to 1.0)
channels: The number of PCM channels
samples: The number of PCM samples per channel
sampling_rate: The sampling rate (Hz)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechResetTranscribeState()

int AILIA_API ailiaSpeechResetTranscribeState ( struct AILIASpeech *  net)

It resets the network instance.

Parameters
net: A network instance pointer
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

By calling this API, the internal state such as the previous decoding result is initialized. There is no need to reopen the model after calling this API. The states of ailiaSpeechOpenModelFile, ailiaSpeechSetIntermediateCallback, ailiaSpeechSetLanguage, ailiaSpeechSetSilentThreshold, ailiaSpeechSetPrompt are preserved.

◆ ailiaSpeechSetConstraint()

int AILIA_API ailiaSpeechSetConstraint ( struct AILIASpeech *  net,
const char *  constraint,
int  type 
)

Set constraint.

Parameters
net: A network instance pointer
constraint: The text of constraint (UTF8)
type: The type of constraint (AILIA_SPEECH_CONSTRAINT_*)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechSetIntermediateCallback()

int AILIA_API ailiaSpeechSetIntermediateCallback ( struct AILIASpeech *  net,
AILIA_SPEECH_USER_API_INTERMEDIATE_CALLBACK  callback,
void *  handle 
)

Set a callback to get intermediate results of recognition.

Parameters
net: A network instance pointer
callback: The callback
handle: The handle passed to the callback
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

The string passed to the callback does not have to be released. The string is valid until the next ailiaSpeech API function is called.

◆ ailiaSpeechSetLanguage()

int AILIA_API ailiaSpeechSetLanguage ( struct AILIASpeech *  net,
const char *  language 
)

Set language.

Parameters
net: A network instance pointer
language: Language code (en, ja, etc.)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

If language is set to "auto", the language is detected automatically.

◆ ailiaSpeechSetPrompt()

int AILIA_API ailiaSpeechSetPrompt ( struct AILIASpeech *  net,
const char *  prompt 
)

Set prompt.

Parameters
net: A network instance pointer
prompt: The text of prompt (UTF8)
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

◆ ailiaSpeechSetSilentThreshold()

int AILIA_API ailiaSpeechSetSilentThreshold ( struct AILIASpeech *  net,
float  silent_threshold,
float  speech_sec,
float  no_speech_sec 
)

Set silent threshold.

Parameters
net: A network instance pointer
silent_threshold: volume threshold
speech_sec: speech time
no_speech_sec: no_speech time
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

If there are more than a certain number of sounded sections, and if the silent section lasts for a certain amount of time or more, the remaining buffer is processed without waiting for 30 seconds.

◆ ailiaSpeechSetText()

int AILIA_API ailiaSpeechSetText ( struct AILIASpeech *  net,
const AILIASpeechText *  text,
unsigned int  version,
unsigned int  idx 
)

Set postprocess text.

Parameters
net: A network instance pointer
text: Text
version: AILIA_SPEECH_TEXT_VERSION
idx: Text index
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

Used when using only post-processing without using speech recognition. Since the string is copied to the internal buffer, it can be released after the call. If idx is larger than ailiaSpeechGetTextCount, the internal buffer will be automatically expanded.

◆ ailiaSpeechTranscribe()

int AILIA_API ailiaSpeechTranscribe ( struct AILIASpeech *  net)

Speech recognition.

Parameters
net: A network instance pointer
Returns
If this function is successful, it returns AILIA_STATUS_SUCCESS, or an error code otherwise.

Get the recognition result with ailiaSpeechGetText API.