ailia_speech  1.3.0.0
Public Member Functions | Static Public Member Functions | Protected Member Functions | List of all members
ailiaSpeech.AiliaSpeechModel Class Reference
Inheritance diagram for ailiaSpeech.AiliaSpeechModel:
Inheritance graph
[legend]
Collaboration diagram for ailiaSpeech.AiliaSpeechModel:
Collaboration graph
[legend]

Public Member Functions

bool IsError ()
 Check whether an error has occurred. More...
 
string GetErrorDetail ()
 Get error detail. More...
 
int GetEnvironmentId (bool is_gpu)
 Get the environment ID. More...
 
string GetEnvironmentName ()
 Get the environment name. More...
 
bool Open (string encoder_path, string decoder_path, int env_id, int memory_mode, int model_type, int task, int flag, string language)
 Create an instance. More...
 
bool OpenVad (string vad_path, int vad_type)
 Open VAD file. More...
 
bool OpenDictionary (string dictionary_path, int dictionary_type)
 Open dictionary file. More...
 
bool OpenPostProcess (string encoder_path, string decoder_path, string source_path, string target_path, string prefix, int type)
 Open PostProcess file. More...
 
virtual void Close ()
 Destroys instance. More...
 
virtual void Dispose ()
 Release resources. More...
 
bool SetPrompt (string prompt)
 Set prompt. More...
 
bool SetConstraint (string constraint, int constraint_type)
 Set constraint. More...
 
bool Transcribe (float[] waveData, uint frequency, uint channels, bool tail)
 Perform speech recognition. More...
 
bool IsProcessing ()
 Check whether the sub thread is processing. More...
 
bool IsTranscribing ()
 Check whether Speech2Text is running. More...
 
bool IsCompleted ()
 Gets whether all audio processing is complete. More...
 
List< string > GetResults ()
 Get the Speech2Text results and clear them. More...
 
string GetIntermediateText ()
 Get the intermediate result of Speech2Text. More...
 
bool ResetTranscribeState ()
 Initialize the Speech2Text state. More...
 

Static Public Member Functions

static int IntermediateCallback (IntPtr handle, IntPtr text)
 

Protected Member Functions

virtual void Dispose (bool disposing)
 

Member Function Documentation

◆ Close()

virtual void ailiaSpeech.AiliaSpeechModel.Close ( )
inlinevirtual

Destroys instance.

Destroys and initializes the instance.

341  {
342  DestroyThread();
343  DestroyInterrupt();
344  if (net != IntPtr.Zero){
345  AiliaSpeech.ailiaSpeechDestroy(net);
346  net = IntPtr.Zero;
347  }
348  }

◆ Dispose() [1/2]

virtual void ailiaSpeech.AiliaSpeechModel.Dispose ( )
inlinevirtual

Release resources.

358  {
359  Dispose(true);
360  }

◆ Dispose() [2/2]

virtual void ailiaSpeech.AiliaSpeechModel.Dispose ( bool  disposing)
inlineprotectedvirtual
363  {
364  if (disposing){
365  // release managed resource
366  }
367  Close(); // release unmanaged resource
368  }

◆ GetEnvironmentId()

int ailiaSpeech.AiliaSpeechModel.GetEnvironmentId ( bool  is_gpu)
inline

Get the environment ID.

Parameters
is_gpu - Whether to use GPU
Returns
env_id
98  {
99  int env_id = Ailia.AILIA_ENVIRONMENT_ID_AUTO;
100  if (is_gpu) { // GPU
101  int count = 0;
102  Ailia.ailiaGetEnvironmentCount(ref count);
103  for (int i = 0; i < count; i++){
104  IntPtr env_ptr = IntPtr.Zero;
105  Ailia.ailiaGetEnvironment(ref env_ptr, (uint)i, Ailia.AILIA_ENVIRONMENT_VERSION);
106  Ailia.AILIAEnvironment env = (Ailia.AILIAEnvironment)Marshal.PtrToStructure(env_ptr, typeof(Ailia.AILIAEnvironment));
107 
108  if (env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_MPS || env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_CUDA || env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_VULKAN){
109  env_id = env.id;
110  env_name = Marshal.PtrToStringAnsi(env.name);
111  }
112  }
113  } else {
114  env_name = "cpu";
115  }
116  return env_id;
117  }

◆ GetEnvironmentName()

string ailiaSpeech.AiliaSpeechModel.GetEnvironmentName ( )
inline

Get the environment name.

Returns
env_name
130  {
131  return env_name;
132  }

◆ GetErrorDetail()

string ailiaSpeech.AiliaSpeechModel.GetErrorDetail ( )
inline

Get error detail.

Returns
The error detail string.
75  {
76  return m_error_detail;
77  }

◆ GetIntermediateText()

string ailiaSpeech.AiliaSpeechModel.GetIntermediateText ( )
inline

Get the intermediate result of Speech2Text.

Returns
Transcribe results.
818  {
819  lock (m_intermediate_lock_async){
820  return m_intermediate_text;
821  }
822  }

◆ GetResults()

List<string> ailiaSpeech.AiliaSpeechModel.GetResults ( )
inline

Get the Speech2Text results and clear them.

Returns
Transcribe results.
798  {
799  lock (m_lock_async)
800  {
801  List<string> results = new List<string>(m_results);
802  m_results.Clear();
803  return results;
804  }
805  }

◆ IntermediateCallback()

static int ailiaSpeech.AiliaSpeechModel.IntermediateCallback ( IntPtr  handle,
IntPtr  text 
)
inlinestatic
497  {
498  lock (m_intermediate_lock_async){
499  try{
500  string decoded_text = Marshal.PtrToStringAnsi(text);
501  m_intermediate_text = decoded_text;
502  }catch(Exception e){
503  }
504  }
505  if (Marshal.ReadInt32(handle) != 0){
506  return -1; // 中断
507  }
508  return 0;
509  }

◆ IsCompleted()

bool ailiaSpeech.AiliaSpeechModel.IsCompleted ( )
inline

Gets whether all audio processing is complete.

Returns
If Speech2Text is complete, it returns true , or false otherwise.
780  {
781  lock (m_lock_async)
782  {
783  return m_complete;
784  }
785  }

◆ IsError()

bool ailiaSpeech.AiliaSpeechModel.IsError ( )
inline

Check whether an error has occurred.

Returns
If an error has occurred, it returns true, or false otherwise.
60  {
61  return m_error;
62  }

◆ IsProcessing()

bool ailiaSpeech.AiliaSpeechModel.IsProcessing ( )
inline

Check whether the sub thread is processing.

Returns
If the sub thread is processing, it returns true, or false otherwise.
744  {
745  lock (m_lock_async)
746  {
747  return m_processing;
748  }
749  }

◆ IsTranscribing()

bool ailiaSpeech.AiliaSpeechModel.IsTranscribing ( )
inline

Check whether Speech2Text is running.

Returns
If Speech2Text is running, it returns true , or false otherwise.
762  {
763  lock (m_lock_async)
764  {
765  return m_decoding;
766  }
767  }

◆ Open()

bool ailiaSpeech.AiliaSpeechModel.Open ( string  encoder_path,
string  decoder_path,
int  env_id,
int  memory_mode,
int  model_type,
int  task,
int  flag,
string  language 
)
inline

Create an instance.

Parameters
encoder_path - Encoder onnx file path
decoder_path - Decoder onnx file path
env_id - Runtime environment (Ailia.AILIA_ENVIRONMENT_ID_AUTO for automatic selection)
memory_mode - Memory mode (Ailia.AILIA_MEMORY_REDUCE_CONSTANT | Ailia.AILIA_MEMORY_REDUCE_CONSTANT_WITH_INPUT_INITIALIZER | Ailia.AILIA_MEMORY_REUSE_INTERSTAGE etc.)
model_type - Model type (AiliaSpeech.AILIA_SPEECH_MODEL_TYPE_*)
task - Task (AiliaSpeech.AILIA_SPEECH_TASK_*)
flag - OR of flags (AiliaSpeech.AILIA_SPEECH_FLAG_*)
language - Language (ja, en, etc.; auto selects the language automatically)
Returns
If this function is successful, it returns true , or false otherwise.
165  {
166  AiliaLicense.CheckAndDownloadLicense();
167 
168  if (net != null){
169  Close();
170  }
171 
172  AiliaSpeech.AILIASpeechApiCallback callback = AiliaSpeech.GetCallback();
173 
174  int status = AiliaSpeech.ailiaSpeechCreate(ref net, env_id, Ailia.AILIA_MULTITHREAD_AUTO, memory_mode, task, flag, callback, AiliaSpeech.AILIA_SPEECH_API_CALLBACK_VERSION);
175  Check(status, "ailiaSpeechCreate");
176  if (status != 0){
177  return false;
178  }
179 
180  status = AiliaSpeech.ailiaSpeechOpenModelFile(net, encoder_path, decoder_path, model_type);
181  Check(status, "ailiaSpeechOpenModelFile");
182  if (status != 0){
183  return false;
184  }
185 
186  if (language != "auto"){
187  status = AiliaSpeech.ailiaSpeechSetLanguage(net, language);
188  Check(status, "ailiaSpeechSetLanguage");
189  if (status != 0){
190  return false;
191  }
192  }
193 
194  status = AiliaSpeech.ailiaSpeechSetSilentThreshold(net, THRESHOLD_VOLUME, SPEECH_SEC, NO_SPEECH_SEC);
195  Check(status, "ailiaSpeechSetSilentThreshold");
196  if (status != 0){
197  return false;
198  }
199 
200  CreateInterrupt();
201 
202  status = AiliaSpeech.ailiaSpeechSetIntermediateCallback(net, IntermediateCallback, m_interrupt_ptr);
203  Check(status, "ailiaSpeechSetIntermediateCallback");
204  if (status != 0){
205  return false;
206  }
207 
208  CreateThread();
209 
210  m_error = false;
211  m_error_detail = "";
212 
213  if ((flag & AiliaSpeech.AILIA_SPEECH_FLAG_LIVE) != 0){
214  live_mode = true;
215  }else{
216  live_mode = false;
217  }
218 
219  return true;
220  }

◆ OpenDictionary()

bool ailiaSpeech.AiliaSpeechModel.OpenDictionary ( string  dictionary_path,
int  dictionary_type 
)
inline

Open dictionary file.

Parameters
dictionary_path - Dictionary file path
dictionary_type - Dictionary type (AiliaSpeech.AILIA_SPEECH_DICTIONARY_TYPE_*)
Returns
If this function is successful, it returns true , or false otherwise.
269  {
270  if (net == null){
271  return false;
272  }
273  int status = AiliaSpeech.ailiaSpeechOpenDictionaryFile(net, dictionary_path, dictionary_type);
274  Check(status, "ailiaSpeechOpenDictionaryFile");
275  if (status != 0){
276  return false;
277  }
278  return true;
279  }

◆ OpenPostProcess()

bool ailiaSpeech.AiliaSpeechModel.OpenPostProcess ( string  encoder_path,
string  decoder_path,
string  source_path,
string  target_path,
string  prefix,
int  type 
)
inline

Open PostProcess file.

Parameters
encoder_path - The path name to the onnx file
decoder_path - The path name to the onnx file
source_path - The path name to the tokenizer model file
target_path - The path name to the tokenizer model file
prefix - The prefix of T5 (UTF8), null for FuguMT
type - Post-process type (AILIA_SPEECH_POST_PROCESS_TYPE_*)
Returns
If this function is successful, it returns true , or false otherwise.
304  {
305  if (net == null){
306  return false;
307  }
308  int status;
309  if (prefix == null){
310  status = AiliaSpeech.ailiaSpeechOpenPostProcessFile(net, encoder_path, decoder_path, source_path, target_path, IntPtr.Zero, type);
311  }else{
312  byte[] text = System.Text.Encoding.UTF8.GetBytes(prefix+"\u0000");
313  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
314  IntPtr prefix_ptr = handle.AddrOfPinnedObject();
315  status = AiliaSpeech.ailiaSpeechOpenPostProcessFile(net, encoder_path, decoder_path, source_path, target_path, prefix_ptr, type);
316  handle.Free();
317  }
318  Check(status, "ailiaSpeechOpenPostProcessFile");
319  if (status != 0){
320  return false;
321  }
322  post_process_mode = true;
323  return true;
324  }

◆ OpenVad()

bool ailiaSpeech.AiliaSpeechModel.OpenVad ( string  vad_path,
int  vad_type 
)
inline

Open VAD file.

Parameters
vad_path - VAD onnx file path
vad_type - VAD type (AiliaSpeech.AILIA_SPEECH_VAD_TYPE_*)
Returns
If this function is successful, it returns true , or false otherwise.
237  {
238  if (net == null){
239  return false;
240  }
241  int status = AiliaSpeech.ailiaSpeechOpenVadFile(net, vad_path, vad_type);
242  Check(status, "ailiaSpeechOpenVadFile");
243  if (status != 0){
244  return false;
245  }
246  status = AiliaSpeech.ailiaSpeechSetSilentThreshold(net, THRESHOLD_VAD, SPEECH_SEC, NO_SPEECH_SEC);
247  Check(status, "ailiaSpeechSetSilentThreshold");
248  if (status != 0){
249  return false;
250  }
251  return true;
252  }

◆ ResetTranscribeState()

bool ailiaSpeech.AiliaSpeechModel.ResetTranscribeState ( )
inline

Initialize the Speech2Text state.

Returns
If this function is successful, it returns true , or false otherwise.
835  {
836  int status = AiliaSpeech.ailiaSpeechResetTranscribeState(net);
837  lock (m_lock_async)
838  {
839  m_complete = false;
840  }
841  if (status == 0){
842  return true;
843  }
844  return false;
845  }

◆ SetConstraint()

bool ailiaSpeech.AiliaSpeechModel.SetConstraint ( string  constraint,
int  constraint_type 
)
inline

Set constraint.

Parameters
constraint - The text of constraint (UTF8)
constraint_type - Constraint type (AILIA_SPEECH_CONSTRAINT_*)
Returns
If this function is successful, it returns true , or false otherwise.
424  {
425  byte[] text = System.Text.Encoding.UTF8.GetBytes(constraint+"\u0000");
426  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
427  IntPtr input = handle.AddrOfPinnedObject();
428  int status = AiliaSpeech.ailiaSpeechSetConstraint(net, input, constraint_type);
429  handle.Free();
430  if (status != 0){
431  return false;
432  }
433  return true;
434  }

◆ SetPrompt()

bool ailiaSpeech.AiliaSpeechModel.SetPrompt ( string  prompt)
inline

Set prompt.

Parameters
prompt - The text of prompt (UTF8)
Returns
If this function is successful, it returns true , or false otherwise.
392  {
393  byte[] text = System.Text.Encoding.UTF8.GetBytes(prompt+"\u0000");
394  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
395  IntPtr input = handle.AddrOfPinnedObject();
396  int status = AiliaSpeech.ailiaSpeechSetPrompt(net, input);
397  handle.Free();
398  if (status != 0){
399  return false;
400  }
401  return true;
402  }

◆ Transcribe()

bool ailiaSpeech.AiliaSpeechModel.Transcribe ( float[]  waveData,
uint  frequency,
uint  channels,
bool  tail 
)
inline

Perform speech recognition.

Parameters
waveData - Input PCM
frequency - Frequency of PCM
channels - Number of channels of PCM
tail - Whether this is the last input
Returns
If this function is successful, it returns true, or false otherwise.
Details: Runs speech recognition. This is a non-blocking API. Use the IsProcessing API to check whether the sub thread is still processing, and the IsCompleted API to check whether all audio processing is complete. Execution results can be obtained with the GetResults API. The intermediate result can be obtained with the GetIntermediateText API.
717  {
718  if (waveData.Length == 0){
719  return false;
720  }
721  lock (m_lock_async)
722  {
723  threadChannels = channels;
724  threadFrequency = frequency;
725  threadWaveQueue.Add(waveData);
726  threadComplete = tail;
727  m_processing = true;
728  m_auto_event.Set();
729  }
730  return true;
731  }

The documentation for this class was generated from the following file:
ailiaSpeech.AiliaSpeechModel.Close
virtual void Close()
Destroys instance.
Definition: AiliaSpeechModel.cs:340
ailiaSpeech.AiliaSpeechModel.IntermediateCallback
static int IntermediateCallback(IntPtr handle, IntPtr text)
Definition: AiliaSpeechModel.cs:497
ailiaSpeech.AiliaSpeechModel.Dispose
virtual void Dispose()
Release resources.
Definition: AiliaSpeechModel.cs:357