ailia_speech  1.4.0.0
Public Member Functions | Static Public Member Functions | Protected Member Functions | List of all members
ailiaSpeech.AiliaSpeechModel Class Reference
Inheritance diagram for ailiaSpeech.AiliaSpeechModel:
Inheritance graph
[legend]
Collaboration diagram for ailiaSpeech.AiliaSpeechModel:
Collaboration graph
[legend]

Public Member Functions

bool IsError ()
 Check whether an error has occurred. More...
 
string GetErrorDetail ()
 Get error detail. More...
 
int GetEnvironmentId (bool is_gpu)
 Get the environment id. More...
 
string GetEnvironmentName ()
 Get the environment name. More...
 
bool Open (string encoder_path, string decoder_path, int env_id, int memory_mode, int model_type, int task, int flag, string language)
 Create an instance. More...
 
bool OpenVad (string vad_path, int vad_type)
 Open VAD file. More...
 
bool OpenDictionary (string dictionary_path, int dictionary_type)
 Open dictionary file. More...
 
bool OpenPostProcess (string encoder_path, string decoder_path, string source_path, string target_path, string prefix, int type)
 Open PostProcess file. More...
 
bool OpenDiarization (string segmentation_path, string embedding_path, int type)
 Open AI model for speaker diarization. More...
 
virtual void Close ()
 Destroys instance. More...
 
virtual void Dispose ()
 Release resources. More...
 
bool SetPrompt (string prompt)
 Set prompt. More...
 
bool SetConstraint (string constraint, int constraint_type)
 Set constraint. More...
 
bool Transcribe (float[] waveData, uint frequency, uint channels, bool tail)
 Perform speech recognition. More...
 
bool IsProcessing ()
 Check whether the sub thread is processing. More...
 
bool IsTranscribing ()
 Check whether Speech2Text is running. More...
 
bool IsCompleted ()
 Gets whether all audio processing is complete. More...
 
List< string > GetResults ()
 Get the Speech2Text results and clear them. More...
 
List< AiliaSpeechText > GetStructuredResults ()
 Get the structured Speech2Text results and clear them. More...
 
string GetIntermediateText ()
 Get the intermediate result of Speech2Text. More...
 
bool ResetTranscribeState ()
 Initialize the Speech2Text state. More...
 

Static Public Member Functions

static int IntermediateCallback (IntPtr handle, IntPtr text)
 

Protected Member Functions

virtual void Dispose (bool disposing)
 

Member Function Documentation

◆ Close()

virtual void ailiaSpeech.AiliaSpeechModel.Close ( )
inlinevirtual

Destroys instance.

Destroys and initializes the instance.

380  {
381  DestroyThread();
382  DestroyInterrupt();
383  if (net != IntPtr.Zero)
384  {
385  AiliaSpeech.ailiaSpeechDestroy(net);
386  net = IntPtr.Zero;
387  }
388  }

◆ Dispose() [1/2]

virtual void ailiaSpeech.AiliaSpeechModel.Dispose ( )
inlinevirtual

Release resources.

398  {
399  Dispose(true);
400  }
virtual void Dispose()
Release resources.
Definition: AiliaSpeechModel.cs:397

◆ Dispose() [2/2]

virtual void ailiaSpeech.AiliaSpeechModel.Dispose ( bool  disposing)
inlineprotectedvirtual
403  {
404  if (disposing){
405  // release managed resource
406  }
407  Close(); // release unmanaged resource
408  }
virtual void Close()
Destroys instance.
Definition: AiliaSpeechModel.cs:379

◆ GetEnvironmentId()

int ailiaSpeech.AiliaSpeechModel.GetEnvironmentId ( bool  is_gpu)
inline

Get the environment id.

Parameters
is_gpu: Whether to use GPU
Returns
env_id
107  {
108  int env_id = Ailia.AILIA_ENVIRONMENT_ID_AUTO;
109  if (is_gpu) { // GPU
110  int count = 0;
111  Ailia.ailiaGetEnvironmentCount(ref count);
112  for (int i = 0; i < count; i++){
113  IntPtr env_ptr = IntPtr.Zero;
114  Ailia.ailiaGetEnvironment(ref env_ptr, (uint)i, Ailia.AILIA_ENVIRONMENT_VERSION);
115  Ailia.AILIAEnvironment env = (Ailia.AILIAEnvironment)Marshal.PtrToStructure(env_ptr, typeof(Ailia.AILIAEnvironment));
116 
117  if (env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_MPS || env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_CUDA || env.backend == Ailia.AILIA_ENVIRONMENT_BACKEND_VULKAN){
118  env_id = env.id;
119  env_name = Marshal.PtrToStringAnsi(env.name);
120  }
121  }
122  } else {
123  env_name = "cpu";
124  }
125  return env_id;
126  }

◆ GetEnvironmentName()

string ailiaSpeech.AiliaSpeechModel.GetEnvironmentName ( )
inline

Get the environment name.

Returns
env_name
139  {
140  return env_name;
141  }

◆ GetErrorDetail()

string ailiaSpeech.AiliaSpeechModel.GetErrorDetail ( )
inline

Get error detail.

Returns
The error detail string.
84  {
85  return m_error_detail;
86  }

◆ GetIntermediateText()

string ailiaSpeech.AiliaSpeechModel.GetIntermediateText ( )
inline

Get the intermediate result of Speech2Text.

Returns
Transcribe results.
895  {
896  lock (m_intermediate_lock_async){
897  return m_intermediate_text;
898  }
899  }

◆ GetResults()

List<string> ailiaSpeech.AiliaSpeechModel.GetResults ( )
inline

Get the Speech2Text results and clear them.

Returns
Transcribe results.
854  {
855  lock (m_lock_async)
856  {
857  List<string> results = new List<string>(m_results);
858  m_results.Clear();
859  return results;
860  }
861  }

◆ GetStructuredResults()

List<AiliaSpeechText> ailiaSpeech.AiliaSpeechModel.GetStructuredResults ( )
inline

Get the structured Speech2Text results and clear them.

Returns
Transcribe results.
875  {
876  lock (m_lock_async)
877  {
878  List<AiliaSpeechText> results_struct = new List<AiliaSpeechText>(m_results_struct);
879  m_results_struct.Clear();
880  return results_struct;
881  }
882  }

◆ IntermediateCallback()

static int ailiaSpeech.AiliaSpeechModel.IntermediateCallback ( IntPtr  handle,
IntPtr  text 
)
inlinestatic
552  {
553  lock (m_intermediate_lock_async){
554  try{
555  string decoded_text = Marshal.PtrToStringAnsi(text);
556  m_intermediate_text = decoded_text;
557  }catch(Exception e){
558  }
559  }
560  if (Marshal.ReadInt32(handle) != 0){
561  return -1; // 中断
562  }
563  return 0;
564  }

◆ IsCompleted()

bool ailiaSpeech.AiliaSpeechModel.IsCompleted ( )
inline

Gets whether all audio processing is complete.

Returns
If Speech2Text is complete, it returns true , or false otherwise.
836  {
837  lock (m_lock_async)
838  {
839  return m_complete;
840  }
841  }

◆ IsError()

bool ailiaSpeech.AiliaSpeechModel.IsError ( )
inline

Check whether an error has occurred.

Returns
If an error has occurred, it returns true, or false otherwise.
69  {
70  return m_error;
71  }

◆ IsProcessing()

bool ailiaSpeech.AiliaSpeechModel.IsProcessing ( )
inline

Check whether the sub thread is processing.

Returns
If the sub thread is processing, it returns true, or false otherwise.
800  {
801  lock (m_lock_async)
802  {
803  return m_processing;
804  }
805  }

◆ IsTranscribing()

bool ailiaSpeech.AiliaSpeechModel.IsTranscribing ( )
inline

Check whether Speech2Text is running.

Returns
If Speech2Text is running, it returns true , or false otherwise.
818  {
819  lock (m_lock_async)
820  {
821  return m_decoding;
822  }
823  }

◆ Open()

bool ailiaSpeech.AiliaSpeechModel.Open ( string  encoder_path,
string  decoder_path,
int  env_id,
int  memory_mode,
int  model_type,
int  task,
int  flag,
string  language 
)
inline

Create an instance.

Parameters
encoder_path: Encoder onnx file path
decoder_path: Decoder onnx file path
env_id: Runtime environment (Ailia.AILIA_ENVIRONMENT_ID_AUTO for automatic selection)
memory_mode: Memory mode (Ailia.AILIA_MEMORY_REDUCE_CONSTANT | Ailia.AILIA_MEMORY_REDUCE_CONSTANT_WITH_INPUT_INITIALIZER | Ailia.AILIA_MEMORY_REUSE_INTERSTAGE etc.)
model_type: Model type (AiliaSpeech.AILIA_SPEECH_MODEL_TYPE_*)
task: Task (AiliaSpeech.AILIA_SPEECH_TASK_*)
flag: OR of flags (AiliaSpeech.AILIA_SPEECH_FLAG_*)
language: Language (e.g. ja or en; auto selects the language automatically)
Returns
If this function is successful, it returns true , or false otherwise.
174  {
175  AiliaLicense.CheckAndDownloadLicense();
176 
177  if (net != null){
178  Close();
179  }
180 
181  AiliaSpeech.AILIASpeechApiCallback callback = AiliaSpeech.GetCallback();
182 
183  int status = AiliaSpeech.ailiaSpeechCreate(ref net, env_id, Ailia.AILIA_MULTITHREAD_AUTO, memory_mode, task, flag, callback, AiliaSpeech.AILIA_SPEECH_API_CALLBACK_VERSION);
184  Check(status, "ailiaSpeechCreate");
185  if (status != 0){
186  return false;
187  }
188 
189  status = AiliaSpeech.ailiaSpeechOpenModelFile(net, encoder_path, decoder_path, model_type);
190  Check(status, "ailiaSpeechOpenModelFile");
191  if (status != 0){
192  return false;
193  }
194 
195  if (language != "auto"){
196  status = AiliaSpeech.ailiaSpeechSetLanguage(net, language);
197  Check(status, "ailiaSpeechSetLanguage");
198  if (status != 0){
199  return false;
200  }
201  }
202 
203  status = AiliaSpeech.ailiaSpeechSetSilentThreshold(net, THRESHOLD_VOLUME, SPEECH_SEC, NO_SPEECH_SEC);
204  Check(status, "ailiaSpeechSetSilentThreshold");
205  if (status != 0){
206  return false;
207  }
208 
209  CreateInterrupt();
210 
211  status = AiliaSpeech.ailiaSpeechSetIntermediateCallback(net, IntermediateCallback, m_interrupt_ptr);
212  Check(status, "ailiaSpeechSetIntermediateCallback");
213  if (status != 0){
214  return false;
215  }
216 
217  CreateThread();
218 
219  m_error = false;
220  m_error_detail = "";
221 
222  if ((flag & AiliaSpeech.AILIA_SPEECH_FLAG_LIVE) != 0){
223  live_mode = true;
224  }else{
225  live_mode = false;
226  }
227 
228  return true;
229  }
static int IntermediateCallback(IntPtr handle, IntPtr text)
Definition: AiliaSpeechModel.cs:552

◆ OpenDiarization()

bool ailiaSpeech.AiliaSpeechModel.OpenDiarization ( string  segmentation_path,
string  embedding_path,
int  type 
)
inline

Open AI model for speaker diarization.

Parameters
segmentation_path: The path name to the onnx file
embedding_path: The path name to the onnx file
type: AILIA_SPEECH_DIARIZATION_TYPE_PYANNOTE_AUDIO
Returns
If this function is successful, it returns true , or false otherwise.
352  {
353  if (net == null){
354  return false;
355  }
356  int status;
357  status = AiliaSpeech.ailiaSpeechOpenDiarizationFile(net, segmentation_path, embedding_path, type);
358  Check(status, "ailiaSpeechOpenDializationFile");
359  if (status != 0){
360  return false;
361  }
362  return true;
363  }

◆ OpenDictionary()

bool ailiaSpeech.AiliaSpeechModel.OpenDictionary ( string  dictionary_path,
int  dictionary_type 
)
inline

Open dictionary file.

Parameters
dictionary_path: Dictionary file path
dictionary_type: Dictionary type (AiliaSpeech.AILIA_SPEECH_DICTIONARY_TYPE_*)
Returns
If this function is successful, it returns true , or false otherwise.
278  {
279  if (net == null){
280  return false;
281  }
282  int status = AiliaSpeech.ailiaSpeechOpenDictionaryFile(net, dictionary_path, dictionary_type);
283  Check(status, "ailiaSpeechOpenDictionaryFile");
284  if (status != 0){
285  return false;
286  }
287  return true;
288  }

◆ OpenPostProcess()

bool ailiaSpeech.AiliaSpeechModel.OpenPostProcess ( string  encoder_path,
string  decoder_path,
string  source_path,
string  target_path,
string  prefix,
int  type 
)
inline

Open PostProcess file.

Parameters
encoder_path: The path name to the onnx file
decoder_path: The path name to the onnx file
source_path: The path name to the tokenizer model file
target_path: The path name to the tokenizer model file
prefix: The prefix of T5 (UTF8), null for FuguMT
type: AILIA_SPEECH_POST_PROCESS_TYPE_*
Returns
If this function is successful, it returns true , or false otherwise.
313  {
314  if (net == null){
315  return false;
316  }
317  int status;
318  if (prefix == null){
319  status = AiliaSpeech.ailiaSpeechOpenPostProcessFile(net, encoder_path, decoder_path, source_path, target_path, IntPtr.Zero, type);
320  }else{
321  byte[] text = System.Text.Encoding.UTF8.GetBytes(prefix+"\u0000");
322  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
323  IntPtr prefix_ptr = handle.AddrOfPinnedObject();
324  status = AiliaSpeech.ailiaSpeechOpenPostProcessFile(net, encoder_path, decoder_path, source_path, target_path, prefix_ptr, type);
325  handle.Free();
326  }
327  Check(status, "ailiaSpeechOpenPostProcessFile");
328  if (status != 0){
329  return false;
330  }
331  post_process_mode = true;
332  return true;
333  }

◆ OpenVad()

bool ailiaSpeech.AiliaSpeechModel.OpenVad ( string  vad_path,
int  vad_type 
)
inline

Open VAD file.

Parameters
vad_path: VAD onnx file path
vad_type: VAD type (AiliaSpeech.AILIA_SPEECH_VAD_TYPE_*)
Returns
If this function is successful, it returns true , or false otherwise.
246  {
247  if (net == null){
248  return false;
249  }
250  int status = AiliaSpeech.ailiaSpeechOpenVadFile(net, vad_path, vad_type);
251  Check(status, "ailiaSpeechOpenVadFile");
252  if (status != 0){
253  return false;
254  }
255  status = AiliaSpeech.ailiaSpeechSetSilentThreshold(net, THRESHOLD_VAD, SPEECH_SEC, NO_SPEECH_SEC);
256  Check(status, "ailiaSpeechSetSilentThreshold");
257  if (status != 0){
258  return false;
259  }
260  return true;
261  }

◆ ResetTranscribeState()

bool ailiaSpeech.AiliaSpeechModel.ResetTranscribeState ( )
inline

Initialize the Speech2Text state.

Returns
If this function is successful, it returns true , or false otherwise.
912  {
913  int status = AiliaSpeech.ailiaSpeechResetTranscribeState(net);
914  lock (m_lock_async)
915  {
916  m_complete = false;
917  }
918  if (status == 0){
919  return true;
920  }
921  return false;
922  }

◆ SetConstraint()

bool ailiaSpeech.AiliaSpeechModel.SetConstraint ( string  constraint,
int  constraint_type 
)
inline

Set constraint.

Parameters
constraint: The text of constraint (UTF8)
constraint_type: AILIA_SPEECH_CONSTRAINT_*
Returns
If this function is successful, it returns true , or false otherwise.
464  {
465  byte[] text = System.Text.Encoding.UTF8.GetBytes(constraint+"\u0000");
466  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
467  IntPtr input = handle.AddrOfPinnedObject();
468  int status = AiliaSpeech.ailiaSpeechSetConstraint(net, input, constraint_type);
469  handle.Free();
470  if (status != 0){
471  return false;
472  }
473  return true;
474  }

◆ SetPrompt()

bool ailiaSpeech.AiliaSpeechModel.SetPrompt ( string  prompt)
inline

Set prompt.

Parameters
prompt: The text of prompt (UTF8)
Returns
If this function is successful, it returns true , or false otherwise.
432  {
433  byte[] text = System.Text.Encoding.UTF8.GetBytes(prompt+"\u0000");
434  GCHandle handle = GCHandle.Alloc(text, GCHandleType.Pinned);
435  IntPtr input = handle.AddrOfPinnedObject();
436  int status = AiliaSpeech.ailiaSpeechSetPrompt(net, input);
437  handle.Free();
438  if (status != 0){
439  return false;
440  }
441  return true;
442  }

◆ Transcribe()

bool ailiaSpeech.AiliaSpeechModel.Transcribe ( float[]  waveData,
uint  frequency,
uint  channels,
bool  tail 
)
inline

Perform speech recognition.

Parameters
waveData: Input PCM
frequency: Frequency of PCM
channels: Number of channels of PCM
tail: Whether this is the last input
Returns
If this function is successful, it returns true, or false otherwise.
Runs speech recognition. This is a non-blocking API. When the execution is completed, the IsCompleted API returns true. Execution results can be obtained with the GetResults API. The intermediate result of execution can be obtained with the GetIntermediateText API.
773  {
774  if (waveData.Length == 0){
775  return false;
776  }
777  lock (m_lock_async)
778  {
779  threadChannels = channels;
780  threadFrequency = frequency;
781  threadWaveQueue.Add(waveData);
782  threadComplete = tail;
783  m_processing = true;
784  m_auto_event.Set();
785  }
786  return true;
787  }

The documentation for this class was generated from the following file: