ailia_tokenizer  1.4.0.0
Public Member Functions | Protected Member Functions | List of all members
ailiaTokenizer.AiliaTokenizerModel Class Reference
Inheritance diagram for ailiaTokenizer.AiliaTokenizerModel:
Inheritance graph
[legend]
Collaboration diagram for ailiaTokenizer.AiliaTokenizerModel:
Collaboration graph
[legend]

Public Member Functions

bool Create (int type, int flag)
 Create an instance. More...
 
bool Open (string model_path=null, string dictionary_path=null, string vocab_path=null, string merge_path=null)
 Open a model. More...
 
virtual void Close ()
 Destroys instance. More...
 
virtual void Dispose ()
 Release resources. More...
 
int[] Encode (string utf8)
 Perform encode. More...
 
int[] EncodeWithSpecialTokens (string utf8)
 Perform encode with special tokens. More...
 
string Decode (int[] tokens)
 Perform decode on the input tokens. More...
 
string DecodeWithSpecialTokens (int[] tokens)
 Perform decode with special tokens on the input tokens. More...
 
int GetVocabSize ()
 Gets the size of vocab. More...
 
string GetVocab (int token)
 Gets the vocab string for a token. More...
 
bool AddSpecialTokens (string[] tokens)
 Adds special tokens. More...
 

Protected Member Functions

virtual void Dispose (bool disposing)
 

Member Function Documentation

◆ AddSpecialTokens()

bool ailiaTokenizer.AiliaTokenizerModel.AddSpecialTokens ( string[]  tokens)
inline

Adds special tokens.

Parameters
tokens - Special tokens to add
Returns
true if this function is successful, false otherwise.

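This is valid only for AILIA_TOKENIZER_TYPE_ROBERTA. (Note: the original text names AILIA_TOKENIZER_TYPE_ROBERTA twice; the second entry was probably meant to be a different tokenizer type - confirm against the native API documentation.)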

393  {
394  IntPtr[] utf8Strings = new IntPtr[tokens.Length];
395  for (int i = 0; i < tokens.Length; i++) {
396  byte[] utf8Bytes = System.Text.Encoding.UTF8.GetBytes(tokens[i] + '\0');
397  utf8Strings[i] = Marshal.AllocHGlobal(utf8Bytes.Length);
398  Marshal.Copy(utf8Bytes, 0, utf8Strings[i], utf8Bytes.Length);
399  }
400 
401  IntPtr tokensPtr = Marshal.AllocHGlobal(IntPtr.Size * tokens.Length);
402  Marshal.Copy(utf8Strings, 0, tokensPtr, utf8Strings.Length);
403 
404  int status = AiliaTokenizer.ailiaTokenizerAddSpecialTokens(net, tokensPtr, (uint)tokens.Length);
405 
406  Marshal.FreeHGlobal(tokensPtr);
407  foreach (IntPtr ptr in utf8Strings) {
408  Marshal.FreeHGlobal(ptr);
409  }
410 
411  if (status != 0){
412  if (logging){
413  Debug.Log("ailiaTokenizerAddSpecialTokens failed " + status);
414  }
415  return false;
416  }
417  return true;
418  }

◆ Close()

virtual void ailiaTokenizer.AiliaTokenizerModel.Close ( )
inlinevirtual

Destroys instance.

Destroys and initializes the instance.

134  {
135  if (net != IntPtr.Zero){
136  AiliaTokenizer.ailiaTokenizerDestroy(net);
137  net = IntPtr.Zero;
138  }
139  }

◆ Create()

bool ailiaTokenizer.AiliaTokenizerModel.Create ( int  type,
int  flag 
)
inline

Create an instance.

Parameters
type - Type (AiliaTokenizer.AILIA_TOKENIZER_TYPE_*)
flag - Bitwise OR of flags (AiliaTokenizer.AILIA_TOKENIZER_FLAG_*)
Returns
true if this function is successful, false otherwise.
37  {
38  if (net != IntPtr.Zero){
39  Close();
40  }
41 
42  int status = AiliaTokenizer.ailiaTokenizerCreate(ref net, type, flag);
43  if (status != 0){
44  if (logging){
45  Debug.Log("ailiaTokenizerCreate failed " + status);
46  }
47  return false;
48  }
49 
50  return true;
51  }

◆ Decode()

string ailiaTokenizer.AiliaTokenizerModel.Decode ( int[]  tokens)
inline

Perform decode.

Parameters
tokens - Input tokens
Returns
The decoded string if this function is successful, or an empty string otherwise.
303  {
304  return DecodeCore(tokens, false);
305  }

◆ DecodeWithSpecialTokens()

string ailiaTokenizer.AiliaTokenizerModel.DecodeWithSpecialTokens ( int[]  tokens)
inline

Perform decode with special tokens.

Parameters
tokens - Input tokens
Returns
The decoded string if this function is successful, or an empty string otherwise.
321  {
322  return DecodeCore(tokens, true);
323  }

◆ Dispose() [1/2]

virtual void ailiaTokenizer.AiliaTokenizerModel.Dispose ( )
inlinevirtual

Release resources.

149  {
150  Dispose(true);
151  }

◆ Dispose() [2/2]

virtual void ailiaTokenizer.AiliaTokenizerModel.Dispose ( bool  disposing)
inlineprotectedvirtual
154  {
155  if (disposing){
156  // release managed resource
157  }
158  Close(); // release unmanaged resource
159  }

◆ Encode()

int [] ailiaTokenizer.AiliaTokenizerModel.Encode ( string  utf8)
inline

Perform encode.

Parameters
utf8 - Input string
Returns
An array of tokens if this function is successful, or an empty array otherwise.
267  {
268  return EncodeCore(utf8, false);
269  }

◆ EncodeWithSpecialTokens()

int [] ailiaTokenizer.AiliaTokenizerModel.EncodeWithSpecialTokens ( string  utf8)
inline

Perform encode with special tokens.

Parameters
utf8 - Input string
Returns
An array of tokens if this function is successful, or an empty array otherwise.
285  {
286  return EncodeCore(utf8, true);
287  }

◆ GetVocab()

string ailiaTokenizer.AiliaTokenizerModel.GetVocab ( int  token)
inline

Gets the vocab string for a token.

Parameters
token - Token
Returns
The vocab string if this function is successful, or null otherwise.
363  {
364  IntPtr ptr = IntPtr.Zero;
365  int status = AiliaTokenizer.ailiaTokenizerGetVocab(net, token, ref ptr);
366  if (status != 0){
367  if (logging){
368  Debug.Log("ailiaTokenizerGetVocab failed " + status);
369  }
370  return null;
371  }
372  return Marshal.PtrToStringAnsi(ptr);
373  }

◆ GetVocabSize()

int ailiaTokenizer.AiliaTokenizerModel.GetVocabSize ( )
inline

Gets the size of vocab.

Returns
The size of the vocab if this function is successful, or -1 otherwise.
337  {
338  uint len = 0;
339  int status = AiliaTokenizer.ailiaTokenizerGetVocabSize(net, ref len);
340  if (status != 0){
341  if (logging){
342  Debug.Log("ailiaTokenizerGetVocabSize failed " + status);
343  }
344  return -1;
345  }
346  return (int)len;
347  }

◆ Open()

bool ailiaTokenizer.AiliaTokenizerModel.Open ( string  model_path = null,
string  dictionary_path = null,
string  vocab_path = null,
string  merge_path = null 
)
inline

Open a model.

Parameters
model_path - Path for model (not loaded if null)
dictionary_path - Path for dictionary (not loaded if null)
vocab_path - Path for vocab (not loaded if null)
merge_path - Path for merge (not loaded if null)
Returns
true if this function is successful, false otherwise.
72  {
73  if (net == IntPtr.Zero){
74  return false;
75  }
76 
77  int status = 0;
78 
79  if (model_path != null){
80  status = AiliaTokenizer.ailiaTokenizerOpenModelFile(net, model_path);
81  if (status != 0){
82  if (logging){
83  Debug.Log("ailiaTokenizerOpenModelFile failed " + status);
84  }
85  return false;
86  }
87  }
88  if (dictionary_path != null){
89  status = AiliaTokenizer.ailiaTokenizerOpenDictionaryFile(net, dictionary_path);
90  if (status != 0){
91  if (logging){
92  Debug.Log("ailiaTokenizerOpenDictionaryFile failed " + status);
93  }
94  return false;
95  }
96  }
97  if (vocab_path != null){
98  status = AiliaTokenizer.ailiaTokenizerOpenVocabFile(net, vocab_path);
99  if (status != 0){
100  if (logging){
101  Debug.Log("ailiaTokenizerOpenVocabFile failed " + status);
102  }
103  return false;
104  }
105  }
106  if (merge_path != null){
107  status = AiliaTokenizer.ailiaTokenizerOpenMergeFile(net, merge_path);
108  if (status != 0){
109  if (logging){
110  Debug.Log("ailiaTokenizerOpenMergeFile failed " + status);
111  }
112  return false;
113  }
114  }
115 
116  return true;
117  }

The documentation for this class was generated from the following file:
AiliaTokenizerModel.cs
ailiaTokenizer.AiliaTokenizerModel.Dispose
virtual void Dispose()
Release resources.
Definition: AiliaTokenizerModel.cs:148
ailiaTokenizer.AiliaTokenizerModel.Close
virtual void Close()
Destroys instance.
Definition: AiliaTokenizerModel.cs:133