Go to the documentation of this file.
9 #ifndef INCLUDED_AILIA_TOKENIZER
10 #define INCLUDED_AILIA_TOKENIZER
18 #if defined(_WIN64) || defined(_M_X64) || defined(__amd64__) || defined(__x86_64__) || defined(__APPLE__) || \
19 defined(__ANDROID__) || defined(ANDROID) || defined(__linux__) || defined(NN_NINTENDO_SDK)
22 #define AILIA_API __stdcall
42 #define AILIA_TOKENIZER_TYPE_WHISPER (0)
53 #define AILIA_TOKENIZER_TYPE_CLIP (1)
64 #define AILIA_TOKENIZER_TYPE_XLM_ROBERTA (2)
75 #define AILIA_TOKENIZER_TYPE_MARIAN (3)
90 #define AILIA_TOKENIZER_TYPE_BERT_JAPANESE_WORDPIECE (4)
105 #define AILIA_TOKENIZER_TYPE_BERT_JAPANESE_CHARACTER (5)
122 #define AILIA_TOKENIZER_TYPE_T5 (6)
133 #define AILIA_TOKENIZER_TYPE_ROBERTA (7)
144 #define AILIA_TOKENIZER_TYPE_BERT (8)
155 #define AILIA_TOKENIZER_TYPE_GPT2 (9)
166 #define AILIA_TOKENIZER_TYPE_LLAMA (10)
181 #define AILIA_TOKENIZER_FLAG_NONE (0)
192 #define AILIA_TOKENIZER_FLAG_UTF8_SAFE (1)
198 struct AILIATokenizer;
745 #define ailiaTokenizerOpenModelFile ailiaTokenizerOpenModelFileW
746 #define ailiaTokenizerOpenDictionaryFile ailiaTokenizerOpenDictionaryFileW
747 #define ailiaTokenizerOpenVocabFile ailiaTokenizerOpenVocabFileW
748 #define ailiaTokenizerOpenMergeFile ailiaTokenizerOpenMergeFileW
749 #define ailiaTokenizerOpenAddedTokensFile ailiaTokenizerOpenAddedTokensFileW
750 #define ailiaTokenizerOpenTokenizerConfigFile ailiaTokenizerOpenTokenizerConfigFileW
752 #define ailiaTokenizerOpenModelFile ailiaTokenizerOpenModelFileA
753 #define ailiaTokenizerOpenDictionaryFile ailiaTokenizerOpenDictionaryFileA
754 #define ailiaTokenizerOpenVocabFile ailiaTokenizerOpenVocabFileA
755 #define ailiaTokenizerOpenMergeFile ailiaTokenizerOpenMergeFileA
756 #define ailiaTokenizerOpenAddedTokensFile ailiaTokenizerOpenAddedTokensFileA
757 #define ailiaTokenizerOpenTokenizerConfigFile ailiaTokenizerOpenTokenizerConfigFileA
int AILIA_API ailiaTokenizerGetTextLength(struct AILIATokenizer *net, unsigned int *len)
Gets the size of the text, including the null terminator.
int AILIA_API ailiaTokenizerOpenAddedTokensFileA(struct AILIATokenizer *net, const char *path)
Open added tokens file.
int AILIA_API ailiaTokenizerOpenVocabFileA(struct AILIATokenizer *net, const char *path)
Open vocab file.
int AILIA_API ailiaTokenizerUtf8ToUtf32(unsigned int *utf32, unsigned int *processed_byte, const char *utf8, unsigned int utf8_len)
Convert UTF8 character to UTF32 character.
int AILIA_API ailiaTokenizerCreate(struct AILIATokenizer **net, int type, int flags)
Creates a tokenizer instance.
int AILIA_API ailiaTokenizerGetText(struct AILIATokenizer *net, char *text, unsigned int len)
Gets the decoded text.
void AILIA_API ailiaTokenizerDestroy(struct AILIATokenizer *net)
Destroys the tokenizer instance.
int AILIA_API ailiaTokenizerDecode(struct AILIATokenizer *net, const int *tokens, unsigned int token_count)
Perform decode.
int AILIA_API ailiaTokenizerEncode(struct AILIATokenizer *net, const char *utf8)
Perform encode.
int AILIA_API ailiaTokenizerUtf32ToUtf8(char *utf8, unsigned int *processed_byte, unsigned int utf32)
Convert UTF32 character to UTF8 character.
int AILIA_API ailiaTokenizerOpenModelFileW(struct AILIATokenizer *net, const wchar_t *path)
int AILIA_API ailiaTokenizerGetVocabSize(struct AILIATokenizer *net, unsigned int *size)
Gets the size of vocab. (Include null)
int AILIA_API ailiaTokenizerOpenVocabFileW(struct AILIATokenizer *net, const wchar_t *path)
int AILIA_API ailiaTokenizerGetVocab(struct AILIATokenizer *net, int token, const char **vocab)
Gets the vocab string for a token.
int AILIA_API ailiaTokenizerOpenTokenizerConfigFileW(struct AILIATokenizer *net, const wchar_t *path)
int AILIA_API ailiaTokenizerGetTokens(struct AILIATokenizer *net, int *tokens, unsigned int count)
Gets the tokens.
int AILIA_API ailiaTokenizerGetCharStarts(struct AILIATokenizer *net, int *char_starts, unsigned int count)
Gets the character start positions.
int AILIA_API ailiaTokenizerOpenTokenizerConfigFileA(struct AILIATokenizer *net, const char *path)
Open tokenizer config file.
int AILIA_API ailiaTokenizerOpenDictionaryFileW(struct AILIATokenizer *net, const wchar_t *path)
#define AILIA_API
Definition: ailia_tokenizer.h:22
int AILIA_API ailiaTokenizerGetWordIDs(struct AILIATokenizer *net, int *word_ids, unsigned int count)
Gets the word IDs.
int AILIA_API ailiaTokenizerOpenMergeFileA(struct AILIATokenizer *net, const char *path)
Open merges file.
int AILIA_API ailiaTokenizerOpenModelFileA(struct AILIATokenizer *net, const char *path)
Open model file.
int AILIA_API ailiaTokenizerOpenAddedTokensFileW(struct AILIATokenizer *net, const wchar_t *path)
int AILIA_API ailiaTokenizerGetTokenCount(struct AILIATokenizer *net, unsigned int *count)
Gets the number of tokens.
int AILIA_API ailiaTokenizerOpenDictionaryFileA(struct AILIATokenizer *net, const char *path)
Open dictionary file.
int AILIA_API ailiaTokenizerEncodeWithSpecialTokens(struct AILIATokenizer *net, const char *utf8)
Perform encode with special tokens.
int AILIA_API ailiaTokenizerOpenMergeFileW(struct AILIATokenizer *net, const wchar_t *path)
int AILIA_API ailiaTokenizerGetCharEnds(struct AILIATokenizer *net, int *char_ends, unsigned int count)
Gets the character end positions.
int AILIA_API ailiaTokenizerDecodeWithSpecialTokens(struct AILIATokenizer *net, const int *tokens, unsigned int token_count)
Perform decode with special tokens.
int AILIA_API ailiaTokenizerAddSpecialTokens(struct AILIATokenizer *net, const char **tokens, unsigned int count)
Adds special tokens.