ailia_tokenizer  1.3.0.0
API Usage

Overview of ailia Tokenizer API

Basic usage

The following example shows how to use ailia Tokenizer. Create an instance, pass it a UTF-8 string to encode, and then retrieve the resulting tokens.

#include "ailia_tokenizer.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>
int main(int argc, char *argv[]){
printf("Tokenizer type %d\n", type);
AILIATokenizer *net;
const char * text = u8"ハードウェア ソフトウェア";
printf("Input Text : %s\n", text);
unsigned int count;
std::vector<int> tokens(count);
ailiaTokenizerGetTokens(net, &tokens[0], count);
ailiaTokenizerDecode(net, &tokens[0], count);
printf("Tokens : ");
for (int i = 0; i < count; i++){
printf("%d ", tokens[i]);
}
printf("\n");
unsigned int len;
std::vector<char> out_text(len);
char * p_text = &out_text[0];
ailiaTokenizerGetText(net, p_text, len);
printf("Output Text : %s\n", p_text);
return 0;
}
ailiaTokenizerGetTextLength
int AILIA_API ailiaTokenizerGetTextLength(struct AILIATokenizer *net, unsigned int *len)
Gets the size of the text, including the terminating null character.
ailiaTokenizerCreate
int AILIA_API ailiaTokenizerCreate(struct AILIATokenizer **net, int type, int flags)
Creates a tokenizer instance.
ailiaTokenizerGetText
int AILIA_API ailiaTokenizerGetText(struct AILIATokenizer *net, char *text, unsigned int len)
Gets the decoded text.
ailiaTokenizerDecode
int AILIA_API ailiaTokenizerDecode(struct AILIATokenizer *net, const int *tokens, unsigned int token_count)
Performs decoding.
ailiaTokenizerEncode
int AILIA_API ailiaTokenizerEncode(struct AILIATokenizer *net, const char *utf8)
Performs encoding.
ailiaTokenizerGetTokens
int AILIA_API ailiaTokenizerGetTokens(struct AILIATokenizer *net, int *tokens, unsigned int count)
Gets the tokens.
AILIA_TOKENIZER_FLAG_NONE
#define AILIA_TOKENIZER_FLAG_NONE
Default flag.
Definition: ailia_tokenizer.h:181
ailia_tokenizer.h
ailiaTokenizerGetTokenCount
int AILIA_API ailiaTokenizerGetTokenCount(struct AILIATokenizer *net, unsigned int *count)
Gets the number of tokens.
AILIA_TOKENIZER_TYPE_WHISPER
#define AILIA_TOKENIZER_TYPE_WHISPER
Tokenizer for Whisper.
Definition: ailia_tokenizer.h:42