This example shows how to use the ailia Tokenizer: create a tokenizer instance, pass it a UTF-8 string to encode, and then retrieve the resulting tokens. The tokens are then decoded back into text.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector>

#include "ailia_tokenizer.h"
int main(int argc, char *argv[]){
printf("Tokenizer type %d\n", type);
AILIATokenizer *net;
const char * text = u8"ハードウェア ソフトウェア";
printf("Input Text : %s\n", text);
unsigned int count;
std::vector<int> tokens(count);
printf("Tokens : ");
for (int i = 0; i < count; i++){
printf("%d ", tokens[i]);
}
printf("\n");
unsigned int len;
std::vector<char> out_text(len);
char * p_text = &out_text[0];
printf("Output Text : %s\n", p_text);
return 0;
}
int AILIA_API ailiaTokenizerGetText(struct AILIATokenizer *net, char *text, unsigned int len)
Gets the decoded text.
#define AILIA_TOKENIZER_FLAG_NONE
Default flag.
Definition: ailia_tokenizer.h:181
int AILIA_API ailiaTokenizerEncode(struct AILIATokenizer *net, const char *utf8)
Encodes the given UTF-8 text into tokens.
int AILIA_API ailiaTokenizerDecode(struct AILIATokenizer *net, const int *tokens, unsigned int token_count)
Decodes the given tokens into text.
#define AILIA_TOKENIZER_TYPE_WHISPER
Tokenizer for Whisper.
Definition: ailia_tokenizer.h:42
int AILIA_API ailiaTokenizerGetTokens(struct AILIATokenizer *net, int *tokens, unsigned int count)
Gets the tokens.
int AILIA_API ailiaTokenizerGetTokenCount(struct AILIATokenizer *net, unsigned int *count)
Gets the number of tokens.
int AILIA_API ailiaTokenizerCreate(struct AILIATokenizer **net, int type, int flags)
Creates a tokenizer instance.
int AILIA_API ailiaTokenizerGetTextLength(struct AILIATokenizer *net, unsigned int *len)
Gets the size of the decoded text, including the terminating null character.