Table of Contents

Class TokenizerConfig

Namespace
AiDotNet.Tokenization.HuggingFace
Assembly
AiDotNet.dll

Configuration for HuggingFace tokenizers.

public class TokenizerConfig
Inheritance
TokenizerConfig
Inherited Members

Properties

AdditionalSpecialTokens

Gets or sets additional special tokens.

[JsonProperty("additional_special_tokens")]
public List<string>? AdditionalSpecialTokens { get; set; }

Property Value

List<string>

BosToken

Gets or sets the beginning-of-sequence token (`bos_token`), one of the tokenizer's special tokens.

[JsonProperty("bos_token")]
public string? BosToken { get; set; }

Property Value

string

ClsToken

Gets or sets the classification token (`cls_token`), one of the tokenizer's special tokens.

[JsonProperty("cls_token")]
public string? ClsToken { get; set; }

Property Value

string

DoLowerCase

Gets or sets whether to lowercase input.

[JsonProperty("do_lower_case")]
public bool DoLowerCase { get; set; }

Property Value

bool

EosToken

Gets or sets the end-of-sequence token (`eos_token`), one of the tokenizer's special tokens.

[JsonProperty("eos_token")]
public string? EosToken { get; set; }

Property Value

string

MaskToken

Gets or sets the mask token (`mask_token`), one of the tokenizer's special tokens.

[JsonProperty("mask_token")]
public string? MaskToken { get; set; }

Property Value

string

MergesFile

Gets or sets the merges file (for BPE).

[JsonProperty("merges_file")]
public string? MergesFile { get; set; }

Property Value

string

ModelMaxLength

Gets or sets the model max length.

[JsonProperty("model_max_length")]
public int? ModelMaxLength { get; set; }

Property Value

int?

ModelType

Gets or sets the model type.

[JsonProperty("model_type")]
public string? ModelType { get; set; }

Property Value

string

PadToken

Gets or sets the padding token (`pad_token`), one of the tokenizer's special tokens.

[JsonProperty("pad_token")]
public string? PadToken { get; set; }

Property Value

string

SepToken

Gets or sets the separator token (`sep_token`), one of the tokenizer's special tokens.

[JsonProperty("sep_token")]
public string? SepToken { get; set; }

Property Value

string

TokenizerClass

Gets or sets the tokenizer class name (`tokenizer_class`).

[JsonProperty("tokenizer_class")]
public string? TokenizerClass { get; set; }

Property Value

string

UnkToken

Gets or sets the unknown token (`unk_token`), one of the tokenizer's special tokens.

[JsonProperty("unk_token")]
public string? UnkToken { get; set; }

Property Value

string

VocabFile

Gets or sets the vocabulary file.

[JsonProperty("vocab_file")]
public string? VocabFile { get; set; }

Property Value

string