Source code for OpenAttack.text_process.tokenizer.transformers_tokenizer

from .base import Tokenizer
import transformers
class TransformersTokenizer(Tokenizer):
    """
    Pretrained Tokenizer from transformers.

    Usually returned by :py:class:`.TransformersClassifier` .

    Wraps a ``transformers`` tokenizer object and forwards tokenization and
    detokenization to it. Part-of-speech tagging is not supported.
    """

    @property
    def TAGS(self):
        """Return a one-element set holding the stored language tag."""
        return {self.__lang_tag}

    @TAGS.setter
    def TAGS(self, value):
        # NOTE(review): the getter wraps the stored value in a set, while this
        # setter stores `value` as-is. Assigning a set here would make the
        # getter try to build a set containing a set. Presumably callers pass
        # a single tag -- confirm against callers before changing.
        self.__lang_tag = value

    def __init__(self, tokenizer: transformers.PreTrainedTokenizerBase, lang_tag):
        """
        :param tokenizer: The tokenizer object that calls are delegated to.
        :param lang_tag: The language tag exposed through :py:attr:`TAGS`.
        """
        self.__tokenizer = tokenizer
        self.__lang_tag = lang_tag

    def do_tokenize(self, x, pos_tagging):
        """
        Tokenize ``x`` with the wrapped tokenizer.

        :param x: The input passed to the wrapped tokenizer's ``tokenize``.
        :param pos_tagging: Must be falsy; POS tagging is not available.
        :return: Whatever the wrapped tokenizer's ``tokenize`` returns.
        :raises ValueError: If ``pos_tagging`` is truthy.
        """
        if pos_tagging:
            raise ValueError(
                "`%s` does not support pos tagging" % self.__class__.__name__
            )
        return self.__tokenizer.tokenize(x)

    def do_detokenize(self, x):
        """
        Join tokens back into text with the wrapped tokenizer.

        :param x: The tokens passed to ``convert_tokens_to_string``.
        :return: Whatever ``convert_tokens_to_string`` returns.
        """
        return self.__tokenizer.convert_tokens_to_string(x)