Source code for OpenAttack.attack_assist.substitute.word.english_wordnet

from .base import WordSubstitute
from ....tags import TAG_English
from ....data_manager import DataManager
from ....exceptions import WordNotInDictionaryException



def prefilter(token, synonym):
    """Decide whether ``synonym`` is an acceptable candidate for ``token``.

    Args:
        token: The original word.
        synonym: One candidate replacement word.

    Returns:
        ``False`` when the candidate splits into more than two
        whitespace-separated words, when it is identical to ``token``, or
        when ``token`` is one of the forms 'be', 'is', 'are', 'am';
        ``True`` otherwise. All comparisons are case-sensitive.
    """
    # Candidates longer than two words are treated as phrases and rejected.
    word_count = len(synonym.split())
    if word_count > 2:
        return False
    # A candidate equal to the original word is not a substitution.
    if synonym == token:
        return False
    # Forms of "to be" are never substituted.
    return token not in ('be', 'is', 'are', 'am')


class WordNetSubstitute(WordSubstitute):
    """English word substitute that proposes WordNet synonyms."""

    TAGS = { TAG_English }

    # Maps the part-of-speech names accepted by ``substitute`` to the
    # single-letter POS codes passed to ``self.wn.synsets``.
    _WORDNET_POS = {
        "adv": "r",
        "adj": "a",
        "verb": "v",
        "noun": "n",
    }

    def __init__(self, k = None):
        """
        English word substitute based on wordnet.

        Args:
            k: Top-k results to return. If k is `None`, all results will be returned. Default: None

        :Data Requirements: :py:data:`.TProcess.NLTKWordNet`
        :Language: english

        """
        self.wn = DataManager.load("TProcess.NLTKWordNet")
        self.k = k

    def substitute(self, word: str, pos: str):
        """Return WordNet-based substitutes for ``word`` with the given POS.

        Args:
            word: The word to find substitutes for.
            pos: One of "adv", "adj", "verb", "noun" or "other".

        Returns:
            A list of ``(synonym, 1)`` tuples. Synonyms are lowercased,
            de-duplicated in first-seen order, and truncated to the first
            ``self.k`` entries when ``self.k`` is not ``None``. An empty list
            is returned when ``word`` contains no non-whitespace characters.

        Raises:
            WordNotInDictionaryException: If ``pos`` is "other".
            KeyError: If ``pos`` is any other unsupported value.
        """
        if pos == "other":
            raise WordNotInDictionaryException()
        pos_in_wordnet = self._WORDNET_POS[pos]

        # Only the first word of the (possibly multi-word) input is compared
        # against candidates. An empty input has no first word, so there is
        # nothing to substitute.
        word_parts = word.replace('_', ' ').split()
        if not word_parts:
            return []
        # Lowercase before filtering: results are lowercased, so a candidate
        # that differs from the input only by case would otherwise pass the
        # filter and come back as the input word itself.
        token = word_parts[0].lower()

        seen = set()
        ret = []
        for synset in self.wn.synsets(word, pos=pos_in_wordnet):
            for lemma in synset.lemmas():
                # Multi-word lemma names use '_' as separator; only the first
                # word is kept as the candidate.
                # NOTE(review): this truncates multi-word lemmas to their
                # first word -- confirm that is intended.
                name_parts = lemma.name().replace('_', ' ').split()
                if not name_parts:
                    continue
                candidate = name_parts[0].lower()
                if candidate in seen or not prefilter(token, candidate):
                    continue
                seen.add(candidate)
                ret.append((candidate, 1))

        if self.k is not None:
            ret = ret[:self.k]
        return ret