Source code for OpenAttack.attack_assist.substitute.word.english_wordnet

from .base import WordSubstitute
from ....tags import TAG_English
from ....data_manager import DataManager
from ....exceptions import WordNotInDictionaryException



def prefilter(token, synonym):
    """
    Pre-filter a single candidate synonym for an original word.

    Args:
        token: The original word.
        synonym: One candidate replacement for ``token``.

    Returns:
        ``False`` when the candidate should be discarded, ``True`` otherwise.
        A candidate is discarded when it is made of more than two
        whitespace-separated words, when it is identical to ``token``, or
        when ``token`` is one of the "to be" forms ``be``/``is``/``are``/``am``.
    """
    # Candidate is a phrase longer than two words.
    if len(synonym.split()) > 2:
        return False
    # Candidate is the very same string as the original word.
    if synonym == token:
        return False
    # Forms of "to be" are never substituted (exact, case-sensitive match).
    return token not in ('be', 'is', 'are', 'am')


class WordNetSubstitute(WordSubstitute):
    """English word substitute that proposes synonyms looked up in WordNet."""

    TAGS = { TAG_English }

    def __init__(self, k = None):
        """
        English word substitute based on wordnet.

        Args:
            k: Top-k results to return. If k is `None`, all results will be returned. Default: `None`

        :Data Requirements: :py:data:`.TProcess.NLTKWordNet`
        :Language: english

        """

        self.wn = DataManager.load("TProcess.NLTKWordNet")
        self.k = k

    def substitute(self, word: str, pos: str):
        """
        Collect WordNet synonyms of ``word`` for the given part of speech.

        Args:
            word: The word to find substitutes for.
            pos: Part-of-speech tag; one of ``"adv"``, ``"adj"``, ``"verb"``,
                ``"noun"`` or ``"other"``.

        Returns:
            A list of ``(synonym, 1)`` tuples: lower-cased, de-duplicated, in
            the order WordNet yields them, and truncated to ``self.k`` entries
            when ``self.k`` is not `None`. The list is empty when ``word``
            contains no usable text or nothing survives filtering.

        Raises:
            WordNotInDictionaryException: If ``pos`` is ``"other"``.
            KeyError: If ``pos`` is any other tag without a WordNet mapping.
        """
        if pos == "other":
            raise WordNotInDictionaryException()
        pos_in_wordnet = {
            "adv": "r",
            "adj": "a",
            "verb": "v",
            "noun": "n"
        }[pos]

        # Only the first underscore/whitespace-separated piece of the query is
        # compared against candidates. An empty or separator-only word has no
        # such piece; report "no substitutes" instead of raising IndexError.
        pieces = word.replace('_', ' ').split()
        if not pieces:
            return []
        token = pieces[0]

        seen = set()  # lower-cased synonyms already emitted (O(1) membership)
        ret = []
        for synset in self.wn.synsets(word, pos=pos_in_wordnet):
            for lemma in synset.lemmas():
                # Multi-word lemmas are joined with '_'; keep their first word only.
                candidate = lemma.name().replace('_', ' ').split()[0]
                if not prefilter(token, candidate):
                    continue
                # Filtering uses the original casing; de-duplication and the
                # returned value use the lower-cased form.
                lowered = candidate.lower()
                if lowered in seen:
                    continue
                seen.add(lowered)
                ret.append((lowered, 1))

        if self.k is not None:
            ret = ret[:self.k]
        return ret