Source code for OpenAttack.attackers.viper

import numpy as np
import random


from ..classification import ClassificationAttacker, Classifier, ClassifierGoal
from ...text_process.tokenizer import Tokenizer, get_default_tokenizer
from ...utils import get_language, check_language, language_by_name
from ...tags import Tag
from ...attack_assist.substitute.char import DCESSubstitute, ECESSubstitute

# Default hyper-parameters for the VIPER attack.
# NOTE(review): nothing in the visible code reads this dict; the first three
# values mirror the keyword defaults of ``VIPERAttacker.__init__``.
DEFAULT_CONFIG = {
    "prob": 0.3,         # same value as the ``prob`` default
    "topn": 12,          # same value as the ``topn`` default
    "generations": 120,  # same value as the ``generations`` default
    "eces": True,        # presumably selects ECES over DCES -- TODO confirm
}


class VIPERAttacker(ClassificationAttacker):
    """
    VIPER attacker: randomly swaps characters of a sentence for substitutes
    produced by a DCES or ECES character substitute, and queries the victim
    until the attack goal is met or the generation budget is exhausted.
    """

    @property
    def TAGS(self):
        """Tag set: the substitute's language tag plus ``Tag("get_pred", "victim")``."""
        return {self.__lang_tag, Tag("get_pred", "victim")}

    def __init__(
        self,
        prob: float = 0.3,
        topn: int = 12,
        generations: int = 120,
        method: str = "eces",
    ):
        """
        Text Processing Like Humans Do: Visually Attacking and Shielding NLP Systems. Steffen Eger, Gözde Gül Şahin, Andreas Rücklé, Ji-Ung Lee, Claudia Schulz, Mohsen Mesgar, Krishnkant Swarnkar, Edwin Simpson, Iryna Gurevych. NAACL-HLT 2019.
        `[pdf] <https://www.aclweb.org/anthology/N19-1165>`__
        `[code] <https://github.com/UKPLab/naacl2019-like-humans-visual-attacks>`__

        Args:
            prob: The probability of changing a char in a sentence. **Default:** 0.3
            topn: Number of substitutes while using DCES substitute. **Default:** 12
            generations: Maximum number of sentences generated per attack. **Default:** 120
            method: The method of this attack. Must be one of the following: ``["eces", "dces"]``. **Default:** eces

        Raises:
            ValueError: If ``method`` is neither ``"dces"`` nor ``"eces"``.

        :Classifier Capacity:
            * get_pred

        """
        self.prob = prob
        self.topn = topn
        self.generations = generations
        self.method = method

        # Reject unsupported methods before building any substitute.
        if method not in ("dces", "eces"):
            raise ValueError("Unknown method `%s` expect `%s`" % (method, ["dces", "eces"]))
        self.substitute = DCESSubstitute() if method == "dces" else ECESSubstitute()

        self.__lang_tag = get_language([self.substitute])
        # Per-character cache of (candidate chars, normalised probabilities);
        # only the DCES path reads or fills it.
        self.sim_dict = {}

    def _dces_candidates(self, char: str):
        """Return the cached ``(similar_chars, probs)`` pair for ``char``, computing it on a miss."""
        if char not in self.sim_dict:
            # Keep only the first ``topn`` (char, weight) pairs from the substitute.
            neighbours = self.substitute(char)[:self.topn]
            similar_chars = [candidate for candidate, _ in neighbours]
            weights = [weight for _, weight in neighbours]
            # Dividing by the sum normalises the weights for np.random.choice.
            self.sim_dict[char] = (similar_chars, weights / np.sum(weights))
        return self.sim_dict[char]

    def _perturb_dces(self, char: str):
        """With probability ``self.prob`` sample a DCES substitute for ``char``; otherwise return ``char``."""
        similar_chars, probs = self._dces_candidates(char)
        if random.random() < self.prob and len(similar_chars) > 0:
            return np.random.choice(similar_chars, 1, replace=True, p=probs)[0]
        return char

    def _perturb_eces(self, char: str):
        """With probability ``self.prob`` return the first substitute for ``char``; otherwise return ``char``."""
        if random.random() < self.prob:
            return self.substitute(char)[0][0]
        return char

    def attack(self, victim: Classifier, sentence: str, goal: ClassifierGoal):
        """
        Generate up to ``self.generations`` perturbed sentences and return the first that succeeds.

        Args:
            victim: Classifier queried through ``get_pred`` with a one-element list.
            sentence: The sentence whose characters are perturbed.
            goal: Object whose ``check(candidate, prediction)`` decides success.

        Returns:
            The first perturbed sentence accepted by ``goal.check``, or ``None``
            if no candidate is accepted within the generation budget.
        """
        # Any method other than "dces" takes the first-substitute (ECES) path.
        perturb = self._perturb_dces if self.method == "dces" else self._perturb_eces

        for _ in range(self.generations):
            candidate = "".join(perturb(ch) for ch in sentence)
            prediction = victim.get_pred([candidate])[0]
            if goal.check(candidate, prediction):
                return candidate
        return None