Source code for OpenAttack.metric.algorithms.levenshtein

from typing import List
from .base import AttackMetric
import torch
from ...text_process.tokenizer import Tokenizer

class Levenshtein(AttackMetric):
    """
    Attack metric that reports the Levenshtein edit distance between the
    tokenized original input and the tokenized adversarial sample.
    """

    NAME = "Levenshtein Edit Distance"

    def __init__(self, tokenizer : Tokenizer) -> None:
        """
        Args:
            tokenizer: A tokenizer that will be used in this metric. Must be an instance of :py:class:`.Tokenizer`
        """
        self.tokenizer = tokenizer

    @property
    def TAGS(self):
        """
        Returns:
            The ``TAGS`` attribute of the wrapped tokenizer when it defines one,
            otherwise an empty set.
        """
        if hasattr(self.tokenizer, "TAGS"):
            return self.tokenizer.TAGS
        return set()

    def calc_score(self, a : List[str], b : List[str]) -> int:
        """
        Args:
            a: The first list.
            b: The second list.

        Returns:
            Levenshtein edit distance between two sentences.

        Both parameters can be str or list, str for char-level edit distance
        while list for token-level edit distance.
        """
        # Only the previous row of the DP table is ever read, so two rolling
        # rows of plain Python ints are enough. This avoids allocating a full
        # (len(a) + 1) x (len(b) + 1) tensor and indexing it one scalar at a
        # time from Python, which is very slow.
        # Row 0: turning an empty prefix of ``a`` into b[:j] costs j insertions.
        prev_row = list(range(len(b) + 1))
        for i, item_a in enumerate(a, start=1):
            # Column 0: turning a[:i] into an empty sequence costs i deletions.
            curr_row = [i]
            for j, item_b in enumerate(b, start=1):
                if item_a == item_b:
                    # Matching elements add no cost on top of the diagonal.
                    curr_row.append(prev_row[j - 1])
                else:
                    # Cheapest of substitution, deletion and insertion, plus one.
                    curr_row.append(
                        min(prev_row[j - 1], prev_row[j], curr_row[j - 1]) + 1
                    )
            prev_row = curr_row
        return prev_row[-1]

    def after_attack(self, input, adversarial_sample):
        """
        Args:
            input: Mapping whose ``"x"`` entry is the original text.
            adversarial_sample: The adversarial text, or ``None``.

        Returns:
            The edit distance between the tokenized ``input["x"]`` and the
            tokenized ``adversarial_sample``, or ``None`` when
            ``adversarial_sample`` is ``None``.
        """
        if adversarial_sample is None:
            return None
        # NOTE(review): presumably ``tokenize(..., pos_tagging=False)`` returns a
        # plain list of token strings -- confirm against the Tokenizer class.
        return self.calc_score(
            self.tokenizer.tokenize(input["x"], pos_tagging=False),
            self.tokenizer.tokenize(adversarial_sample, pos_tagging=False)
        )