Source code for OpenAttack.metric.algorithms.jaccard_word
from .base import AttackMetric
from ...tags import *
from ...text_process.tokenizer import Tokenizer
class JaccardWord(AttackMetric):
    """Attack metric scoring two sentences by the Jaccard similarity of their token sets."""

    NAME = "Jaccard Word Similarity"

    def __init__(self, tokenizer : Tokenizer):
        """
        Args:
            tokenizer: A tokenizer that will be used in this metric. Must be an instance of :py:class:`.Tokenizer`
        """
        self.tokenizer = tokenizer

    @property
    def TAGS(self):
        """Tags exposed by the wrapped tokenizer, or an empty set if it defines none."""
        return getattr(self.tokenizer, "TAGS", set())

    def calc_score(self, sentA : str, sentB : str) -> float:
        """
        Args:
            sentA: First sentence.
            sentB: Second sentence.

        Returns:
            Jaccard word similarity of two sentences: the number of distinct
            tokens shared by both divided by the number of distinct tokens
            appearing in either. When neither sentence yields any token the
            two (empty) token sets are identical, so ``1.0`` is returned
            instead of dividing by zero.
        """
        # Duplicates are irrelevant to Jaccard similarity, so collapse each
        # token list to a set right away.
        tokens_a = set(self.tokenizer.tokenize(sentA, pos_tagging=False))
        tokens_b = set(self.tokenizer.tokenize(sentB, pos_tagging=False))

        union = tokens_a | tokens_b
        if not union:
            # Both sentences produced no tokens; avoid ZeroDivisionError and
            # treat two empty sets as a perfect match.
            return 1.0
        return len(tokens_a & tokens_b) / len(union)

    def after_attack(self, input, adversarial_sample):
        """
        Score an attack result against the text stored under ``input["x"]``.

        Args:
            input: Mapping whose ``"x"`` entry is passed as the first sentence
                (presumably the original text -- confirm against the caller).
            adversarial_sample: The adversarial sentence, or ``None``.

        Returns:
            The Jaccard word similarity, or ``None`` when ``adversarial_sample``
            is ``None``.
        """
        if adversarial_sample is None:
            return None
        return self.calc_score(input["x"], adversarial_sample)