Source code for OpenAttack.metric.algorithms.gptlm

import math

import torch
import transformers

from ...tags import *
from .base import AttackMetric

class GPT2LM(AttackMetric):
    """Fluency metric: perplexity of the adversarial sample under pretrained GPT-2."""

    NAME = "Fluency (ppl)"
    TAGS = {TAG_English}

    def __init__(self):
        """
        Language Models are Unsupervised Multitask Learners.
        `[pdf] <https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf>`__
        `[code] <https://github.com/openai/gpt-2>`__

        Loads the pretrained ``"gpt2"`` tokenizer and language-model head
        through ``transformers``.

        :Language: english
        """
        self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
        self.lm = transformers.GPT2LMHeadModel.from_pretrained("gpt2")

    def after_attack(self, input, adversarial_sample):
        """
        Score the adversarial sample by its GPT-2 perplexity.

        :param input: The original attack input. It is not used by this metric.
        :param adversarial_sample: The adversarial text produced by the attack
            (presumably a ``str``), or ``None`` when no sample was produced.
        :return: ``exp(loss)`` as a ``float``, where ``loss`` is the language
            modelling loss GPT-2 reports when the sample's own token ids are
            used as labels; ``None`` when ``adversarial_sample`` is ``None``.
        """
        if adversarial_sample is None:
            return None

        # ``verbose=False`` only silences the over-length warning; truncating to
        # the tokenizer's maximum length keeps over-long samples from reaching
        # the model with more tokens than it can handle.
        ipt = self.tokenizer(
            adversarial_sample,
            return_tensors="pt",
            truncation=True,
            verbose=False,
        )

        # Pure inference: no gradients are needed to read a scalar metric, so
        # skip building the autograd graph.
        with torch.no_grad():
            loss = self.lm(**ipt, labels=ipt.input_ids)[0]

        # Convert explicitly to a Python float instead of relying on
        # ``math.exp`` coercing a tensor.
        return math.exp(loss.item())
class GPT2LMChinese(AttackMetric):
    """Chinese GPT-2 fluency metric; only model loading is implemented so far."""

    TAGS = {TAG_Chinese}
    NAME = "Fluency (ppl)"

    def __init__(self):
        """
        Language Models are Unsupervised Multitask Learners.
        `[pdf] <https://d4mucfpksywv.cloudfront.net/better-language-models/language-models.pdf>`__
        `[code] <https://github.com/openai/gpt-2>`__

        Loads a BERT-style fast tokenizer and a GPT-2 language-model head from
        the same hub checkpoint. The model is loaded with ``from_tf=True``,
        which is presumably why TensorFlow is listed as a requirement below.

        :Package Requirements:
            * tensorflow>=2
        :Language: chinese
        """
        ## TODO train a pytorch chinese gpt-2 model
        checkpoint = "mymusise/EasternFantasyNoval"
        self.tokenizer = transformers.BertTokenizerFast.from_pretrained(checkpoint)
        self.lm = transformers.GPT2LMHeadModel.from_pretrained(checkpoint, from_tf=True)

    ## FIXME after_attack is not implemented for this class yet