# Source code for OpenAttack.utils.visualizer

import os
import numpy as np

# Lookup table used by ``char_width``: each entry is
# ``(inclusive upper code point bound, column width)``, sorted by ascending
# bound. ``char_width`` scans the list in order and returns the width of the
# first entry whose bound is >= the code point, so every entry covers the
# range from the previous bound + 1 up to its own bound.
# NOTE(review): the widths 0 / 1 / 2 presumably stand for zero-width
# (combining), narrow and wide (East Asian) characters -- confirm against the
# table's original source.
widths = [
    (126,    1), (159,    0), (687,     1), (710,   0), (711,   1), 
    (727,    0), (733,    1), (879,     0), (1154,  1), (1161,  0), 
    (4347,   1), (4447,   2), (7467,    1), (7521,  0), (8369,  1), 
    (8426,   0), (9000,   1), (9002,    2), (11021, 1), (12350, 2), 
    (12351,  1), (12438,  2), (12442,   0), (19893, 2), (19967, 1),
    (55203,  2), (63743,  1), (64106,   2), (65039, 1), (65059, 0),
    (65131,  2), (65279,  1), (65376,   2), (65500, 1), (65510, 2),
    (120831, 1), (262141, 2), (1114109, 1),
]
 
def char_width(o):
    """
    Return the number of terminal columns used by one character.

    :param o: Code point of the character (the value of ``ord(ch)``).
    :return: The width stored in the module-level ``widths`` table for the
        first entry whose upper bound is >= ``o``; ``0`` for the code points
        0xe and 0xf; ``1`` when ``o`` lies beyond the last table entry.
    """
    # 0xe / 0xf are special-cased ahead of the table lookup.
    if o in (0xe, 0xf):
        return 0
    # First table range that contains the code point wins; default to 1.
    return next((width for upper, width in widths if o <= upper), 1)

def sent_len(s):
    """
    Return the display width of a string in terminal columns.

    :param s: The text to measure; must be a ``str``.
    :return: Sum of ``char_width`` over every character of ``s``
        (``0`` for the empty string).
    """
    assert isinstance(s, str)
    return sum(char_width(ord(ch)) for ch in s)

def _clip_to_width(text, max_width):
    # Longest prefix of ``text`` whose display width fits in ``max_width`` columns.
    used = 0
    for pos, ch in enumerate(text):
        used += char_width(ord(ch))
        if used > max_width:
            return text[:pos]
    return text


def right_bar_print(info, key_len=20, val_len=10):
    """
    Render a key/value mapping as fixed-width rows for the right-hand panel.

    Each row is a key column of ``key_len`` display columns followed by a
    value column of ``val_len`` display columns. One blank row is emitted
    before and after the data rows.

    Keys and generic (non-numeric) values are clipped by *display width*
    rather than by character count, so wide characters (e.g. CJK) no longer
    overflow their column. Output for ASCII text is unchanged.

    :param info: Mapping from ``str`` keys to values. ``bool`` is shown as
        ``yes`` / ``no``, ``int`` with ``%d``, ``float`` with ``%.5g``
        (falling back to a clipped ``%.7f`` when that is too wide), and
        anything else with ``%s``.
    :param key_len: Display width of the key column.
    :param val_len: Display width of the value column.
    :return: List of row strings, one per entry plus the two blank rows.
    """
    blank = " " * (key_len + val_len)
    ret = [blank]
    for key, val in info.items():
        # " " + key + ": " takes three extra columns, hence key_len - 3.
        row = " %s: " % _clip_to_width(key, key_len - 3)
        row += " " * (key_len - sent_len(row))

        # bool must be tested before int because bool is a subclass of int.
        if isinstance(val, bool):
            val_str = " yes" if val else " no"
        elif isinstance(val, int):
            # Integers are never clipped: a truncated number would be misleading.
            val_str = " %d" % val
        elif isinstance(val, float):
            val_str = " %.5g" % val
            if sent_len(val_str) > val_len:
                # Pure ASCII here, so slicing by character count is exact.
                val_str = (" %.7f" % val)[:val_len]
        else:
            val_str = _clip_to_width(" %s" % val, val_len)

        # A negative repeat count yields "", so over-wide values get no padding.
        row += val_str + " " * (val_len - sent_len(val_str))
        ret.append(row)
    ret.append(blank)
    return ret

def word_align(wordA, wordB):
    """
    Pad the narrower of two words with trailing spaces so that both have
    the same display width.

    :param wordA: First word.
    :param wordB: Second word.
    :return: Tuple ``(wordA, wordB)`` after padding; at most one of the two
        is changed.
    """
    gap = sent_len(wordB) - sent_len(wordA)
    if gap > 0:
        # wordA is narrower.
        return wordA + " " * gap, wordB
    # wordB is narrower or both are equal (then the padding is empty).
    return wordA, wordB + " " * (-gap)

def levenshtein_visual(a, b):
    """
    Align two token sequences using a case-insensitive edit distance.

    A dynamic-programming table of edit distances is filled first and then
    walked back from the bottom-right corner to recover one alignment.

    :param a: Sequence of string tokens (original side).
    :param b: Sequence of string tokens (modified side).
    :return: List of ``(token_a, token_b)`` pairs in reading order. Tokens
        that match ignoring case are returned untouched; substituted,
        removed and inserted tokens are passed through ``word_align`` (an
        empty string stands in for the missing side).
    """
    n_a, n_b = len(a), len(b)
    dist = np.zeros((n_a + 1, n_b + 1), dtype=np.uint64)

    # Borders: turning an empty prefix into a prefix of length k costs k edits.
    for col in range(n_b + 1):
        dist[0][col] = col
    for row in range(1, n_a + 1):
        dist[row][0] = row
        for col in range(1, n_b + 1):
            if a[row - 1].lower() == b[col - 1].lower():
                dist[row][col] = dist[row - 1][col - 1]
            else:
                dist[row][col] = min(
                    dist[row - 1][col - 1],
                    dist[row - 1][col],
                    dist[row][col - 1],
                ) + 1

    # Walk back through the table, collecting pairs last-to-first.
    i, j = n_a, n_b
    aligned = []
    while i > 0 and j > 0:
        tok_a, tok_b = a[i - 1], b[j - 1]
        if tok_a.lower() == tok_b.lower():
            aligned.append((tok_a, tok_b))
            i -= 1
            j -= 1
            continue
        if dist[i][j] == dist[i - 1][j - 1] + 1:
            # substitution
            aligned.append(word_align(tok_a, tok_b))
            i -= 1
            j -= 1
            continue
        if dist[i][j] == dist[i - 1][j] + 1:
            # token removed from ``a``
            aligned.append(word_align(tok_a, ""))
            i -= 1
            continue
        # token inserted from ``b``
        assert dist[i][j] == dist[i][j - 1] + 1
        aligned.append(word_align("", tok_b))
        j -= 1

    # Whatever is left on either side has no counterpart.
    for k in range(i, 0, -1):
        aligned.append(word_align(a[k - 1], ""))
    for k in range(j, 0, -1):
        aligned.append(word_align("", b[k - 1]))

    aligned.reverse()
    return aligned

def left_bar_print(x_orig, y_orig, x_adv, y_adv, max_len, tokenizer):
    """
    Build the left-hand panel for a sample that has an adversarial example.

    The panel starts with a green label headline and a blank row, followed
    by the aligned original / adversarial token rows. Every wrapped line is
    emitted as three rows: original tokens, adversarial tokens, blank row.
    Tokens that differ (ignoring case) are highlighted in bold red on the
    original row and bold green on the adversarial row.

    :param x_orig: Original text, passed to ``tokenizer.tokenize``.
    :param y_orig: Prediction for ``x_orig``: either an integer label
        (Python ``int`` or NumPy integer scalar) or an object providing
        ``argmax()`` / ``max()``, whose maximum is shown as a percentage.
    :param x_adv: Adversarial text, passed to ``tokenizer.tokenize``.
    :param y_adv: Prediction for ``x_adv``; must be the same kind as
        ``y_orig``.
    :param max_len: Display width of the panel in columns.
    :param tokenizer: Object with ``tokenize(text, pos_tagging=False)``;
        presumably returns a list of string tokens -- confirm against the
        tokenizer implementation.
    :return: List of row strings.
    """
    # NumPy integer scalars are not ``int`` instances; without this they
    # would take the argmax()/max() branch and print a meaningless label.
    label_types = (int, np.integer)
    hard_label = isinstance(y_orig, label_types)
    assert hard_label == isinstance(y_adv, label_types)
    if hard_label:
        head_str = "Label: %d --> %d" % (y_orig, y_adv)
    else:
        head_str = "Label: %d (%.2lf%%) --> %d (%.2lf%%)" % (y_orig.argmax(), y_orig.max() * 100, y_adv.argmax(), y_adv.max() * 100)

    blank = " " * max_len
    # Padding uses the width of the plain text; the colour codes take no columns.
    ret = [("\033[32m%s\033[0m" % head_str) + " " * (max_len - sent_len(head_str)), blank]

    token_orig = tokenizer.tokenize(x_orig, pos_tagging=False)
    token_adv = tokenizer.tokenize(x_adv, pos_tagging=False)
    pairs = levenshtein_visual(token_orig, token_adv)

    curr1 = ""
    curr2 = ""
    # Visible width of the current line, tracked separately because
    # curr1 / curr2 contain escape sequences.
    length = 0
    for tokenA, tokenB in pairs:
        width = sent_len(tokenA)
        assert width == sent_len(tokenB)
        if length + width + 1 > max_len:
            # The pair does not fit: flush the current line and start a new one.
            ret.append(curr1 + " " * (max_len - length))
            ret.append(curr2 + " " * (max_len - length))
            ret.append(blank)
            curr1 = ""
            curr2 = ""
            length = 0
        if tokenA.lower() == tokenB.lower():
            curr1 += tokenA + " "
            curr2 += tokenB + " "
        else:
            curr1 += "\033[1;31m" + tokenA + "\033[0m" + " "
            curr2 += "\033[1;32m" + tokenB + "\033[0m" + " "
        length += width + 1
    if length > 0:
        ret.append(curr1 + " " * (max_len - length))
        ret.append(curr2 + " " * (max_len - length))
        ret.append(blank)
    return ret

def left_bar_failed(x_orig, y_orig, max_len, tokenizer):
    """
    Build the left-hand panel for a sample whose attack failed.

    The panel consists of a red ``Label: ... --> Failed!`` headline, a blank
    row, the original tokens wrapped to ``max_len`` columns, and a closing
    blank row.

    :param x_orig: Original text, passed to ``tokenizer.tokenize``.
    :param y_orig: Prediction for ``x_orig``: either an integer label
        (Python ``int`` or NumPy integer scalar) or an object providing
        ``argmax()`` / ``max()``, whose maximum is shown as a percentage.
    :param max_len: Display width of the panel in columns.
    :param tokenizer: Object with ``tokenize(text, pos_tagging=False)``;
        presumably returns a list of string tokens -- confirm against the
        tokenizer implementation.
    :return: List of row strings.
    """
    # NumPy integer scalars are not ``int`` instances; without this they
    # would take the argmax()/max() branch and print a meaningless label.
    if isinstance(y_orig, (int, np.integer)):
        head_str = "Label: %d --> Failed!" % y_orig
    else:
        head_str = "Label: %d (%.2lf%%) --> Failed!" % (y_orig.argmax(), y_orig.max() * 100)

    blank = " " * max_len
    # Padding uses the width of the plain text; the colour codes take no columns.
    ret = [("\033[31m%s\033[0m" % head_str) + " " * (max_len - sent_len(head_str)), blank]

    curr = ""
    for tk in tokenizer.tokenize(x_orig, pos_tagging=False):
        if sent_len(curr) + sent_len(tk) + 1 > max_len:
            # Token does not fit: flush the current line and start a new one.
            ret.append(curr + " " * (max_len - sent_len(curr)))
            curr = tk + " "
        else:
            curr += tk + " "
    if sent_len(curr) > 0:
        ret.append(curr + " " * (max_len - sent_len(curr)))
    ret.append(blank)
    return ret

def visualizer(idx, x_orig, y_orig, x_adv, y_adv, info, stream_writer, tokenizer, key_len=25, val_len=10):
    """
    Visualization tools used in :py:class:`.DefaultAttackEval`.

    Writes a ``Sample: <idx> ====`` headline followed by a two-panel view:
    the text comparison on the left and the ``info`` key/value table on the
    right, separated by ``|``. The shorter panel is padded with blank rows
    so that both have the same height.

    :param idx: Sample index shown in the headline.
    :param x_orig: Original text.
    :param y_orig: Prediction for the original text.
    :param x_adv: Adversarial text, or ``None`` when the attack failed.
    :param y_adv: Prediction for the adversarial text (unused when
        ``x_adv`` is ``None``).
    :param info: Mapping rendered in the right-hand panel.
    :param stream_writer: Callable that receives each output string.
    :param tokenizer: Tokenizer forwarded to the left-panel builders.
    :param key_len: Width of the key column of the right-hand panel.
    :param val_len: Width of the value column of the right-hand panel.
    """
    try:
        cols = os.get_terminal_size().columns
    except OSError:
        # No terminal attached (e.g. output is redirected).
        cols = 80

    headline = "Sample: %d " % idx
    headline = headline + ("=" * (cols - sent_len(headline) - 1)) + "\n"
    stream_writer(headline)

    # One column is taken by the "|" separator between the panels.
    max_len = cols - 1 - key_len - val_len

    right = right_bar_print(info, key_len=key_len, val_len=val_len)
    if x_adv is None:
        # Failed
        left = left_bar_failed(x_orig, y_orig, max_len, tokenizer)
    else:
        left = left_bar_print(x_orig, y_orig, x_adv, y_adv, max_len, tokenizer)

    left_blank = " " * max_len
    right_blank = " " * (key_len + val_len)
    if len(left) < len(right):
        delta = len(right) - len(left)
        if delta % 2 == 1:
            left.append(left_blank)
            delta -= 1
        while delta > 0:
            delta -= 2
            # Index 1 keeps the label headline as the first row.
            left.insert(1, left_blank)
            left.append(left_blank)
    elif len(right) < len(left):
        delta = len(left) - len(right)
        if delta % 2 == 1:
            right.append(right_blank)
            delta -= 1
        while delta > 0:
            delta -= 2
            right.insert(0, right_blank)
            right.append(right_blank)
    assert len(left) == len(right)

    for l, r in zip(left, right):
        stream_writer(l)
        stream_writer("|")
        stream_writer(r)
        stream_writer("\n")
def result_visualizer(result, stream_writer):
    """
    Visualization tools used in :py:class:`.DefaultAttackEval`.

    Writes a boxed two-column "Summary" table of ``result`` through
    ``stream_writer``. When the table would be wider than the terminal,
    both columns are shrunk and their cells truncated.

    :param result: Mapping from ``str`` keys to values. ``bool`` is shown as
        ``yes`` / ``no``, ``int`` with ``%d``, ``float`` with ``%.5g`` and
        anything else with ``%s``. An empty mapping produces a table with
        only the title box.
    :param stream_writer: Callable that receives each output line
        (including the trailing newline).
    """
    try:
        cols = os.get_terminal_size().columns
    except OSError:
        # No terminal attached (e.g. output is redirected).
        cols = 80

    left = []
    right = []
    for key, val in result.items():
        left.append(" " + key + ": ")
        # bool must be tested before int because bool is a subclass of int.
        if isinstance(val, bool):
            # Parenthesised so the leading space is kept for "no" as well.
            cell = " " + ("yes" if val else "no")
        elif isinstance(val, int):
            cell = " %d" % val
        elif isinstance(val, float):
            cell = " %.5g" % val
        else:
            cell = " %s" % val
        right.append(cell + " ")

    # default=0 keeps an empty result from raising ValueError.
    max_left = max(map(len, left), default=0)
    max_right = max(map(len, right), default=0)
    # The three extra columns are the "|" borders and separator.
    if max_left + max_right + 3 > cols:
        delta = max_left + max_right + 3 - cols
        if delta % 2 == 1:
            # Take the odd column from the key side, then split the rest evenly.
            delta -= 1
            max_left -= 1
        max_left -= delta // 2
        max_right -= delta // 2
    total = max_left + max_right + 3

    title = "Summary"
    if total - 2 < len(title):
        title = title[:total - 2]
    offtitle = ((total - len(title)) // 2) - 1
    border = "+" + ("=" * (total - 2)) + "+\n"

    stream_writer(border)
    stream_writer("|" + " " * offtitle + title + " " * (total - 2 - offtitle - len(title)) + "|" + "\n")
    stream_writer(border)
    for l, r in zip(left, right):
        l = l[:max_left]
        r = r[:max_right]
        l += " " * (max_left - len(l))
        r += " " * (max_right - len(r))
        stream_writer("|" + l + "|" + r + "|" + "\n")
    stream_writer(border)