Reference links:
- bertviz on GitHub
- Usage guide: "Trying out bertviz, a visualization tool for BERT"
- Analyses of attention patterns found in BERT: "Deconstructing BERT: visualizing the inner workings of attention", "Visualizing and learning from BERT, the strongest NLP model"
Code (renders the head view and saves it as an HTML file):
import os

from transformers import AutoTokenizer, AutoModel, utils
from bertviz import model_view, head_view

utils.logging.set_verbosity_error()  # Suppress standard transformers warnings
model_name = '/downloads/bert-base-chinese'  # Local copy of bert-base-chinese

# Placeholder record standing in for the data pipeline's `item`; replace with your own data
item = {'title': '今天天气不错', 'key_word': '天气'}
input_text = item['title'] + item['key_word']

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Register the [unusedN] placeholders as special tokens so the tokenizer keeps them intact
tokenizer.add_special_tokens({"additional_special_tokens": ["[unused1]", "[unused2]", "[unused3]"]})
model = AutoModel.from_pretrained(model_name, output_attentions=True) # Configure model to return attention values
inputs = tokenizer.encode(input_text, return_tensors='pt') # Tokenize input text
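# encode() automatically prepends [CLS] and appends [SEP] (ids 101 and 102 in the bert-base-chinese vocab)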
print('inputs:', inputs)
outputs = model(inputs)  # Run model; attentions are returned because output_attentions=True
attention = outputs.attentions  # Tuple of per-layer attention tensors
tokens = tokenizer.convert_ids_to_tokens(inputs[0]) # Convert input ids to token strings
print('tokens:', tokens)
# print('attention:',attention)
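# attention is a tuple with one tensor per layer; each tensor has shape
# (batch_size, num_heads, seq_len, seq_len), i.e. (1, 12, len(tokens), len(tokens)) for bert-base-chinese
print('layers:', len(attention), 'per-layer shape:', tuple(attention[0].shape))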
# model_view(attention, tokens) # Display model view
html_head_view = head_view(attention, tokens, html_action='return')  # Return an HTML object instead of rendering inline
os.makedirs("views", exist_ok=True)  # Make sure the output directory exists
with open("views/bert.html", 'w', encoding='utf-8') as file:
    file.write(html_head_view.data)
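The model view that is commented out above can be exported the same way; a minimal sketch, assuming a bertviz version where model_view also accepts html_action='return' (the output file name views/bert_model.html is arbitrary):
html_model_view = model_view(attention, tokens, html_action='return')  # Return HTML instead of rendering inline
with open("views/bert_model.html", 'w', encoding='utf-8') as file:
    file.write(html_model_view.data)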