将文字内容转换成pyg格式的代码
'''
python 字符串转图算法
王金龙 写于2023/9/4
'''
import spacy
from spacy import displacy
from transformers import BertTokenizer, TFBertModel
import os
# Route HTTP(S) traffic through a local proxy on port 7890.
# NOTE(review): machine-specific setting -- presumably needed so the
# tokenizer download below can reach the model hub; adjust or remove it
# if no proxy is listening on that port.
os.environ["http_proxy"] = "http://127.0.0.1:7890"
os.environ["https_proxy"] = "http://127.0.0.1:7890"

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
text = "Beautiful girl, I love you."
nlp = spacy.load('en_core_web_md')
doc = nlp(text)

# Flat list of BERT ids for the whole text, in spaCy token order.
bert_tocker = []
# index_list[i] is the position of spaCy token i's first BERT id.
index_list = []
# Edge list as two parallel rows: edge[0][k] -> edge[1][k].
edge = [[], []]
# Running position counter. It starts at 1 rather than 0 -- presumably to
# leave position 0 for a leading [CLS] token; confirm against the consumer.
jsq = 1

for char in doc:
    index_list.append(jsq)
    # Encode each spaCy token separately, without special tokens, so the
    # number of ids it produces tells us how far to advance the counter.
    bmh = tokenizer.encode(char.text, add_special_tokens=False)
    jsq += len(bmh)
    bert_tocker.extend(bmh)

# Connect every token to each other token in its subtree, using the
# positions recorded above as node ids.
for char in doc:
    for eidx in char.subtree:
        if eidx == char:
            # The subtree iteration can yield the token itself; no self-loop.
            continue
        edge[0].append(index_list[char.i])
        edge[1].append(index_list[eidx.i])

print(edge)
print(bert_tocker)
displacy.serve(doc, style="dep")
请安装 spacy 库使用，并且下载相关源码
安装
1. spacy 安装:
conda install spacy
2. en库安装:
python -m spacy download en
（注：spaCy v3 起已弃用 en 这类简写，需使用完整名称，例如 python -m spacy download en_core_web_sm）
3. en_core_web_md库安装:
python -m spacy download en_core_web_md
运行效果
[[2, 5, 5, 5, 5, 5, 5], [1, 1, 2, 3, 4, 6, 7]]
对应的图

暂无评论
发表评论