医疗知识图谱问答 —— 数据同步
作者:北桥苏
- 2023-08-02 广东
本文字数:5391 字
阅读完需:约 18 分钟
前言
前面的文章已经介绍了 Neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来进入 Python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文主要介绍知识图谱的构建(即把 JSON 数据同步到 Neo4j)。
环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)
编码
1. 引入依赖
import json
from py2neo import Graph, Node
复制代码
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Remember the data file location and open the Neo4j connection.

    Args:
        data_path: Path of the JSON-lines file that ``read_data`` parses.
        uri: Bolt URI handed to ``py2neo.Graph``.
        auth: ``(user, password)`` tuple handed to ``py2neo.Graph``.

    The defaults reproduce the values that used to be hard-coded, so
    calling the constructor without arguments behaves as before.
    """
    self.data_path = data_path
    # NOTE(review): the default password is kept only for backward
    # compatibility; pass real credentials in from configuration instead of
    # committing them to source.
    self.neo4j = Graph(uri, auth=auth)
复制代码
3. 读取数据
def read_data(self):
    """Parse the JSON-lines file at ``self.data_path`` into names and relation pairs.

    Every non-blank line is decoded with ``json.loads`` and treated as one
    disease record. Only ``name`` is required; every other field is optional
    and falls back to ``''`` (scalar properties) or ``[]`` (list fields).

    Returns:
        A 20-tuple, in this order: ``set(diseases)``, ``set(symptoms)``,
        ``set(producers)``, ``set(departments)``, ``set(drugs)``,
        ``set(foods)``, ``set(checks)``, ``disease_info`` (list of property
        dicts), then the ``[start, end]`` pair lists ``rels_symptom``,
        ``rels_acompany``, ``rels_commondrug``, ``rels_recommenddrug``,
        ``rels_noteat``, ``rels_doeat``, ``rels_recommendeat``,
        ``rels_check``, ``rels_drug_producer``, ``rels_department``,
        ``rels_category`` and ``rels_drug_producer`` once more. The repeated
        last element is kept on purpose so that callers unpacking twenty
        names keep working.

    Raises:
        KeyError: if a record has no ``name`` field.
    """
    # Node names per label (deduplicated into sets on return).
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    # One property dict per disease.
    disease_info = []
    # Relation pairs, each entry is [start_name, end_name].
    rels_symptom = []        # disease -> symptom
    rels_acompany = []       # disease -> complication
    rels_category = []       # disease -> department
    rels_department = []     # department -> department
    rels_commondrug = []     # disease -> commonly used drug
    rels_recommenddrug = []  # disease -> recommended drug
    rels_noteat = []         # disease -> food to avoid
    rels_doeat = []          # disease -> food that is fine to eat
    rels_recommendeat = []   # disease -> recommended food
    rels_check = []          # disease -> check item
    rels_drug_producer = []  # producer -> drug

    # Scalar properties copied verbatim from the record when present.
    scalar_fields = (
        'desc', 'prevent', 'cause', 'get_prob', 'yibao_status', 'easy_get',
        'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
    )
    # This check name is left out of rels_check (it still ends up in the
    # checks set). It contains single quotes; presumably it was skipped
    # because such a name is awkward to embed in a quoted Cypher literal
    # -- TODO confirm whether the skip is still needed.
    skipped_check = "血清5'-核苷酸酶(5'-NT)"

    # ``with`` guarantees the file handle is released even if a line fails
    # to parse.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data_json = json.loads(line)

            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {field: data_json.get(field, '') for field in scalar_fields}
            disease_dict['name'] = disease
            disease_dict['cure_department'] = []
            disease_info.append(disease_dict)

            symptom = data_json.get('symptom', [])
            rels_symptom.extend([disease, symptom_i] for symptom_i in symptom)
            symptoms.extend(symptom)

            # Departments: one entry links the disease to it directly; two
            # entries are also linked to each other.
            if 'cure_department' in data_json:
                cure_department = data_json['cure_department']
                departments.extend(cure_department)
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large, small = cure_department
                    # NOTE(review): the pair is stored as [large, small] and
                    # the disease is attached to `large`; verify this is the
                    # intended direction for the "belongs_to" edges.
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            # Complications are stored as symptom nodes as well.
            acompanys = data_json.get('acompany', [])
            rels_acompany.extend([disease, acompany] for acompany in acompanys)
            symptoms.extend(acompanys)

            commondrug = data_json.get('common_drug', [])
            rels_commondrug.extend([disease, drug_c] for drug_c in commondrug)
            drugs.extend(commondrug)

            # Guarded like every other optional list, so a record without
            # recommended drugs no longer raises KeyError.
            recommenddrug = data_json.get('recommand_drug', [])
            rels_recommenddrug.extend([disease, drug_r] for drug_r in recommenddrug)
            drugs.extend(recommenddrug)

            noteat = data_json.get('not_eat', [])
            rels_noteat.extend([disease, food_i] for food_i in noteat)
            foods.extend(noteat)

            doeat = data_json.get('do_eat', [])
            rels_doeat.extend([disease, food_i] for food_i in doeat)
            foods.extend(doeat)

            recommendfood = data_json.get('recommand_eat', [])
            rels_recommendeat.extend([disease, food_i] for food_i in recommendfood)
            foods.extend(recommendfood)

            checkitem = data_json.get('check', [])
            rels_check.extend(
                [disease, check_i] for check_i in checkitem
                if check_i != skipped_check
            )
            checks.extend(checkitem)

            # drug_detail entries look like "<producer text>(<drug>)": the
            # part before the first "(" is taken as the producer, the part
            # after the last "(" (minus ")") as the drug.
            for detail in data_json.get('drug_detail', []):
                pieces = detail.split("(")
                producer = pieces[0]
                producers.append(producer)
                rels_drug_producer.append([producer, pieces[-1].replace(")", "")])

    return (
        set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info, rels_symptom,
        rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
        rels_department, rels_category, rels_drug_producer,
    )
复制代码
4. 创建节点
def create_medical_nodes(self):
    """Create graph nodes from the data returned by ``self.read_data()``.

    Only the disease nodes (with their full property set) are created; the
    generic per-label steps below are left commented out, exactly as they
    were, presumably so each step can be re-enabled and run on its own.

    Returns:
        None.
    """
    print("start create nodes")
    # Read through this instance instead of a module-level
    # ``build_medical_graph`` global, so the method works on any instance.
    # All twenty values are unpacked so the disabled steps below can be
    # re-enabled without further changes; the last slot repeats
    # rels_drug_producer and is ignored.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     _) = self.read_data()
    # Disease nodes (name only)
    # self.create_node('Diseases', diseases)
    # Symptom nodes
    # self.create_node('Symptoms', symptoms)
    # Department nodes
    # self.create_node('Departments', departments)
    # Drug nodes
    # self.create_node('Drugs', drugs)
    # Food nodes
    # self.create_node('Foods', foods)
    # Drug producer nodes
    # self.create_node('Producers', producers)
    # Check item nodes
    # self.create_node('Checks', checks)
    self.create_disease_node('Diseases', disease_info)
    return
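# Generic node creation (name only); disease nodes carry extra properties and are created separately by create_disease_node.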
def create_node(self, label, values):
    """Create one node per entry of ``values`` under the given label.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of node names; each becomes the ``name`` property.

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        # f-string formatting also copes with names that are not ``str``,
        # where ``+`` concatenation would raise TypeError.
        print(f"节点: {label}, 名称为: {val}")
        self.neo4j.create(Node(label, name=val))
    return count
def create_disease_node(self, label, values):
    """Create one fully-populated node per disease dict in ``values``.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of dicts, each holding every key listed in
            ``property_keys`` below.

    Returns:
        The number of nodes created. (The counter was previously never
        incremented, so the method always returned 0.)

    Raises:
        KeyError: if a dict lacks one of the expected keys.
    """
    # Properties copied from each disease dict onto its node.
    property_keys = (
        'name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
        'easy_get', 'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
        'cure_department',
    )
    count = 0
    for disease in values:
        count += 1
        print(f"节点{label}, 名称:{disease['name']}")
        node = Node(label, **{key: disease[key] for key in property_keys})
        self.neo4j.create(node)
    return count
复制代码
5. 创建关联边
def create_medical_rels(self):
    """Create graph relationships from the data returned by ``self.read_data()``.

    Only the disease -> check-item relationships are created; the other
    steps are left commented out, exactly as they were, presumably so each
    step can be re-enabled and run on its own.

    Returns:
        None.
    """
    print("start create rels")
    # Read through this instance instead of a module-level
    # ``build_medical_graph`` global, so the method works on any instance.
    # All twenty values are unpacked so the disabled steps below can be
    # re-enabled without further changes; the last slot repeats
    # rels_drug_producer and is ignored.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, rels_symptom, rels_acompany, rels_commondrug,
     rels_recommenddrug, rels_noteat, rels_doeat, rels_recommendeat,
     rels_check, rels_drug_producer, rels_department, rels_category,
     _) = self.read_data()
    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> commonly used drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that is fine to eat
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug (label must be "Drugs" to match the drug nodes; the
    # original comment used lowercase "drugs", which would match nothing)
    # self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")
def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a relationship for every ``[start_name, end_name]`` pair.

    For each pair, the nodes labelled ``start_node`` / ``end_node`` whose
    ``name`` matches are looked up and connected with a ``rel_name``
    relationship carrying ``name = rel_attr``.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of pairs; element 0 is the start node name, element 1
            the end node name. (The parameter name shadows the builtin but
            is kept so keyword callers do not break.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    # Labels and relationship types cannot be Cypher parameters, so they are
    # still interpolated; they come from code, not from the data file. The
    # node names and the attribute DO come from data and are passed as query
    # parameters, so quotes inside a name can neither break the statement
    # nor inject Cypher. The statement is loop-invariant, so build it once.
    query = (
        "MATCH (start:%s), (end:%s) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        "CREATE (start)-[rel:%s {name: $rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print(f"创建边:{rel_name},({start_node}->{end_node}),点1:{s}点2:{e}")
        self.neo4j.run(query, {"start_name": s, "end_name": e, "rel_attr": rel_attr})
    return count
复制代码
6. 导出节点数据
# 导出实体的节点分词
def export_data(self):
    """Write the entity names from ``self.read_data()`` to ``dict/<label>.txt``.

    One file each is written for symptoms, producers, departments, drugs,
    foods and checks, with one name per line. The disease export stays
    disabled, as it was.

    Returns:
        None.
    """
    import os

    # Read through this instance instead of a module-level
    # ``build_medical_graph`` global. Slots 1..6 of the result are the name
    # sets in this order; slot 0 (diseases) is not exported.
    symptoms, producers, departments, drugs, foods, checks = self.read_data()[1:7]

    # Make sure the target directory exists instead of failing on open().
    os.makedirs("dict", exist_ok=True)

    # Disease names (disabled)
    # ("diseases", diseases)
    exports = (
        ("symptoms", symptoms),
        ("producers", producers),
        ("departments", departments),
        ("drugs", drugs),
        ("foods", foods),
        ("checks", checks),  # written once; it used to be written twice
    )
    for stem, names in exports:
        # ``with`` closes (and therefore flushes) every file; sorting makes
        # the output reproducible, since set order is arbitrary.
        with open("dict/" + stem + ".txt", encoding="utf-8", mode="w") as out:
            out.write("\n".join(sorted(names)))
复制代码
划线
评论
复制
发布于: 刚刚阅读数: 3
版权声明: 本文为 InfoQ 作者【北桥苏】的原创文章。
原文链接:【http://xie.infoq.cn/article/a421887cc0937d0f9c4de20fd】。文章转载请联系作者。
北桥苏
关注
公众号:ZERO开发 2023-05-08 加入
专注后端实战技术分享,不限于PHP,Python,JavaScript, Java等语言,致力于给猿友们提供有价值,有干货的内容。
评论