写点什么

医疗知识图谱问答 —— 数据同步

作者:北桥苏
  • 2023-08-02
    广东
  • 本文字数:5391 字

    阅读完需:约 18 分钟

前言

前面的文章已经介绍了 neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来进入 python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文主要介绍知识图谱的构建。



环境

Anaconda3


Python3.8


Py2neo (新版)


数据来源 (结构)

编码

1. 引入依赖


import jsonfrom py2neo import Graph, Node
复制代码


2. 类的初始化 (连接 neo4j)


def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Remember the dataset location and open the Neo4j connection.

    All parameters default to the values that were previously hard-coded,
    so ``ClassName()`` keeps working unchanged.

    Args:
        data_path: Path of the medical dataset file that ``read_data``
            opens and parses line by line.
        uri: Bolt URI of the Neo4j server.
        auth: ``(user, password)`` tuple passed to ``Graph``.
    """
    self.data_path = data_path
    # NOTE(review): the default password lives in source code; prefer
    # passing `auth` from configuration kept outside version control.
    self.neo4j = Graph(uri, auth=auth)
复制代码


3. 读取数据


def read_data(self):
    """Parse the dataset at ``self.data_path`` into entities and relations.

    The file is read line by line; every non-blank line must be a JSON
    object with at least a ``name`` key. All other keys are optional: a
    missing text attribute becomes ``''`` and a missing list is treated as
    empty.

    Returns:
        A 20-item tuple, in this order:
        ``set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info, rels_symptom,
        rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
        rels_department, rels_category, rels_drug_producer``.
        ``rels_drug_producer`` appears twice because the callers in this
        file unpack exactly 20 names. Every ``rels_*`` entry is a
        two-element list ``[start_name, end_name]``.

    Raises:
        KeyError: if a record has no ``name``.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Entity names; de-duplicated with set() on return.
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []

    # One attribute dict per disease.
    disease_info = []

    # Relations.
    rels_symptom = []        # disease -> symptom
    rels_acompany = []       # disease -> complication
    rels_category = []       # disease -> department
    rels_department = []     # department -> department
    rels_commondrug = []     # disease -> common drug
    rels_recommenddrug = []  # disease -> recommended drug
    rels_noteat = []         # disease -> food to avoid
    rels_doeat = []          # disease -> food that is fine
    rels_recommendeat = []   # disease -> recommended food
    rels_check = []          # disease -> check item
    rels_drug_producer = []  # producer -> drug

    # Text attributes copied verbatim onto the disease record.
    text_fields = (
        'desc', 'prevent', 'cause', 'get_prob', 'yibao_status', 'easy_get',
        'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
    )

    # `with` guarantees the file is closed even if a line fails to parse.
    with open(self.data_path, encoding="utf8", mode="r") as source:
        for line in source:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            data_json = json.loads(line)
            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {'name': disease, 'cure_department': []}
            for field in text_fields:
                disease_dict[field] = data_json.get(field, '')
            disease_info.append(disease_dict)

            symptom = data_json.get('symptom', [])
            rels_symptom += [[disease, symptom_i] for symptom_i in symptom]
            symptoms += symptom

            # Departments: one entry links the disease directly; two
            # entries additionally record a department pair.
            if "cure_department" in data_json:
                cure_department = data_json['cure_department']
                departments += cure_department
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large = cure_department[0]
                    small = cure_department[1]
                    # NOTE(review): pair order is [large, small]; confirm
                    # it matches the intended direction of the
                    # department-to-department relation.
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            # Complications are also collected as symptom names.
            acompanys = data_json.get('acompany', [])
            rels_acompany += [[disease, acompany] for acompany in acompanys]
            symptoms += acompanys

            commondrug = data_json.get('common_drug', [])
            drugs += commondrug
            rels_commondrug += [[disease, drug_c] for drug_c in commondrug]

            recommenddrug = data_json.get('recommand_drug', [])
            rels_recommenddrug += [[disease, drug_r] for drug_r in recommenddrug]
            drugs += recommenddrug

            noteat = data_json.get('not_eat', [])
            rels_noteat += [[disease, noteat_i] for noteat_i in noteat]
            foods += noteat

            doeat = data_json.get('do_eat', [])
            rels_doeat += [[disease, doeat_i] for doeat_i in doeat]
            foods += doeat

            recommendfood = data_json.get('recommand_eat', [])
            rels_recommendeat += [[disease, food_i] for food_i in recommendfood]
            foods += recommendfood

            checkitem = data_json.get('check', [])
            for check_i in checkitem:
                # This one check name is kept as an entity but excluded
                # from the relations (presumably because its single quotes
                # break a quoted Cypher string literal).
                if check_i != "血清5'-核苷酸酶(5'-NT)":
                    rels_check.append([disease, check_i])
            checks += checkitem

            # Each entry is split on "(": the text before the first "(" is
            # the producer, the text after the last "(" (minus ")") is the
            # drug.
            druginfo = data_json.get('drug_detail', [])
            producers += [name.split("(")[0] for name in druginfo]
            rels_drug_producer += [
                [name.split("(")[0], name.split("(")[-1].replace(")", "")]
                for name in druginfo
            ]

    return set(diseases), set(symptoms), set(producers), set(departments), set(drugs), set(foods), set(
        checks), disease_info, rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat, \
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer, rels_department, rels_category, rels_drug_producer
复制代码


4. 创建节点


def create_medical_nodes(self):
    """Read the dataset and create graph nodes from it.

    Only the disease nodes (with their full attribute set) are created by
    the active code; the per-entity stages are kept commented out, as in
    the original, so they can be re-enabled one at a time.

    Returns:
        None.
    """
    print("start create nodes")
    # Use this instance's own reader instead of a module-level global, so
    # the method works for any instance. Only the first eight items of the
    # 20-item result are needed here.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info, *_relations) = self.read_data()

    # Disease nodes (name only)
    # self.create_node('Diseases', diseases)
    # Symptom nodes
    # self.create_node('Symptoms', symptoms)
    # Department nodes
    # self.create_node('Departments', departments)
    # Drug nodes
    # self.create_node('Drugs', drugs)
    # Food nodes
    # self.create_node('Foods', foods)
    # Drug producer nodes
    # self.create_node('Producers', producers)
    # Check item nodes
    # self.create_node('Checks', checks)
    self.create_disease_node('Diseases', disease_info)
    return
# Generic node creation for entities that only carry a name; disease nodes
# are created separately by create_disease_node.
def create_node(self, label, values):
    """Create one ``label`` node per name in ``values``.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of node names; each must be a ``str`` because it
            is concatenated into the progress message.

    Returns:
        Number of nodes created (0 for an empty iterable).
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        self.neo4j.create(Node(label, name=val))
    return count
def create_disease_node(self, label, values):
    """Create one ``label`` node per disease record, with all attributes.

    Args:
        label: Node label passed to ``Node``.
        values: Iterable of dicts; each must contain every key listed in
            ``property_keys`` below.

    Returns:
        Number of nodes created. (The counter was previously never
        incremented, so the method always returned 0.)

    Raises:
        KeyError: if a record lacks one of the expected keys.
    """
    # Properties copied from each record onto its node.
    property_keys = (
        'name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
        'easy_get', 'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
        'cure_department',
    )
    count = 0
    for disease in values:
        print("节点" + label + ", 名称:" + disease['name'])
        properties = {key: disease[key] for key in property_keys}
        self.neo4j.create(Node(label, **properties))
        count += 1
    return count
复制代码


5. 创建关联边


def create_medical_rels(self):
    """Read the dataset and create relationships between existing nodes.

    Only the disease -> check relation is created by the active code; the
    other stages are kept commented out, as in the original, so they can
    be re-enabled one at a time.

    Returns:
        None.
    """
    print("start create rels")
    # Use this instance's own reader instead of a module-level global.
    extracted = self.read_data()
    # Items 8..18 of the 20-item result are the relation lists; item 19
    # repeats rels_drug_producer and is not needed.
    (rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
     rels_noteat, rels_doeat, rels_recommendeat, rels_check,
     rels_drug_producer, rels_department, rels_category) = extracted[8:19]

    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> complication
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> common drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that is fine
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug
    # NOTE(review): the label "drugs" below is lower-case while the node
    # stage uses "Drugs"; confirm before enabling.
    # self.create_rel("Producers", "drugs", rels_drug_producer, "drug_of", "生产药品")

def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a ``rel_name`` relationship for every name pair in ``list``.

    For each pair the query matches the start and end nodes by label and
    ``name`` and creates ``(start)-[rel_name {name: rel_attr}]->(end)``.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of ``[start_name, end_name]`` pairs of strings.
            (The parameter name shadows the builtin; it is kept so that
            existing keyword callers keep working.)
        rel_name: Relationship type.
        rel_attr: Value stored in the relationship's ``name`` property.

    Returns:
        Number of pairs processed.
    """
    # Labels and relationship types cannot be Cypher parameters, so they
    # are formatted into the text; they must come from trusted code, not
    # from data. All data values are passed as query parameters, so names
    # containing quotes no longer break or alter the query.
    query = (
        "MATCH (start:%s), (end:%s) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        "CREATE (start)-[rel:%s {name: $rel_attr}]->(end)"
    ) % (start_node, end_node, rel_name)

    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print("创建边:" + rel_name + ",(" + start_node + "->" + end_node + "),点1:" + s + "点2:" + e)
        self.neo4j.run(query, start_name=s, end_name=e, rel_attr=rel_attr)
    return count
复制代码


6. 导出节点数据


# Export entity names to text files (presumably used as word-segmentation
# dictionaries -- confirm against the consumer of dict/*.txt).
def export_data(self):
    """Write each entity-name set to ``dict/<entity>.txt``, one name per line.

    Names are written in sorted order so repeated exports produce
    identical files. Every file is closed (and therefore flushed) before
    the next one is opened. The ``dict`` directory must already exist.

    Returns:
        None.

    Raises:
        OSError: if a target file cannot be opened for writing.
    """
    # Use this instance's own reader instead of a module-level global; only
    # the seven entity sets at the front of the result are needed.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     *_unused) = self.read_data()

    exports = (
        # Disease names are not exported (disabled in the original).
        # ("dict/diseases.txt", diseases),
        ("dict/symptoms.txt", symptoms),
        ("dict/producers.txt", producers),
        ("dict/departments.txt", departments),
        ("dict/drugs.txt", drugs),
        ("dict/foods.txt", foods),
        # Previously written twice with identical content; once suffices.
        ("dict/checks.txt", checks),
    )
    for path, names in exports:
        with open(path, encoding="utf-8", mode="w+") as handle:
            handle.write("\n".join(sorted(names)))
复制代码




发布于: 刚刚阅读数: 3
用户头像

北桥苏

关注

公众号:ZERO开发 2023-05-08 加入

专注后端实战技术分享,不限于PHP,Python,JavaScript, Java等语言,致力于给猿友们提供有价值,有干货的内容。

评论

发布
暂无评论
医疗知识图谱问答 —— 数据同步_Python_北桥苏_InfoQ写作社区