import math
import random
# Module-level model state, filled in by train() and read by P_C/P_W_C/test.
# Total number of training samples seen so far (one per train() call).
cla_all_num = 0
# Maps class label -> number of training samples with that label.
cla_num = {}
# Maps class label -> {tag -> number of times the tag occurred in that class}.
cla_tag_num = {}
landa = 0.6# Laplace (additive) smoothing value used by P_W_C
def train(taglist, cla):
    """Add one training sample to the model.

    Updates the module-level counters in place:
    ``cla_all_num`` (total samples), ``cla_num`` (samples per class) and
    ``cla_tag_num`` (per-class tag occurrence counts).

    Args:
        taglist: Iterable of hashable feature tags describing the sample.
        cla: Hashable class label of the sample.
    """
    global cla_all_num
    cla_all_num += 1

    # Count the sample for its class, creating the entry on first sight.
    cla_num[cla] = cla_num.get(cla, 0) + 1

    # setdefault returns the stored dict itself, so the updates below
    # mutate the model's per-class tag table directly.
    tag_counts = cla_tag_num.setdefault(cla, {})
    for tag in taglist:
        tag_counts[tag] = tag_counts.get(tag, 0) + 1
def P_C(cla):
    """Return the prior probability of class ``cla``.

    Computed as (training samples labelled ``cla``) / (all training samples).

    Raises:
        KeyError: If ``cla`` was never passed to train().
        ZeroDivisionError: If no sample has been trained yet.
    """
    return cla_num[cla] / cla_all_num
def P_W_C(tag, cla):
    """Return the smoothed conditional probability of ``tag`` given ``cla``.

    The estimate is::

        (count(tag in cla) + landa) / (cla_num[cla] + distinct_tags * landa)

    where ``distinct_tags`` is the number of different tags seen in class
    ``cla`` during training and ``landa`` is the module-level smoothing
    value. A tag never seen in the class has count 0, so it still gets a
    non-zero probability.

    Raises:
        KeyError: If ``cla`` was never passed to train().
    """
    tag_counts = cla_tag_num[cla]
    denominator = cla_num[cla] + len(tag_counts) * landa
    # Unseen tags contribute a count of 0, leaving only the smoothing term.
    return (tag_counts.get(tag, 0) + landa) / denominator
def test(test_tags):
    """Classify a sample and return the best class with its log2 score.

    For every trained class the score is
    ``sum(log2 P_W_C(tag, cla) for tag in test_tags) + log2 P_C(cla)``,
    i.e. the log of P(w|Ci) * P(Ci). Working in log space avoids
    multiplying many small probabilities together.

    Args:
        test_tags: Iterable of feature tags describing the sample.

    Returns:
        A ``(label, score)`` tuple for the highest-scoring class. On a tie
        the class encountered first wins. If nothing has been trained yet
        the result is ``('', None)``.
    """
    res = ''
    res_P = None
    for cla in cla_num:
        log_P_W_C = 0
        for tag in test_tags:
            log_P_W_C += math.log(P_W_C(tag, cla), 2)
        tmp_P = log_P_W_C + math.log(P_C(cla), 2)  # log2(P(w|Ci) * P(Ci))
        # Take the first class unconditionally, later ones only if strictly
        # better.
        if res_P is None or tmp_P > res_P:
            res = cla
            res_P = tmp_P
    return res, res_P
def create_MarriageData():
    """Build, print and return one random six-attribute sample.

    One value is drawn with ``random.choice`` from each of six fixed value
    lists, in order. The value lists contain the same tags that appear in
    the first six columns of the training data in beyesi().

    Returns:
        A list of six strings, one per attribute.
    """
    attribute_values = (
        ['青绿', '乌黑', '浅白'],
        ['蜷缩', '稍蜷', '硬挺'],
        ['浊响', '沉闷', '清脆'],
        ['清晰', '稍糊', '模糊'],
        ['凹陷', '稍凹', '平坦'],
        ['硬滑', '软粘'],
    )
    # One draw per attribute, in attribute order.
    dataset = [random.choice(values) for values in attribute_values]
    print("随机产生西瓜为:", dataset)
    return dataset
def beyesi():
    """Train the classifier on the built-in 17-sample data set.

    Each row holds six attribute tags followed by the class label
    ('是' or '否'); 8 rows are labelled '是' and 9 are labelled '否'.
    Every row is fed to train(), so calling this function more than once
    adds the same samples to the model again.
    """
    data = [
        ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
        ['乌黑', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
        ['乌黑', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
        ['青绿', '蜷缩', '沉闷', '清晰', '凹陷', '硬滑', '是'],
        ['浅白', '蜷缩', '浊响', '清晰', '凹陷', '硬滑', '是'],
        ['青绿', '稍蜷', '浊响', '清晰', '稍凹', '软粘', '是'],
        ['乌黑', '稍蜷', '浊响', '稍糊', '稍凹', '软粘', '是'],
        ['乌黑', '稍蜷', '浊响', '清晰', '稍凹', '硬滑', '是'],
        ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
        ['青绿', '硬挺', '清脆', '清晰', '平坦', '软粘', '否'],
        ['浅白', '硬挺', '清脆', '模糊', '平坦', '硬滑', '否'],
        ['浅白', '蜷缩', '浊响', '模糊', '平坦', '软粘', '否'],
        ['浅白', '稍蜷', '浊响', '稍糊', '凹陷', '硬滑', '否'],
        ['浅白', '稍蜷', '沉闷', '稍糊', '凹陷', '硬滑', '否'],
        ['乌黑', '稍蜷', '沉闷', '稍糊', '稍凹', '软粘', '否'],
        ['浅白', '蜷缩', '浊响', '模糊', '平坦', '硬滑', '否'],
        ['青绿', '蜷缩', '沉闷', '稍糊', '稍凹', '硬滑', '否'],
    ]
    # First six columns are the feature tags, the last column is the label.
    for row in data:
        train(row[0:6], row[-1])
    # To check the model against its own training data, call
    # test(row[0:6]) for each row here and print the result.
if __name__ == '__main__':
    # Train the naive Bayes classifier on the built-in samples.
    beyesi()
    # Classify one fixed sample.
    testcs = ['青绿', '蜷缩', '浊响', '清晰', '凹陷', '硬滑']
    print("单例测试为:", testcs)
    print('测试结果:', test(testcs))
    # Classify randomly generated samples; range(1, 20) yields 19 of them.
    for _ in range(1, 20):
        print('测试结果:', test(create_MarriageData()))
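# 评论  (stray "Comments" label left over from the web page this code was copied from; not part of the program)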