# Setting: there are four candidate classes, a, b, c and d.
# Decision rule: if p(a) > p(b) > p(c) > p(d) for a sample m, then m is assigned to class a (the most probable class).
class Bayes:
    """Naive Bayes classifier for discrete (categorical) feature vectors.

    Training stores, for every label, its prior probability and the list of
    training vectors carrying that label. Prediction multiplies the prior by
    the per-feature relative frequencies of the test values within that label
    and returns the label with the highest product.
    """

    def __init__(self):
        # Number of features per vector; -1 marks the model as untrained.
        self.length = -1
        # label -> share of training samples carrying that label (the prior).
        self.labelcount = dict()
        # label -> list of training vectors carrying that label.
        self.vectorcount = dict()

    def fit(self, dataSet, labels):
        """Train the classifier and return ``self``.

        Args:
            dataSet: sequence of feature vectors, all of the same length.
            labels: sequence of labels, one per vector in ``dataSet``.

        Raises:
            ValueError: if ``dataSet`` and ``labels`` differ in length, if
                ``dataSet`` is empty, or if the vectors differ in length.
        """
        if len(dataSet) != len(labels):
            raise ValueError("Wrong imput!")
        if len(dataSet) == 0:
            raise ValueError("Training data must not be empty.")

        length = len(dataSet[0])
        if any(len(vector) != length for vector in dataSet):
            raise ValueError("All training vectors must have the same length.")

        # Build fresh tables so that calling fit() again does not mix in
        # vectors left over from a previous training run.
        grouped = {}
        for vector, label in zip(dataSet, labels):
            grouped.setdefault(label, []).append(vector)

        total = len(labels)
        self.labelcount = {
            label: len(vectors) / total for label, vectors in grouped.items()
        }
        self.vectorcount = grouped
        self.length = length
        print("train finished!")
        return self

    def bayes_test(self, testData, labelSet):
        """Return the label in ``labelSet`` with the highest score.

        The score of a label is its prior multiplied by, for each feature,
        the fraction of that label's training vectors whose feature equals
        the corresponding value in ``testData``. Ties go to the label that
        comes first in ``labelSet``.

        Args:
            testData: one feature vector with as many features as the
                training vectors.
            labelSet: iterable of candidate labels seen during ``fit``.

        Raises:
            ValueError: if the model is untrained, if ``testData`` has the
                wrong number of features, or if ``labelSet`` is empty.
            KeyError: if a candidate label was not seen during ``fit``.
        """
        if self.length == -1:
            raise ValueError("you havn't taining yet!")
        if len(testData) != self.length:
            raise ValueError(
                "Test vector length does not match the training vectors."
            )

        scores = {}
        for label in labelSet:
            prior = self.labelcount[label]
            rows = self.vectorcount[label]
            total = len(rows)
            likelihood = 1
            # Transpose with zip rather than a numpy array: numpy would
            # coerce mixed feature types (e.g. str and int) to a single
            # dtype, and counting the original values would then find none.
            for column, value in zip(zip(*rows), testData):
                likelihood *= column.count(value) / total
            scores[label] = likelihood * prior

        if not scores:
            raise ValueError("labelSet must contain at least one label.")
        # max() returns the first maximal key, i.e. the earliest candidate.
        return max(scores, key=scores.get)


# *********************************************************************************************
import numpy as npy


class Bayes:
    """Naive Bayes classifier for discrete (categorical) feature vectors.

    Training stores, for every label, its prior probability and the list of
    training vectors carrying that label. Prediction multiplies the prior by
    the per-feature relative frequencies of the test values within that label
    and returns the label with the highest product.
    """

    def __init__(self):
        # Number of features per vector; -1 marks the model as untrained.
        self.length = -1
        # label -> share of training samples carrying that label (the prior).
        self.labelcount = dict()
        # label -> list of training vectors carrying that label.
        self.vectorcount = dict()

    def fit(self, dataSet: list, labels: list):
        """Train the classifier and return ``self``.

        Args:
            dataSet: feature vectors, all of the same length.
            labels: labels, one per vector in ``dataSet``.

        Raises:
            ValueError: if ``dataSet`` and ``labels`` differ in length, if
                ``dataSet`` is empty, or if the vectors differ in length.
        """
        if len(dataSet) != len(labels):
            raise ValueError("您输入的测试数组跟类别数组长度不一致")
        if len(dataSet) == 0:
            raise ValueError("训练数据不能为空")

        length = len(dataSet[0])
        if any(len(vector) != length for vector in dataSet):
            raise ValueError("所有训练数据的特征数量必须一致")

        # Build fresh tables so that calling fit() again does not mix in
        # vectors left over from a previous training run.
        grouped = {}
        for vector, label in zip(dataSet, labels):
            grouped.setdefault(label, []).append(vector)

        # Deriving the priors from the grouped vectors is a single pass,
        # instead of one labels.count() scan per distinct label.
        total = len(labels)
        self.labelcount = {
            label: len(vectors) / total for label, vectors in grouped.items()
        }
        self.vectorcount = grouped
        self.length = length
        print("训练结束")
        return self

    def btest(self, TestData, labelsSet):
        """Return the label in ``labelsSet`` with the highest score.

        The score of a label is its prior multiplied by, for each feature,
        the fraction of that label's training vectors whose feature equals
        the corresponding value in ``TestData``. Ties go to the label that
        comes first in ``labelsSet``.

        Args:
            TestData: one feature vector with as many features as the
                training vectors.
            labelsSet: iterable of candidate labels seen during ``fit``.

        Raises:
            ValueError: if the model is untrained, if ``TestData`` has the
                wrong number of features, or if ``labelsSet`` is empty.
            KeyError: if a candidate label was not seen during ``fit``.
        """
        if self.length == -1:
            raise ValueError("您还没有进行训练,请先训练")
        if len(TestData) != self.length:
            raise ValueError("测试数据的特征数量与训练数据不一致")

        scores = dict()
        for label in labelsSet:
            prior = self.labelcount[label]
            rows = self.vectorcount[label]
            total = len(rows)
            likelihood = 1
            # Transpose with zip rather than a numpy array: numpy would
            # coerce mixed feature types (e.g. str and int) to a single
            # dtype, and counting the original values would then find none.
            for column, value in zip(zip(*rows), TestData):
                likelihood *= column.count(value) / total
            scores[label] = likelihood * prior

        if not scores:
            raise ValueError("类别集合不能为空")
        # max() returns the first maximal key, i.e. the earliest candidate.
        return max(scores, key=scores.get)