您好,欢迎访问三七文档
"""k-nearest-neighbours (kNN) classifier with two demo drivers.

Contains the classifier itself (``classify0``), helpers that load and
normalise the dating data set, and helpers that load 32x32 text-encoded
digit images. The code is written to run unchanged on Python 2 and 3.
"""
# The star import is retained so that names previously re-exported by this
# module (array, zeros, tile, ...) stay available to existing callers.
# NOTE: it shadows builtins such as sum/min/max inside this module, so the
# code below deliberately uses array methods or explicit ``np.`` calls.
from numpy import *
import numpy as np
import operator
from os import listdir


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: feature vector to classify; any array-like that broadcasts
            against one row of ``dataSet``.
        dataSet: 2-D array-like of training samples, one sample per row.
        labels: sequence of class labels, ``labels[i]`` belonging to row
            ``i`` of ``dataSet``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes among the ``k`` nearest rows
        (Euclidean distance).

    Raises:
        IndexError: if ``k`` is smaller than 1 or larger than the number
            of training samples.
    """
    dataSet = np.asarray(dataSet)
    # Broadcasting subtracts inX from every row; the sign is irrelevant
    # because the differences are squared.
    diffMat = dataSet - np.asarray(inX)
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndicies = distances.argsort()

    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndicies[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # .items() (not the Python-2-only .iteritems()) keeps this portable.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def createDataSet():
    """Return a tiny hard-coded sample: a 4x2 feature array and its labels."""
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def file2matrix(filename):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line supplies three numeric feature columns (the first
    three fields) and an integer class label (the last field).

    Args:
        filename: path of the text file to read.

    Returns:
        A tuple ``(returnMat, classLabelVector)``: an ``(n, 3)`` float
        array and a list of ``n`` ints.

    Raises:
        ValueError: if a field cannot be parsed as a number, or a line
            does not provide three feature columns.
    """
    # Read the file once and let ``with`` close the handle.
    with open(filename) as fr:
        records = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = np.zeros((len(records), 3))
    classLabelVector = []
    for index, listFromLine in enumerate(records):
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        classLabelVector.append(int(listFromLine[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` to the range [0, 1].

    Args:
        dataSet: 2-D array-like, one sample per row.

    Returns:
        A tuple ``(normDataSet, ranges, minVals)`` where ``ranges`` is the
        per-column ``max - min`` and ``minVals`` the per-column minimum.
        A constant column has range 0 and normalises to all zeros.
    """
    dataSet = np.asarray(dataSet, dtype=float)
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Divide constant columns by 1 instead of 0 to avoid NaN/inf output;
    # the true ranges are still what gets returned to the caller.
    safeRanges = np.where(ranges == 0, 1.0, ranges)
    normDataSet = (dataSet - minVals) / safeRanges  # element-wise divide
    return normDataSet, ranges, minVals


def datingClassTest():
    """Run a hold-out test on ``datingTestSet2.txt`` and print the error rate.

    The first ``hoRatio`` share of the normalised rows is classified with
    the remaining rows as training data, using ``k = 3``.
    """
    hoRatio = 0.50  # hold out 50% of the rows as test vectors
    datingDataMat, datingLabels = file2matrix('datingTestSet2.txt')
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]
    numTestVecs = int(m * hoRatio)
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :], normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # Guard against an empty test split (would otherwise divide by zero).
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the total error rate is: %f" % errorRate)
    print(errorCount)


def img2vector(filename):
    """Read a 32x32 text image of digit characters into a 1x1024 vector.

    Args:
        filename: path of a text file whose first 32 lines each start
            with 32 digit characters.

    Returns:
        A ``(1, 1024)`` float array holding the image row by row.

    Raises:
        IndexError: if a line is shorter than 32 characters.
        ValueError: if a character is not a digit.
    """
    returnVect = np.zeros((1, 1024))
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def _classNumFromFileName(fileNameStr):
    """Return the integer label encoded before the first ``_`` of a file name."""
    fileStr = fileNameStr.split('.')[0]  # take off .txt
    return int(fileStr.split('_')[0])


def handwritingClassTest():
    """Classify every file in ``testDigits`` against ``trainingDigits``.

    Labels are taken from the file names; each prediction, the number of
    errors and the error rate are printed.
    """
    # Load the training set.
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    hwLabels = []
    trainingMat = np.zeros((m, 1024))
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(_classNumFromFileName(fileNameStr))
        trainingMat[i, :] = img2vector('trainingDigits/%s' % fileNameStr)

    # Iterate through the test set.
    testFileList = listdir('testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = _classNumFromFileName(fileNameStr)
        vectorUnderTest = img2vector('testDigits/%s' % fileNameStr)
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print("the classifier came back with: %d, the real answer is: %d"
              % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of errors is: %d" % errorCount)
    # Guard against an empty test directory (would otherwise divide by zero).
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: %f" % errorRate)


if __name__ == '__main__':
    datingClassTest()
    print('helloworld!')
本文标题:kNN算法分析报告
链接地址:https://www.777doc.com/doc-4209550.html