您好,欢迎访问三七文档
当前位置:首页 > 商业/管理/HR > 经营企划 > 决策树算法 java源码
importjava.util.HashMap;importjava.util.HashSet;importjava.util.LinkedHashSet;importjava.util.Iterator;//1、selectAtrribute中的一个数组下标出错2、两个字符串相等的判断//3、输入的数据有一个错误4、selectAtrribute中最后一个循环忘记了i++//决策树的树结点类classTreeNode{Stringelement;//该值为数据的属性名称Stringvalue;//上一个分裂属性在此结点的值LinkedHashSetTreeNodechilds;//结点的子结点,以有顺序的链式哈希集存储publicTreeNode(){this.element=null;this.value=null;this.childs=null;}publicTreeNode(Stringvalue){this.element=null;this.value=value;this.childs=null;}publicStringgetElement(){returnthis.element;}publicvoidsetElement(Stringe){this.element=e;}publicStringgetValue(){returnthis.value;}publicvoidsetValue(Stringv){this.value=v;}publicLinkedHashSetTreeNodegetChilds(){returnthis.childs;}publicvoidsetChilds(LinkedHashSetTreeNodechilds){this.childs=childs;}}//决策树类classDecisionTree{TreeNoderoot;//决策树的树根结点publicDecisionTree(){root=newTreeNode();}publicDecisionTree(TreeNoderoot){this.root=root;}publicTreeNodegetRoot(){returnroot;}publicvoidsetRoot(TreeNoderoot){this.root=root;}publicStringselectAtrribute(TreeNodenode,String[][]deData,booleanflags[],LinkedHashSetStringatrributes,HashMapString,IntegerattrIndexMap){//Gain数组存放当前结点未分类属性的Gain值doubleGain[]=newdouble[atrributes.size()];//每条数据中归类的下标,为每条数据的最后一个值intclass_index=deData[0].length-1;//属性名,该结点在该属性上进行分类Stringreturn_atrribute=null;//计算每个未分类属性的Gain值intcount=0;//计算到第几个属性for(Stringatrribute:atrributes){//该属性有多少个值,该属性有多少个分类intvalues_count,class_count;//属性值对应的下标intindex=attrIndexMap.get(atrribute);//存放属性的各个值和分类值LinkedHashSetStringvalues=newLinkedHashSetString();LinkedHashSetStringclasses=newLinkedHashSetString();for(inti=0;ideData.length;i++){if(flags[i]==true){values.add(deData[i][index]);classes.add(deData[i][class_index]);}}values_count=values.size();class_count=classes.size();intvalues_vector[]=newint[values_count*class_count];intclass_vector[]=newint[class_count];for(inti=0;ideData.length;i++){if(flags[i]==true){intj=0;for(Stringv:values){if(deData[i][index].equals(v)){break;}else{j++;}}intk=0;for(Stringc:classes){if(deData[i][class_index].equals(c)){break;}else{k++;}}values_vector[j*class_count+k]++;class_vector[k]++;}}/*//输出各项统计值for(inti=0;ivalues_count*class_count;i++){System.out.print(values_vector[i]+);}System.out.println();for(inti=0;iclass_count;i++){System.out.print(class_vector[i]+);}System.out.println();*///计算InforDdoubleInfoD=0.0;doubleclass_total=0.0;for(inti=0;iclass_vector.length;i++){class_total+=class_vector[i];}for(inti=0;iclass_vector.length;i++){if(class_vector[i]==0){continue;}else{doubled=Math.log(class_vector[i]/class_total)/Math.log(2.0)*class_vector[i]/class_total;InfoD=InfoD-d;}}//计算InfoAdoubleInfoA=0.0;for(inti=0;ivalues_count;i++){doublemiddle=0.0;doubleattr_count=0.0;for(intj=0;jclass_count;j++){attr_count+=values_vector[i*class_count+j];}for(intj=0;jclass_count;j++){if(values_vector[i*class_count+j]!=0){doublek=values_vector[i*class_count+j];middle=middle-Math.log(k/attr_count)/Math.log(2.0)*k/attr_count;}}InfoA+=middle*attr_count/class_total;}Gain[count]=InfoD-InfoA;count++;}doublemax=0.0;inti=0;for(Stringatrribute:atrributes){if(Gain[i]max){max=Gain[i];return_atrribute=atrribute;}i++;}returnreturn_atrribute;}//node:在当前结点构造决策树//deData:数据集//flags:指示在当前结点构造决策树时哪些数据是需要的//attributes:未分类的属性集//attrIndexMap:属性与对应数据下标publicvoidbuildDecisionTree(TreeNodenode,String[][]deData,booleanflags[],LinkedHashSetStringattributes,HashMapString,IntegerattrIndexMap){//如果待分类属性已空if(attributes.isEmpty()==true){//从数据集中选择多数类,遍历符合条件的所有数据HashMapString,IntegerclassMap=newHashMapString,Integer();intclassIndex=deData[0].length-1;for(inti=0;ideData.length;i++){if(flags[i]==true){if(classMap.containsKey(deData[i][classIndex])){intcount=classMap.get(deData[i][classIndex]);classMap.put(deData[i][classIndex],count+1);}else{classMap.put(deData[i][classIndex],1);}}}//选择多数类StringmostClass=null;intmostCount=0;IteratorStringit=classMap.keySet().iterator();while(it.hasNext()){StringstrClass=(String)it.next();if(classMap.get(strClass)mostCount){mostClass=strClass;mostCount=classMap.get(strClass);}}//对结点进行赋值,该结点为叶结点node.setElement(mostClass);node.setChilds(null);System.out.println(yezhi:+node.getElement()+:+node.getValue());return;}//如果待分类数据全都属于一个类intclass_index=deData[0].length-1;Stringclass_name=null;HashSetStringclassSet=newHashSetString();for(inti=0;ideData.length;i++){if(flags[i]==true){class_name=deData[i][class_index];classSet.add(class_name);}}//则该结点为叶结点,设置有关值,然后返回if(classSet.size()==1){node.setElement(class_name);node.setChilds(null);System.out.println(leaf:+node.getElement()+:+node.getValue());return;}//给定的分枝没有元组,是不是有这种情况?//选择一个分类属性Stringattribute=selectAtrribute(node,deData,flags,attributes,attrIndexMap);//设置分裂结点的值node.setElement(attribute);//System.out.println(attribute);if(node==root){System.out.println(root:+node.getElement()+:+node.getValue());}else{System.out.println(branch:+node.getElement()+:+node.getValue());}//生成和设置各个子结点intattrIndex=attrIndexMap.get(attribute);LinkedHashSetStringattrValues=newLinkedHashSetStrin
本文标题:决策树算法 java源码
链接地址:https://www.777doc.com/doc-4328283 .html