您好,欢迎访问三七文档
当前位置:首页 > 商业/管理/HR > 经营企划 > C45算法建立决策树JAVA练习
【决策树】—C4.5算法建立决策树JAVA练习
以下程序是我练习写的,不一定正确,也没做存储优化。有问题请留言交流。转载请挂链接。
当前的属性为:age income student credit_rating
当前的数据集为(最后一列是TARGET_VALUE):
---------------------------------
youth high no fair no
youth high no excellent no
middle_aged high no fair yes
senior low yes fair yes
senior low yes excellent no
middle_aged low yes excellent yes
youth medium no fair no
youth low yes fair yes
senior medium yes fair yes
youth medium yes excellent yes
middle_aged high yes fair yes
senior medium no excellent no
---------------------------------
C4.5建立树类
packageC45Test;importjava.util.ArrayList;importjava.util.List;importjava.util.Map;publicclassDecisionTree{publicTreeNodecreateDT(ListArrayListStringdata,ListStringattributeList){System.out.println(当前的DATA为);for(inti=0;idata.size();i++){ArrayListStringtemp=data.get(i);for(intj=0;jtemp.size();j++){System.out.print(temp.get(j)+);}System.out.println();}System.out.println(---------------------------------);System.out.println(当前的ATTR为);for(inti=0;iattributeList.size();i++){System.out.print(attributeList.get(i)+);}System.out.println();System.out.println(---------------------------------);TreeNodenode=newTreeNode();Stringresult=InfoGain.IsPure(InfoGain.getTarget(data));if(result!=null){node.setNodeName(leafNode);node.setTargetFunValue(result);returnnode;}if(attributeList.size()==0){node.setTargetFunValue(result);returnnode;}else{InfoGaingain=newInfoGain(data,attributeList);doublemaxGain=0.0;intattrIndex=-1;for(inti=0;iattributeList.size();i++){doubletempGain=gain.getGainRatio(i);if(maxGaintempGain){maxGain=tempGain;attrIndex=i;}}System.out.println(选择出的最大增益率属性为:+attributeList.get(attrIndex));node.setAttributeValue(attributeList.get(attrIndex));ListArrayListStringresultData=null;MapString,LongattrvalueMap=gain.getAttributeValue(attrIndex);for(Map.EntryString,Longentry:attrvalueMap.entrySet()){resultData=gain.getData4Value(entry.getKey(),attrIndex);TreeNodeleafNode=null;System.out.println(当前为+attributeList.get(attrIndex)+的+entry.getKey()+分支。);if(resultData.size()==0){leafNode=newTreeNode();leafNode.setNodeName(attrib
uteList.get(attrIndex));leafNode.setTargetFunValue(result);leafNode.setAttributeValue(entry.getKey());}else{for(intj=0;jresultData.size();j++){resultData.get(j).remove(attrIndex);}ArrayListStringresultAttr=newArrayListString(attributeList);resultAttr.remove(attrIndex);leafNode=createDT(resultData,resultAttr);}node.getChildTreeNode().add(leafNode);node.getPathName().add(entry.getKey());}}returnnode;}classTreeNode{privateStringattributeValue;privateListTreeNodechildTreeNode;privateListStringpathName;privateStringtargetFunValue;privateStringnodeName;publicTreeNode(StringnodeName){this.nodeName=nodeName;this.childTreeNode=newArrayListTreeNode();this.pathName=newArrayListString();}publicTreeNode(){this.childTreeNode=newArrayListTreeNode();this.pathName=newArrayListString();}publicStringgetAttributeValue(){returnattributeValue;}publicvoidsetAttributeValue(StringattributeValue){this.attributeValue=attributeValue;}publicListTreeNodegetChildTreeNode(){returnchildTreeNode;}publicvoidsetChildTreeNode(ListTreeNodechildTreeNode){this.childTreeNode=childTreeNode;}publicStringgetTargetFunValue(){returntargetFunValue;}publicvoidsetTargetFunValue(StringtargetFunValue){this.targetFunValue=targetFunValue;}publicStringgetNodeName(){returnnodeName;}publicvoidsetNodeName(StringnodeName){this.nodeName=nodeName;}publicListStringgetPathName(){returnpathName;}publicvoidsetPathName(ListStringpathName){this.pathName=pathName;}}}增益率计算类(取log的时候底用的是e,没用2)packageC45Test;importjava.util.ArrayList;importjava.util.HashMap;importjava.util.HashSet;importjava.util.Iterator;importjava.util.List;importjava.util.Map;importjava.util.Set;//C4.5实现publicclassInfoGain{privateListArrayListStringdata;privateListStringattribute;publicInfoGain(ListArrayListStringdata,ListStringattribute){this.data=newArrayListArrayListString();for(inti=0;idata.size();i++){ListStringtemp=data.get(i);ArrayListStringt=newArrayListString();for(intj=0;jtemp.size();j++){t.add(temp.get(j));}this.data.add(t);}this.attribute=newArrayListStr
ing();for(intk=0;kattribute.size();k++){this.attribute.add(attribute.get(k));}/*this.data=data;this.attribute=attribute;*/}//获得熵publicdoublegetEntropy(){MapString,LongtargetValueMap=getTargetValue();SetStringtargetkey=targetValueMap.keySet();doubleentropy=0.0;for(Stringkey:targetkey){doublep=MathUtils.div((double)targetValueMap.get(key),(double)data.size());entropy+=(-1)*p*Math.log(p);}returnentropy;}//获得InfoApublicdoublegetInfoAttribute(intattributeIndex){MapString,LongattributeValueMap=getAttributeValue(attributeIndex);doubleinfoA=0.0;for(Map.EntryString,Longentry:attributeValueMap.entrySet()){intsize=data.size();doubleattributeP=MathUtils.div((double)entry.getValue(),(double)size);MapString,LongtargetValueMap=getAttributeValueTargetValue(entry.getKey(),attributeIndex);longtotalCount=0L;for(Map.EntryString,LongentryValue:targetValueMap.entrySet()){totalCount+=entryValue.getValue();}doublevalueSum=0.0
本文标题:C45算法建立决策树JAVA练习
链接地址:https://www.777doc.com/doc-2902462.html