您好,欢迎访问三七文档
当前位置:首页 > 金融/证券 > 股票报告 > matlab实现的C4.5分类决策树算法
function D = C4_5(train_features, train_targets, inc_node, region)
% C4_5  Classify using Quinlan's C4.5 algorithm.
%
% Inputs:
%   train_features - Train features, Ni x M (one row per feature dimension,
%                    one column per sample)
%   train_targets  - Train targets, 1 x M
%   inc_node       - Percentage of incorrectly assigned samples at a node.
%                    It is converted below to a sample count; a node holding
%                    fewer samples than that count is not split further.
%   region         - Decision region vector: [-x x -y y number_of_points]
%
% Outputs:
%   D              - Decision surface, N x N with N = region(5)
%
% NOTE: In this implementation it is assumed that a feature vector with
% fewer than 10 unique values (the parameter Nu) is discrete, and will be
% treated as such. Other vectors will be treated as continuous.

[Ni, M]  = size(train_features);
inc_node = inc_node * M / 100;   % percentage -> number of samples
Nu       = 10;

% For the decision region: an N x N grid flattened to a 2 x N^2 matrix
N      = region(5);
mx     = ones(N, 1) * linspace(region(1), region(2), N);
my     = linspace(region(3), region(4), N)' * ones(1, N);
flatxy = [mx(:), my(:)]';

% Preprocessing (disabled in the original source)
%[f, t, UW, m]  = PCA(train_features, train_targets, Ni, region);
%train_features = UW * (train_features - m*ones(1,M));
%flatxy         = UW * (flatxy - m*ones(1,N^2));

% Find which of the input features are discrete, and discretize the
% corresponding dimension on the decision region
discrete_dim = zeros(1, Ni);
for i = 1:Ni
    Nb = length(unique(train_features(i, :)));
    if (Nb <= Nu)
        % This is a discrete feature
        discrete_dim(i)   = Nb;
        % high_histogram is defined outside this file.
        % NOTE(review): presumably it maps the grid coordinates onto Nb bins - confirm.
        [H, flatxy(i, :)] = high_histogram(flatxy(i, :), Nb);
    end
end

% Build the tree recursively
disp('Building tree')
tree = make_tree(train_features, train_targets, inc_node, discrete_dim, max(discrete_dim), 0);

% Make the decision region according to the tree
disp('Building decision surface using the tree')
targets = use_tree(flatxy, 1:N^2, tree, discrete_dim, unique(train_targets));

D = reshape(targets, N, N);
%END C4_5

function targets = use_tree(features, indices, tree, discrete_dim, Uc)
% Classify recursively using a tree.
%
%   features     - all points to classify, one column per point
%   indices      - columns of 'features' that reached this node
%   tree         - node built by make_tree (fields: dim, split_loc, child)
%   discrete_dim - per-dimension flag, nonzero for discrete features
%   Uc           - unique class labels (only passed down the recursion)
%
% Returns a 1 x size(features,2) vector that holds the predicted label at
% the positions listed in 'indices' and zero everywhere else, so results
% from sibling subtrees can simply be added together.

targets = zeros(1, size(features, 2));

if (tree.dim == 0)
    % Reached the end of the tree. An empty 'child' marks a node that was
    % built from no samples; assigning [] would delete elements, so skip it.
    if ~isempty(tree.child)
        targets(indices) = tree.child;
    end
    return
end

% This is not the last level of the tree, so:
% First, find the dimension we are to work on
dim = tree.dim;

% And classify according to it
if (discrete_dim(dim) == 0)
    % Continuous feature: left child takes values <= threshold, right child the rest
    in      = indices(features(dim, indices) <= tree.split_loc);
    targets = targets + use_tree(features, in, tree.child(1), discrete_dim, Uc);
    in      = indices(features(dim, indices) >  tree.split_loc);
    targets = targets + use_tree(features, in, tree.child(2), discrete_dim, Uc);
else
    % Discrete feature: the i-th unique value is routed to the i-th child.
    % NOTE(review): children are matched by position in the sorted unique
    % values; this assumes the node has one child per unique value found
    % here - confirm against the data passed in.
    Uf = unique(features(dim, :));
    for i = 1:length(Uf)
        in      = indices(features(dim, indices) == Uf(i));
        targets = targets + use_tree(features, in, tree.child(i), discrete_dim, Uc);
    end
end
%END use_tree

function tree = make_tree(features, targets, inc_node, discrete_dim, maxNbin, base)
% Build a tree recursively.
%
%   features     - Ni x L samples at this node (one column per sample)
%   targets      - 1 x L class labels of those samples
%   inc_node     - minimum number of samples needed to keep splitting
%   discrete_dim - per-dimension flag, nonzero for discrete features
%   maxNbin      - forwarded unchanged to recursive calls (not used otherwise)
%   base         - forwarded unchanged to recursive calls (not used otherwise)
%
% Returns a struct with fields
%   dim       - 0 for a leaf, otherwise the dimension the node splits on
%   split_loc - threshold for a continuous split (inf otherwise)
%   child     - class label for a leaf, struct array of sub-trees otherwise

if nargin < 6
    % Stay callable with five arguments
    base = 0;
end

[Ni, L] = size(features);
Uc      = unique(targets);

% Every node carries the same fields in the same order so that nodes can be
% stored together in one struct array.
tree.dim       = 0;
tree.split_loc = inf;
tree.child     = [];

if isempty(features)
    return
end

% When to stop: if the number of examples is small or the node is pure
if ((inc_node > L) || (L == 1) || (length(Uc) == 1))
    tree.child = majority_class(targets, Uc);
    return
end

% Compute the node's I (entropy of the class distribution, in bits)
Pnode = zeros(1, length(Uc));
for i = 1:length(Uc)
    Pnode(i) = sum(targets == Uc(i)) / L;
end
Inode = -sum(Pnode .* log(Pnode) / log(2));

% For each dimension, compute the gain ratio impurity
% This is done separately for discrete and continuous features
delta_Ib  = zeros(1, Ni);
split_loc = ones(1, Ni) * inf;

for i = 1:Ni
    data  = features(i, :);
    Uv    = unique(data);
    Nbins = length(Uv);
    if (discrete_dim(i))
        % This is a discrete feature
        if (Nbins < 2)
            % A single value cannot split the node; never prefer it
            delta_Ib(i) = -inf;
            continue
        end
        P = zeros(length(Uc), Nbins);
        for j = 1:length(Uc)
            for k = 1:Nbins
                % Count against the actual values present, the same values
                % the split below uses, rather than assuming they are 1..Nbins
                P(j, k) = sum((targets == Uc(j)) & (data == Uv(k)));
            end
        end
        Pk          = sum(P, 1);
        P           = P / L;
        Pk          = Pk / sum(Pk);
        info        = sum(-P .* log(eps + P) / log(2), 1);
        delta_Ib(i) = (Inode - sum(Pk .* info)) / -sum(Pk .* log(eps + Pk) / log(2));
    else
        % This is a continuous feature
        P = zeros(length(Uc), 2);

        % Sort the features
        [sorted_data, order] = sort(data);
        sorted_targets       = targets(order);

        % Calculate the information for each possible split
        I = zeros(1, L - 1);
        for j = 1:L-1
            for k = 1:length(Uc)
                P(k, 1) = sum(sorted_targets(1:j)     == Uc(k));
                P(k, 2) = sum(sorted_targets(j+1:end) == Uc(k));
            end
            Ps   = sum(P, 1) / L;
            P    = P / L;
            info = sum(-P .* log(eps + P) / log(2), 1);
            I(j) = Inode - sum(info .* Ps);
        end
        [delta_Ib(i), s] = max(I);
        split_loc(i)     = sorted_data(s);
    end
end

% Find the dimension maximizing delta_Ib
[best_gain, dim] = max(delta_Ib);

% Split along the 'dim' dimension
if (discrete_dim(dim))
    % Discrete feature: one child per value present at this node
    Nf    = unique(features(dim, :));
    Nbins = length(Nf);
    if (Nbins < 2)
        % The split would hand every sample to one child and recurse
        % forever; make this node a leaf instead.
        tree.child = majority_class(targets, Uc);
        return
    end
    tree.dim = dim;
    for i = 1:Nbins
        idx  = find(features(dim, :) == Nf(i));
        node = make_tree(features(:, idx), targets(idx), inc_node, discrete_dim, maxNbin, base);
        if (i == 1)
            children = node;
        else
            children(i) = node;
        end
    end
    tree.child = children;
else
    % Continuous feature
    left  = find(features(dim, :) <= split_loc(dim));
    right = find(features(dim, :) >  split_loc(dim));
    if (isempty(left) || isempty(right))
        % Ties at the threshold left one side empty: the split separates
        % nothing, so stop here with a majority-vote leaf.
        tree.child = majority_class(targets, Uc);
        return
    end
    tree.dim       = dim;
    tree.split_loc = split_loc(dim);
    left_child     = make_tree(features(:, left),  targets(left),  inc_node, discrete_dim, maxNbin, base);
    right_child    = make_tree(features(:, right), targets(right), inc_node, discrete_dim, maxNbin, base);
    tree.child     = [left_child, right_child];
end
%END make_tree

function label = majority_class(targets, Uc)
% Return the label in Uc that occurs most often in targets (first one on ties).
% Counts each label exactly rather than using equally spaced histogram bins.

counts = zeros(1, length(Uc));
for c = 1:length(Uc)
    counts(c) = sum(targets == Uc(c));
end
[top_count, largest] = max(counts);
label = Uc(largest);
%END majority_class
本文标题:matlab实现的C4.5分类决策树算法
链接地址:https://www.777doc.com/doc-6587135.html