您好,欢迎访问三七文档
/* ======================================================================
   Part 1: Basic statistical methods
   Procedures covered in these notes:
     MEANS      - descriptive statistics
     FREQ       - frequency tables
     UNIVARIATE - distribution checks / tests
     ANOVA      - analysis of variance
     TTEST      - t tests
     GLM        - general linear models
     NPAR1WAY   - nonparametric tests (Wilcoxon)
   ====================================================================== */

/* ----------------------------------------------------------------------
   Section I: Statistical methods for quantitative (measurement) data
   ---------------------------------------------------------------------- */

/* 1.01 Mean, frequency table, percentiles, normality test,
        stem-and-leaf plot, box plot, normal probability plot */
data ex2_1;
    input x @@;                     /* @@ : read several observations per line */
    low = 2.3;
    dis = 0.3;
    z = x - mod(x - low, dis);      /* x rounded down to the grid low + k*dis  */
    cards;
3.96 4.23 4.42 3.59 5.12 4.02 4.32 3.72 4.76 4.16
4.61 4.26 3.77 4.20 4.36 3.07 4.89 3.97 4.28 3.64
4.66 4.04 4.55 4.25 4.63 3.91 4.41 3.52 5.03 4.01
4.30 4.19 4.75 4.14 4.57 4.26 4.56 3.79 3.89 4.21
4.95 3.98 4.29 3.67 4.69 4.12 4.56 4.26 4.66 4.28
3.83 4.20 5.24 4.02 4.33 3.76 4.81 4.17 3.96 3.27
4.61 4.26 3.96 4.23 3.76 4.01 4.29 3.67 3.39 4.12
4.27 3.61 4.98 4.24 3.83 4.20 3.71 4.03 4.34 4.69
3.62 4.18 4.26 4.36 5.28 4.21 4.42 4.36 3.66 4.02
4.31 4.83 3.59 3.97 3.96 4.49 5.11 4.20 4.36 4.54
3.72 3.97 4.28 4.76 3.21 4.04 4.56 4.25 4.92 4.23
4.47 3.60 5.23 4.02 4.32 4.68 4.76 3.69 4.61 4.26
3.89 4.21 4.36 3.42 5.01 4.01 4.29 3.68 4.71 4.13
4.57 4.26 4.03 5.46 4.16 3.64 4.16 3.76
;

/* FREQ: frequency table of the grouped variable z */
proc freq;
    tables z;
run;

/* n, mean, SD, standard error and confidence limits of the mean of x */
proc means data=ex2_1 n mean std stderr clm;
    var x;
run;

/* Same data as a frequency table: x = value, f = frequency.
   Stored under its own name (ex2_1f) so that it does not overwrite the raw
   data set ex2_1, which the PROC UNIVARIATE steps below analyse. */
data ex2_1f;
    input x f @@;
    cards;
3.07 2  3.27 3  3.47 9  3.67 14  3.87 22  4.07 30
4.27 21  4.47 15  4.67 10  4.87 6  5.07 4  5.27 2
;
run;

/* Descriptive statistics from the frequency table.
   Replacing "freq f" with "weight f" treats f as a weight instead of a
   frequency; the original note adds that f then lies between 0 and 1. */
proc means data=ex2_1f;
    freq f;
    var x;
run;

/* 2.5th and 97.5th percentiles of x, i.e. the central 95% range of the
   observations (the original note calls this a 95% interval; it is not a
   confidence interval for the mean - that is CLM in PROC MEANS above). */
proc univariate data=ex2_1;
    var x;
    output out=pct pctlpre=p pctlpts=2.5 97.5;
run;

proc print data=pct;
run;

/* Normality test, stem-and-leaf plot, box plot, normal probability plot.
   The "Extreme Observations" table lists the 5 lowest and 5 highest values. */
proc univariate data=ex2_1 normal plot;
    var x;
run;

/* 1.02 Geometric mean */
data ex2_5;
    input x f @@;
    y = log10(x);                   /* work on the log10 scale */
    cards;
10 4  20 3  40 10  80 10  160 11  320 15  640 14  1280 2
;

proc means noprint;                 /* run MEANS without printed output */
    var y;
    freq f;
    output out=b                    /* write the result to data set b */
           mean=logmean;            /* name the mean of y "logmean" */
run;

data c;                             /* new data set c */
    set b;                          /* read data set b */
    g = 10**logmean;                /* antilog of logmean = geometric mean of x */
run;

proc print data=c;
    var g;
run;

/* For comparison: the ordinary (arithmetic) mean of x */
proc means data=ex2_5;
    var x;
    freq f;
run;

/* 1.03 Confidence interval from a known mean and SD:
        single sample, and single sample versus a population mean */

/* Single sample: 95% confidence interval of the mean */
data ex3_2;
    n = 10;
    mean = 166.95;
    std = 3.64;
    t = tinv(0.975, n - 1);         /* two-sided 95% critical t value */
    pts = t * std / sqrt(n);        /* half-width of the interval */
    lclm = mean - pts;
    uclm = mean + pts;

proc print;
    var lclm uclm;
run;

/* Single sample versus a population mean (one-sample t test) */
data ex3_5;
    n = 36;                         /* sample size */
    s_m = 130.83;                   /* sample mean */
    std = 25.74;                    /* sample standard deviation */
    p_m = 140;                      /* population mean */
    df = n - 1;                     /* degrees of freedom */
    t = (s_m - p_m) / (std / sqrt(n));
    p = (1 - probt(abs(t), df)) * 2;    /* two-sided p value from t */
run;

proc print;
    var t p;
run;

/* 1.06 Comparing two sample means: from summary statistics,
        paired data, and two independent groups */

/* Two samples from summary statistics: 95% CI of the mean difference */
data ex3_4;
    n1 = 29;
    n2 = 32;
    m1 = 20.10;
    m2 = 16.89;
    s1 = 7.02;
    s2 = 8.46;
    ss1 = s1**2 * (n1 - 1);
    ss2 = s2**2 * (n2 - 1);
    sc2 = (ss1 + ss2) / (n1 + n2 - 2);      /* pooled variance */
    se = sqrt(sc2 * (1/n1 + 1/n2));         /* SE of the mean difference */
    t = tinv(0.975, n1 + n2 - 2);
    lclm = (m1 - m2) - t * se;
    uclm = (m1 - m2) + t * se;

proc print;
    var t se lclm uclm;
run;

/* Paired data: analyse the within-pair difference d = x1 - x2.
   Paired t test with PROC MEANS. */
data ex3_6;
    input x1 x2 @@;
    d = x1 - x2;
    cards;
0.840 0.580  0.591 0.509  0.674 0.500  0.632 0.316  0.687 0.337
0.978 0.517  0.750 0.454  0.730 0.512  1.200 0.997  0.870 0.506
;

proc means t prt;
    var d;
run;

/* The same paired t test with PROC UNIVARIATE */
proc univariate data=ex3_6;
    var d;
run;

/* Two independent samples (group sizes may differ) */
data ex3_7;
    input x @@;
    if _n_ < 21 then c = 1;         /* observations 1-20: treatment group */
    else c = 2;                     /* remaining observations: control group */
    cards;
-0.70 -5.60 2.00 2.80 0.70 3.50 4.00 5.80 7.10 -0.50
2.50 -1.60 1.70 3.00 0.40 4.50 4.60 2.50 6.00 -1.40
3.70 6.50 5.00 5.20 0.80 0.20 0.60 3.40 6.60 -1.10
6.00 3.80 2.00 1.60 2.00 2.20 1.20 3.10 1.70 -2.00
;

proc ttest;                         /* invoke the TTEST procedure */
    var x;                          /* analysis variable */
    class c;                        /* grouping variable */
run;

/* 1.08-1.13 PROC ANOVA: one, two and three grouping factors */

/* One grouping factor */
data ex4_2;
    input x c @@;
    cards;
3.53 1 2.42 2 2.86 3 0.89 4
4.59 1 3.36 2 2.28 3 1.06 4
4.34 1 4.32 2 2.39 3 1.08 4
2.66 1 2.34 2 2.28 3 1.27 4
3.59 1 2.68 2 2.48 3 1.63 4
3.13 1 2.95 2 2.28 3 1.89 4
3.30 1 2.36 2 3.48 3 1.31 4
4.04 1 2.56 2 2.42 3 2.51 4
3.53 1 2.52 2 2.41 3 1.88 4
3.56 1 2.27 2 2.66 3 1.41 4
3.85 1 2.98 2 3.29 3 3.19 4
4.07 1 3.72 2 2.70 3 1.92 4
1.37 1 2.65 2 2.66 3 0.94 4
3.93 1 2.22 2 3.68 3 2.11 4
2.33 1 2.90 2 2.65 3 2.81 4
2.98 1 1.98 2 2.66 3 1.98 4
4.00 1 2.63 2 2.32 3 1.74 4
3.55 1 2.86 2 2.61 3 2.16 4
2.64 1 2.93 2 3.64 3 3.37 4
2.56 1 2.17 2 2.58 3 2.97 4
3.50 1 2.72 2 3.65 3 1.69 4
3.25 1 1.56 2 3.21 3 1.19 4
2.96 1 3.11 2 2.23 3 2.17 4
4.30 1 1.81 2 2.32 3 2.28 4
3.52 1 1.77 2 2.68 3 1.72 4
3.93 1 2.80 2 3.04 3 2.47 4
4.19 1 3.57 2 2.81 3 1.02 4
2.96 1 2.97 2 3.02 3 2.52 4
4.16 1 4.02 2 1.97 3 2.10 4
2.59 1 2.31 2 1.68 3 3.71 4
;

proc anova;                         /* invoke the ANOVA procedure */
    class c;                        /* grouping variable */
    model x = c;                    /* effect of c on x */
    means c / dunnett;              /* Dunnett's test: each group versus the
                                       control (first level of c by default);
                                       use LSD or SNK for all pairwise
                                       comparisons instead */
    means c / hovtest;              /* homogeneity-of-variance test (Levene by
                                       default); p > 0.05 means the group
                                       variances can be treated as equal */
run;
quit;

/* Two grouping factors */
data ex4_4;
    input x a b @@;
    cards;
0.82 1 1  0.65 2 1  0.51 3 1
0.73 1 2  0.54 2 2  0.23 3 2
0.43 1 3  0.34 2 3  0.28 3 3
0.41 1 4  0.21 2 4  0.31 3 4
0.68 1 5  0.43 2 5  0.24 3 5
;

proc anova;
    class a b;                      /* grouping variables a and b */
    model x = a b;                  /* effects of a and b on x */
    means a / snk;                  /* SNK pairwise comparisons of the a means */
run;
quit;

/* 1.15 ANOVA for a nested design with PROC GLM:
        first-level factor a, second-level factor b nested within a */
data ex11_6;
    input x a b @@;
    cards;
82 1 1  84 1 1  91 1 2  88 1 2  85 1 3  83 1 3
65 2 4  61 2 4  62 2 5  59 2 5  56 2 6  60 2 6
71 3 7  67 3 7  75 3 8  78 3 8  85 3 9  89 3 9
;

proc glm;                           /* invoke the GLM procedure */
    class a b;                      /* grouping variables a and b */
    model x = a b(a);               /* a = first-level factor, b nested within a
                                       (each b level occurs under one a level) */
run;
quit;

/* 1.17 ANOVA for repeated-measures data */
data ex12_2;
    input t1 t2 g @@;               /* t1, t2: response at the two time points;
                                       g: treatment group */
    cards;
130 114 1  124 110 1  136 126 1  128 116 1  122 102 1
118 100 1  116  98 1  138 122 1  126 108 1  124 106 1
118 124 2  132 122 2  134 132 2  114  96 2  118 124 2
128 118 2  118 116 2  132 122 2  120 124 2  134 128 2
;

proc glm;                           /* invoke the GLM procedure */
    class g;                        /* grouping variable */
    model t1 t2 = g;                /* effect of g on t1 and t2 */
    repeated time 2                 /* repeated factor "time" with 2 levels */
             contrast(1)            /* first time point is the reference */
             / summary;             /* print each time point versus reference */
run;
quit;

data ex12_3;
    input t0-t4 g @@;
    cards;
120 108 112 120 117 1
118 109 115 126 123 1
119 112 119 124 118 1
121 112 119 126 120 1
127 121 127 133 126 1
121 120 118 131 137 2
122 121 119 129 133 2
128 129 126 135 142 2
117 115 111 123 131 2
118 114 116 123 133 2
131 119 118 135 129 3
129 128 121 148 132 3
123 123 120 143 136 3
123 121 116 145 126 3
125 124 118 142 130 3
;

proc glm;
    class g;
    model t0-t4 = g;
    repeated time 5                 /* repeated factor "time" with 5 levels */
             contrast(1);           /* first time point is the reference */
run;
quit;

/* ----------------------------------------------------------------------
   Section II: Statistical methods for count (categorical) data
   ---------------------------------------------------------------------- */

/* 2.1 Chi-square test for a fourfold (2 x 2) table */
data ex7_1;
    input r c f @@;                 /* r: row variable, c: column variable,
                                       f: cell frequency */
    cards;
1 1 99  1 2 5  2 1 75  2 2 21
;

/* NOTE(review): the source text is cut off in the middle of the next
   statement; the remainder of this step is not visible here. */
proc freq;                          /* invoke the FREQ procedure */
wei
本文标题:SAS各过程笔记+描述性统计+线性回归+logistic回归+生存分析+判别分析+聚类分析+主成分分
链接地址:https://www.777doc.com/doc-3888539.html