MapReduce Basics on the educoder Platform
MapReduce Level 1: Score Statistics

Code to pass the level:

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCount {
    /********** Begin **********/
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds "name score"; emit (name, score).
            StringTokenizer itr = new StringTokenizer(value.toString(), "\n");
            while (itr.hasMoreTokens()) {
                String[] str = itr.nextToken().split(" ");
                String name = str[0];
                one.set(Integer.parseInt(str[1]));
                word.set(name);
                context.write(word, one);
            }
        }
    }

    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // Keep the highest score seen for this student.
            int maxScore = 0;
            for (IntWritable intWritable : values) {
                maxScore = Math.max(maxScore, intWritable.get());
            }
            result.set(maxScore);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setCombinerClass(IntSumReducer.class); // max is safe to pre-aggregate map-side
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        String inputfile = "/user/test/input";
        String outputFile = "/user/test/output/";
        FileInputFormat.addInputPath(job, new Path(inputfile));
        FileOutputFormat.setOutputPath(job, new Path(outputFile));
        job.waitForCompletion(true);
        /********** End **********/
    }
}

Command line:

touch file01
echo "Hello World Bye World"
cat file01
echo "Hello World Bye World" > file01
cat file01
touch file02
echo "Hello Hadoop Goodbye Hadoop" > file02
cat file02
start-dfs.sh
hadoop fs -mkdir /usr
hadoop fs -mkdir /usr/input
hadoop fs -ls /usr/output
hadoop fs -ls /
hadoop fs -ls /usr
hadoop fs -put file01 /usr/input
hadoop fs -put file02 /usr/input
hadoop fs -ls /usr/input

Run the evaluation.
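The session above stages input files but stops before actually submitting the job. Note also that it populates /usr/input while the job's hardcoded paths read /user/test/input; presumably the grader provides the latter. For reference, assuming the class has been compiled and packed into a jar named WordCount.jar (a hypothetical name; on educoder the evaluation step normally handles this), a run and a check of the result would look like:

hadoop jar WordCount.jar WordCount             # main class from the code above
hadoop fs -ls /user/test/output                # paths are hardcoded in main()
hadoop fs -cat /user/test/output/part-r-00000  # one "name<TAB>highest score" line per student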
----------------------------------------

MapReduce Level 2: Merging File Contents and Removing Duplicates

Code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Merge {
    /**
     * @param args
     * Merge files A and B, remove duplicated content, and produce a new output file C.
     */
    // Override the map function here: copy the input value straight onto the output key.
    // Note that map must declare: throws IOException, InterruptedException
    /********** Begin **********/
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {
        protected void map(LongWritable key, Text value,
                Mapper<LongWritable, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            String str = value.toString();
            String[] data = str.split(" ");
            Text t1 = new Text(data[0]);
            Text t2 = new Text(data[1]);
            context.write(t1, t2);
        }
    }
    /********** End **********/

    // Override the reduce function here: copy the input key straight onto the output key.
    // Note that reduce must declare: throws IOException, InterruptedException
    /********** Begin **********/
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values,
                Reducer<Text, Text, Text, Text>.Context context)
                throws IOException, InterruptedException {
            // Collect the distinct values for this key, then emit them in sorted order.
            List<String> list = new ArrayList<String>();
            for (Text text : values) {
                String str = text.toString();
                if (!list.contains(str)) {
                    list.add(str);
                }
            }
            Collections.sort(list);
            for (String text : list) {
                context.write(key, new Text(text));
            }
        }
        /********** End **********/
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(Merge.class);
        job.setMapperClass(Map.class);
        job.setCombinerClass(Reduce.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        String inputPath = "/user/tmp/input/";   // set the input path here
        String outputPath = "/user/tmp/output/"; // set the output path here
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Run the evaluation.
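To make the dedup behavior concrete, here is a hypothetical pair of input files and the output the job would produce for them (the grader's actual data may differ). The mapper splits each line on the space into (key, value); the reducer collects the values per key, drops duplicates, sorts them, and writes one key/value line per surviving pair:

file A:            file B:
20170101 x         20170101 y
20170102 y         20170102 y
20170101 x

merged output C (key, tab, value):
20170101 x
20170101 y
20170102 y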
----------------------------------------

MapReduce Level 3: Information Mining, Mining Parent-Child Relationships

Code:

import java.io.IOException;
import java.util.*;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
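The source breaks off after these imports, so the level 3 solution itself is missing. What follows is a minimal sketch of the standard single-table-join approach to this exercise, not the original author's code. It assumes input lines of the form "child parent" with a "child parent" header row, and it emits grandchild/grandparent pairs under a "grand_child grand_parent" header; the class name, tag prefixes, and HDFS paths are all illustrative assumptions.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ChildParent {

    public static class JoinMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] fields = value.toString().split(" ");
            // Skip the "child parent" header line and anything malformed.
            if (fields.length != 2 || fields[0].equals("child")) {
                return;
            }
            String child = fields[0];
            String parent = fields[1];
            // Emit the relation from both sides, tagged so the reducer can tell
            // which role the value plays relative to the key:
            // "1_" marks a child of the key, "2_" marks a parent of the key.
            context.write(new Text(parent), new Text("1_" + child));
            context.write(new Text(child), new Text("2_" + parent));
        }
    }

    public static class JoinReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void setup(Context context)
                throws IOException, InterruptedException {
            // Header row in the style of the exercise's sample output.
            context.write(new Text("grand_child"), new Text("grand_parent"));
        }

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            List<String> grandChildren = new ArrayList<String>();
            List<String> grandParents = new ArrayList<String>();
            for (Text value : values) {
                String v = value.toString();
                if (v.startsWith("1_")) {
                    grandChildren.add(v.substring(2));
                } else {
                    grandParents.add(v.substring(2));
                }
            }
            // key is the middle generation: every child of key paired with
            // every parent of key is a grandchild-grandparent relation.
            for (String grandChild : grandChildren) {
                for (String grandParent : grandParents) {
                    context.write(new Text(grandChild), new Text(grandParent));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "child-parent join");
        job.setJarByClass(ChildParent.class);
        job.setMapperClass(JoinMapper.class);
        // Deliberately no combiner: the join needs each key's complete value group.
        job.setReducerClass(JoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Hypothetical paths; substitute whatever the exercise specifies.
        FileInputFormat.addInputPath(job, new Path("/user/reduce/input"));
        FileOutputFormat.setOutputPath(job, new Path("/user/reduce/output"));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}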
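A note on the combiner choice across the three levels: levels 1 and 2 reuse the reducer as the combiner, which is safe because taking a maximum and deduplicating both give the same answer when first applied to partial groups of values. The level 3 join does not have that property: pairing children with parents on a map-side fragment of a key's values would silently drop relations, which is why the sketch above sets no combiner.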