educoder Platform: HDFS and MapReduce Comprehensive Experiment
========================= Level 1 =========================
Level 1 (practice task, completed): WordCount word-frequency count

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WordCount {

    // Mapper: split each input line into tokens and emit (word, 1) for every token.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, one);
            }
        }
    }

    // Reducer (also used as the combiner): sum the counts emitted for each word.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private IntWritable result = new IntWritable();

        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    // Driver: parse the command-line arguments and configure the MapReduce job.
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = new Job(conf, "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
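One note on the driver above: the new Job(conf, "wordcount") constructor still compiles but has been deprecated since Hadoop 2 in favour of the Job.getInstance factory. As a sketch only (not part of the graded answer), the same main method can be written as follows, with everything except the job construction unchanged:

    // Drop-in replacement for the WordCount driver using the non-deprecated factory method.
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: wordcount <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "wordcount");   // instead of the deprecated new Job(conf, "wordcount")
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }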
========================= Level 2 =========================
Level 2 (practice task, completed): HDFS file read and write

import java.io.IOException;
import java.sql.Date;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class hdfs {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri());

        // Create the file if it does not exist yet and write a UTF string into it.
        Path file = new Path("/user/hadoop/myfile");
        if (fs.exists(file)) {
            System.out.println("File exists.");
        } else {
            FSDataOutputStream outStream = fs.create(file);
            outStream.writeUTF("china cstor cstor cstor china");
            outStream.close();
        }

        // Read the string back from HDFS.
        FSDataInputStream inStream = fs.open(file);
        String data = inStream.readUTF();

        // Print the file's metadata (owner, replication, modification time, block size).
        FileSystem hdfs = file.getFileSystem(conf);
        FileStatus[] fileStatus = hdfs.listStatus(file);
        for (FileStatus status : fileStatus) {
            System.out.println("File Owner: " + status.getOwner());
            System.out.println("File Replication: " + status.getReplication());
            System.out.println("File Modification Time: " + new Date(status.getModificationTime()));
            System.out.println("File Block Size: " + status.getBlockSize());
        }

        System.out.println(data);
        System.out.println("File name: " + file.getName());
        inStream.close();
        fs.close();
    }
}

========================= Level 3 =========================
Level 3 (practice task, completed): Inverted index

import java.io.IOException;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.util.Iterator;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.GenericOptionsParser;

public class InvertedIndex {

    // Mapper: count word frequencies inside one input split and emit
    // (word, "fileName@frequency") so the file of origin travels with each count.
    public static class InvertedIndexMapper extends Mapper<LongWritable, Text, Text, Text> {

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            FileSplit fileSplit = (FileSplit) context.getInputSplit();
            String fileName = fileSplit.getPath().getName();

            String word;
            IntWritable frequence = new IntWritable();
            int one = 1;
            Hashtable<String, Integer> hashmap = new Hashtable<String, Integer>();
            StringTokenizer itr = new StringTokenizer(value.toString());
            for (; itr.hasMoreTokens(); ) {
                word = itr.nextToken();
                if (hashmap.containsKey(word)) {
                    hashmap.put(word, hashmap.get(word) + 1);
                } else {
                    hashmap.put(word, one);
                }
            }
            for (Iterator<String> it = hashmap.keySet().iterator(); it.hasNext(); ) {
                word = it.next();
                frequence = new IntWritable(hashmap.get(word));
                Text fileName_frequence = new Text(fileName + "@" + frequence.toString());
                context.write(new Text(word), fileName_frequence);
            }
        }
    }

    // Combiner: merge the partial counts produced by one map task.
    public static class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String fileNam
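The answer text breaks off in the middle of InvertedIndexCombiner.reduce (right after "String fileNam"). The sketch below shows one common way the remaining pieces of this exercise are written: it restates the combiner from its declaration and adds a reducer and a driver. Everything past the truncation point, including the class name InvertedIndexReducer and the "fileName@count" / semicolon-separated output format, is an assumption rather than text recovered from the original.

    // Hedged completion sketch, not recovered from the original answer.
    // Combiner: each map task reads one split of one file, so every value that
    // reaches the combiner for a given word carries the same file name; sum the
    // partial counts and emit a single "fileName@sum" per word.
    public static class InvertedIndexCombiner extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String fileName = "";
            int sum = 0;
            for (Text value : values) {
                String[] parts = value.toString().split("@");
                fileName = parts[0];
                sum += Integer.parseInt(parts[1]);
            }
            context.write(key, new Text(fileName + "@" + sum));
        }
    }

    // Reducer: concatenate the per-file counts into one posting list per word,
    // e.g. "word    file1.txt@3;file2.txt@1;".
    public static class InvertedIndexReducer extends Reducer<Text, Text, Text, Text> {
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder postings = new StringBuilder();
            for (Text value : values) {
                postings.append(value.toString()).append(";");
            }
            context.write(key, new Text(postings.toString()));
        }
    }

    // Driver: wire the mapper, combiner and reducer together.
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: invertedindex <in> <out>");
            System.exit(2);
        }
        Job job = Job.getInstance(conf, "invertedindex");
        job.setJarByClass(InvertedIndex.class);
        job.setMapperClass(InvertedIndexMapper.class);
        job.setCombinerClass(InvertedIndexCombiner.class);
        job.setReducerClass(InvertedIndexReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}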