MapReduce Word Count
The WordcountMapper class
package com.sky.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class WordcountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    // map() is called once for every line of input, so the output key and value
    // objects are created once here, rather than on every call, to avoid
    // needless allocation.
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Get the text of the current line
        String line = value.toString();
        // Split on spaces
        String[] words = line.split(" ");
        // Convert each word to the output format and emit it
        for (String word : words) {
            k.set(word);
            context.write(k, v);
        }
    }
}
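Splitting on a single space is enough for the sample input below, but lines with tabs or runs of consecutive spaces would yield empty tokens, each of which would then be counted as a "word". A minimal hedged variant of the loop body (my tweak, not part of the original code) that tolerates arbitrary whitespace:

        // Split on any run of whitespace instead of a single space
        String[] words = line.split("\\s+");
        for (String word : words) {
            if (word.isEmpty()) continue; // leading whitespace yields one empty token
            k.set(word);
            context.write(k, v);
        }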
The WordcountReducer class
package com.sky.mr.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordcountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Sum the counts for this key
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // Emit the total
        v.set(sum);
        context.write(key, v);
    }
}
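Note that the loop reads each value with get() before accumulating. This matters: Hadoop reuses a single IntWritable instance for every element of the values iterable, so holding on to the iterated objects themselves is a classic pitfall. A hypothetical fragment (not part of the example) showing the aliasing bug inside reduce():

        // PITFALL: the framework reuses one IntWritable instance across the iteration,
        // so caching references keeps N pointers to the same (last) value.
        java.util.List<IntWritable> cached = new java.util.ArrayList<>();
        for (IntWritable count : values) {
            cached.add(count);                           // BUG: every entry aliases one object
            // cached.add(new IntWritable(count.get())); // safe: copy the value out
        }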
The WordcountDriver class
package com.sky.mr.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordcountDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // 1. Load the configuration and create the job object
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // 2. Set the jar by class so Hadoop can locate and ship it
        job.setJarByClass(WordcountDriver.class);

        // 3. Register the custom mapper and reducer classes
        job.setMapperClass(WordcountMapper.class);
        job.setReducerClass(WordcountReducer.class);

        // 4. Set the map output key and value types
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // 5. Set the final output key and value types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // 6. Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // 7. Submit the job, with its MapReduce settings, to the YARN cluster
        //    and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
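Because word count's reduction (integer addition) is associative and commutative, the same reducer class can also serve as a combiner, pre-aggregating counts on the map side to shrink shuffle traffic. An optional one-line addition to the driver (my suggestion, not in the original), placed after step 3:

        job.setCombinerClass(WordcountReducer.class);

To run the job on a cluster, package the classes into a jar and submit it with something like hadoop jar wordcount.jar com.sky.mr.wordcount.WordcountDriver /input /output (the jar name and HDFS paths here are placeholders). Note that FileOutputFormat requires the output directory not to exist yet; the job fails at submission if it does.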
Input file
import org apache hadoop io
import org apache hadoop io
import org apache hadoop
import java io IOException
Output file
IOException 1
apache 3
hadoop 3
import 4
io 3
java 1
org 3
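The words come out in sorted order because MapReduce sorts map output keys during the shuffle; Text compares by raw UTF-8 bytes, which is why the capitalized IOException precedes the lowercase words.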