鹅厂Tencent面试题
今天面试被虐……好久没写 MapReduce，都忘记了。额额……
import java.io.BufferedReader; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.net.URI; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.math3.stat.descriptive.SummaryStatistics; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; /** * * * File Name: PageVisitAge_Avg.java * * General Description: Copyright and file header. 
* * Revision History: * Modification * Author Date(MM/DD/YYYY) JiraID Description of Changes * --------------------- ------------ ---------- ----------------------------- * @author Bill Zhang 2017年9月6日 * */ public class PageVisitAge_Avg extends Configured implements Tool{ static class PageVisitMapper extends Mapper<LongWritable,Text,Text,Text>{ @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException { String line = value.toString(); String url = line.split(",")[1]; context.write(new Text(url), value); } } static class PageVisitReducer extends Reducer<Text, Text, Text, Text>{ private ConcurrentHashMap<String, String> dictTable = new ConcurrentHashMap<String, String>(); @Override protected void setup(Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { URI[] path_arr = context.getCacheFiles(); if (path_arr.length == 0) { throw new FileNotFoundException("Distributed cache file not found."); } URI dict_uri = path_arr[0]; FileSystem fs = FileSystem.get(context.getConfiguration()); FSDataInputStream in = fs.open(new Path(dict_uri)); BufferedReader br = new BufferedReader(new InputStreamReader(in)); String user_dict = null; while ((user_dict = br.readLine()) != null) { String[] records = user_dict.split(","); //加载字典表 dictTable.put(records[0], user_dict); } } @Override protected void reduce(Text arg0, Iterable<Text> arg1, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { SummaryStatistics statistics = new SummaryStatistics (); //计算访问访问page的平均年龄 for(Text value : arg1){ String id = value.toString().split(",")[0]; String userInfo = dictTable.get(id); String age = userInfo.split(",")[2]; statistics.addValue(Double.valueOf(age)); System.out.println(value); } double avg = statistics.getMean(); context.write(arg0, new Text(avg+"")); } } @Override public int run(String[] args) throws Exception { 
Configuration conf =getConf(); conf.set("mapreduce.input.fileinputformat.input.dir.recursive", "true"); Job job = Job.getInstance(conf,PageVisitAge_Avg.class.getSimpleName()); job.setJarByClass(getClass()); /** * 用户访问的网页历史 * 例如: * 1,http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html * 2,http://docs.oracle.com/javase/8/ * 3,http://docs.oracle.com/javase/8/docs/api/index.html */ FileInputFormat.addInputPath(job,new Path(args[0])); /** * 用户字典 * 例如: * 1,zhangsan,20,male * 2,zhangsi,23,female * 3,susan,30,male * */ job.addCacheFile(new URI(args[1])); FileOutputFormat.setOutputPath(job,new Path(args[2])); job.setMapOutputKeyClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(PageVisitMapper.class); job.setReducerClass(PageVisitReducer.class); job.setNumReduceTasks(10); return job.waitForCompletion(true)?0:1; } /** * * @param args * @throws Exception */ public static void main(String [] args ) throws Exception{ int ec = ToolRunner.run(new Configuration(), new PageVisitAge_Avg(), args); System.exit(ec); } }
相关推荐
Kafka 2020-09-18
Wepe0 2020-10-30
杜倩 2020-10-29
windle 2020-10-29
minerd 2020-10-28
mengzuchao 2020-10-22
Junzizhiai 2020-10-10
bxqybxqy 2020-09-30
风之沙城 2020-09-24
kingszelda 2020-09-22
大唐帝国前营 2020-08-18
yixu0 2020-08-17
TangCuYu 2020-08-15
xiaoboliu00 2020-08-15
songshijiazuaa 2020-08-15
xclxcl 2020-08-03
zmzmmf 2020-08-03
newfarhui 2020-08-03
likesyour 2020-08-01