job on hadoop

//http://distributed-agility.blogspot.com/2010/01/hadoop-0201-example-inverted-line-index.html

//https://portal.futuregrid.org/manual/hadoop-wordcount

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**

*LineIndexerCreatesaninvertedindexoverallthewordsinadocumentcorpus,mappingeachobservedwordtoalist

*offilename@offsetlocationswhereitoccurs.

*/

publicclassLineIndexerextendsConfiguredimplementsTool{

//wheretoputthedatainhdfswhenwe'redone

privatestaticfinalStringOUTPUT_PATH="output";

//wheretoreadthedatafrom.

privatestaticfinalStringINPUT_PATH="input";

publicstaticvoidmain(String[]args)throwsException{

intres=ToolRunner.run(newConfiguration(),newLineIndexer(),args);

System.exit(res);

}

publicintrun(String[]args)throwsException{

Configurationconf=getConf();

Jobjob=newJob(conf,"LineIndexer1");

job.setJarByClass(LineIndexer.class);

job.setMapperClass(LineIndexMapper.class);

job.setReducerClass(LineIndexReducer.class);

job.setOutputKeyClass(Text.class);

job.setOutputValueClass(Text.class);

FileInputFormat.addInputPath(job,newPath(INPUT_PATH));

FileOutputFormat.setOutputPath(job,newPath(OUTPUT_PATH));

returnjob.waitForCompletion(true)?0:1;

}

}

After updating, make sure to generate a new jar, remove anything under the directory "output" (since the program does not clean that up), and execute the new version.

training@training-vm:~/git/exercises/shakespeare$ ant jar

Buildfile: build.xml

compile:

[javac] Compiling 4 source files to /home/training/git/exercises/shakespeare/bin

jar:

[jar] Building jar: /home/training/git/exercises/shakespeare/indexer.jar

BUILD SUCCESSFUL

Total time: 1 second

I have added 2 ASCII books in the input directory: the works from Leonardo Da Vinci and the first volume of the book "The outline of science".

training@training-vm:~/git/exercises/shakespeare$ hadoop fs -ls input

Found 3 items

-rw-r--r-- 1 training supergroup 5342761 2009-12-30 11:57 /user/training/input/all-shakespeare

-rw-r--r-- 1 training supergroup 1427769 2010-01-04 17:42 /user/training/input/leornardo-davinci-all.txt

-rw-r--r-- 1 training supergroup 674762 2010-01-04 17:42 /user/training/input/the-outline-of-science-vol1.txt

The execution and output of running this example are shown below.

training@training-vm:~/git/exercises/shakespeare$ hadoop jar indexer.jar index.LineIndexer

10/01/04 21:11:55 INFO input.FileInputFormat: Total input paths to process : 3

10/01/04 21:11:56 INFO mapred.JobClient: Running job: job_200912301017_0017

10/01/04 21:11:57 INFO mapred.JobClient: map 0% reduce 0%

相关推荐