MapReduce直接连接MySQL获取数据

longxiang

2013-07-31

MySQL中数据：

MapReduce程序代码，ConnMysql.java:

package com.linuxidc.study;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.net.URI;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Iterator;

import org.apache.Hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ConnMysql {

private static Configuration conf = new Configuration();

static {
conf.addResource(new Path("F:/linuxidc-hadoop/hdfs-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/mapred-site.xml"));
conf.addResource(new Path("F:/linuxidc-hadoop/core-site.xml"));
conf.set("mapred.job.tracker", "10.133.103.21:50021");
}

public static class TblsRecord implements Writable, DBWritable {
String tbl_name;
String tbl_type;

public TblsRecord() {

}

@Override
public void write(PreparedStatement statement) throws SQLException {
// TODO Auto-generated method stub
statement.setString(1, this.tbl_name);
statement.setString(2, this.tbl_type);
}

@Override
public void readFields(ResultSet resultSet) throws SQLException {
// TODO Auto-generated method stub
this.tbl_name = resultSet.getString(1);
this.tbl_type = resultSet.getString(2);
}

@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
Text.writeString(out, this.tbl_name);
Text.writeString(out, this.tbl_type);
}

@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.tbl_name = Text.readString(in);
this.tbl_type = Text.readString(in);
}

public String toString() {
return new String(this.tbl_name + " " + this.tbl_type);
}

}

public static class ConnMysqlMapper extends Mapper<LongWritable,TblsRecord,Text,Text> {
public void map(LongWritable key,TblsRecord values,Context context)
throws IOException,InterruptedException {
context.write(new Text(values.tbl_name), new Text(values.tbl_type));
}
}

public static class ConnMysqlReducer extends Reducer<Text,Text,Text,Text> {
public void reduce(Text key,Iterable<Text> values,Context context)
throws IOException,InterruptedException {
for(Iterator<Text> itr = values.iterator();itr.hasNext();) {
context.write(key, itr.next());
}
}
}

public static void main(String[] args) throws Exception {
Path output = new Path("/user/linuxidc/output/");

FileSystem fs = FileSystem.get(URI.create(output.toString()), conf);
if (fs.exists(output)) {
fs.delete(output);
}

//mysql的jdbc驱动
DistributedCache.addFileToClassPath(new Path(
"hdfs://hd022-test.nh.sdo.com/user/liuxiaowen/mysql-connector-java-5.1.13-bin.jar"), conf);

DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
"jdbc:mysql://10.133.103.22:3306/hive", "hive", "hive");

Job job = new Job(conf,"test mysql connection");
job.setJarByClass(ConnMysql.class);

job.setMapperClass(ConnMysqlMapper.class);
job.setReducerClass(ConnMysqlReducer.class);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

job.setInputFormatClass(DBInputFormat.class);
FileOutputFormat.setOutputPath(job, output);

//列名
String[] fields = { "TBL_NAME", "TBL_TYPE" };
//六个参数分别为：
//1.Job;2.Class<? extends DBWritable>
//3.表名;4.where条件
//5.order by语句;6.列名
DBInputFormat.setInput(job, TblsRecord.class,
"linuxidc_tbls", "TBL_NAME like 'linuxidc%'", "TBL_NAME", fields);

System.exit(job.waitForCompletion(true) ? 0 : 1);
}

}

运行结果：

推荐阅读：

mapreduce apache mysql table

安科网

MapReduce直接连接MySQL获取数据

longxiang

longxiang

相关推荐

hadoop框架三大组件hdfs、mapreduce、yarn 内容

JStorm介绍

mapreduce求topN

HBase与MapReduce交互

分布式计算框架——MapReduce

Hadoop之MapReduce学习(三)之ip去重、MaxScore示例、TotalScoreMapper示例

HBase和MapReduce

MapReduce(分布式计算)_01

MapReduce编程模型

bitmap、Trie、数据库索引、倒排索引、外排序、Mapreduce

MapReduce的使用

Hadoop基础---MapReduce对数据进行排序

Hadoop——MapReduce介绍

Hadoop架构及集群

MapReduce编程实战（2）-词频统计结果存入mysql数据库

MapReduce和Hive学习文档链接学习顺序

Hadoop学习之路(7)MapReduce自定义排序

python| MongoDB聚合(count、distinct、group、MapReduce)

Hadoop

hadoop自带性能测试

longxiang