抽取ORACLE表数据到HIVE

软件设计

2017-09-14

#!/bin/bash
##-----------------------------------------------------------------------------------------
##-- Program    : Extract an ORACLE table into HIVE
##-- Description: Daily full extraction of one table through sqoop.
##-- Arguments  : etl_date   business date (YYYY-MM-DD); becomes the ETL_DATE partition value
##-- Environment: hadoop_script_path  script root; etc/hdp_db_info.cfg is read below it
##--              sqoop_path          directory entered before sqoop is started
##-- Author     :
##-- Created    : 2017-06-15
##-- Exit status: 0 when the import succeeds, 1 on a usage/config error or a failed import
##-----------------------------------------------------------------------------------------

# Reject calls that do not supply the business date.
if [ $# -lt 1 ]; then
  echo "本shell需要至少1个参数"
  echo "1: 日期 YYYY-MM-DD"
  exit 1
fi

# Business date.
etl_date=$1

# Shared connection config, located under ${hadoop_script_path}.
config_file="${hadoop_script_path}/etc/hdp_db_info.cfg"

if [ ! -r "${config_file}" ]; then
  echo "hdp_db_info.cfg配置文件不存在或不可读,程序异常退出。"
  exit 1
fi

# Key of the source system in the config file (first comma-separated field).
src_db="core"

# Print one field of the config entry for a source system.
#   $1 - source system key, compared exactly against the first field
#   $2 - number of the comma-separated field to print
#   $3 - config file path
# Only the first matching line is used, so a duplicated entry cannot yield a
# multi-line value. The key is handed to awk with -v instead of being spliced
# into the program text.
cfg_field() {
  awk -F ',' -v db="$1" -v col="$2" '$1 == db { print $col; exit }' "$3"
}

# The entry must exist as an exact first-field match; a substring hit anywhere
# on a line (which is what a plain grep tests) is not enough for the lookups below.
if ! awk -F ',' -v db="${src_db}" \
    '$1 == db { found = 1; exit } END { exit !found }' "${config_file}"; then
  echo "hdp_db_info.cfg配置文件中未找到${src_db}配置信息，程序异常退出。"
  exit 1
fi

# ORACLE connection settings, fields 2-6 of the config entry.
db_user=$(cfg_field "${src_db}" 2 "${config_file}")    # user name
db_passwd=$(cfg_field "${src_db}" 3 "${config_file}")  # password
db_conn=$(cfg_field "${src_db}" 4 "${config_file}")    # connect string
source_db=$(cfg_field "${src_db}" 5 "${config_file}")  # source schema
target_db=$(cfg_field "${src_db}" 6 "${config_file}")  # target (Hive) database

# An empty value would leave an option without its argument on the sqoop
# command line, so stop here with a clear message instead.
if [ -z "${db_user}" ] || [ -z "${db_passwd}" ] || [ -z "${db_conn}" ] \
    || [ -z "${source_db}" ] || [ -z "${target_db}" ]; then
  echo "hdp_db_info.cfg配置文件中${src_db}配置信息不完整，程序异常退出。"
  exit 1
fi

# Source schema name in upper case.
source_db=$(printf '%s\n' "${source_db}" | tr 'a-z' 'A-Z')

# Table being extracted.
src_table="SYS_DICT"    # ORACLE table name
hive_table="SYS_DICT"   # HIVE table name
# Filter prefix placed in front of sqoop's $CONDITIONS placeholder. It must end
# so that the placeholder still forms a valid predicate, e.g. "where " or
# "where VALIDATE_STATE = 1 and ".
condition_str="where "
query_str="ID,DICT_CODE,DICT_NAME,DICT_TYPE,VERSION,VALIDATE_STATE"  # columns to extract
pk_col="ID"             # primary-key column, used to split the work

# Enter the sqoop directory. An unset sqoop_path falls back to $HOME, which is
# where a bare `cd` went before. sqoop itself is resolved through PATH, so a
# failed cd is reported but does not abort the run.
if ! cd "${sqoop_path:-${HOME}}"; then
  echo "warning: cannot cd to '${sqoop_path:-${HOME}}', continuing in $(pwd)" >&2
fi

# Load the table from ORACLE into HIVE. Options used:
#   import                     import relational data into HDFS (and, with --hive-import, Hive)
#   --connect                  JDBC connect string of the source database
#   --username / --password    credentials for that connection
#   --query                    free-form SELECT to import; must contain the literal $CONDITIONS
#   --hive-import              load the imported data into a Hive table
#   --hive-table               Hive table name to load into
#   --target-dir               HDFS directory the import writes to
#   --delete-target-dir        remove that directory first if it already exists
#   --split-by                 column used to split the work units, normally the primary key
#   --hive-drop-import-delims  strip \n, \r and \01 from string fields on the way into Hive
#   --hive-database            Hive database that holds the target table
#   --hive-overwrite           replace data already present in the Hive table
#   --hive-partition-key       name of the Hive partition column
#   --hive-partition-value     value of that partition column for this run
# NOTE(review): --password exposes the password in the process list; sqoop also
# accepts --password-file, which would avoid that.
import_args=(
  --connect "${db_conn}"
  --username "${db_user}"
  --password "${db_passwd}"
  --query "select ${query_str} from ${source_db}.${src_table} ${condition_str} \$CONDITIONS"
  --hive-import
  --hive-table "${hive_table}"
  --target-dir "/user/${src_table}"
  --delete-target-dir
  --split-by "${pk_col}"
  --hive-drop-import-delims
  --hive-database "${target_db}"
  --hive-overwrite
  --hive-partition-key ETL_DATE
  --hive-partition-value "${etl_date}"
)
sqoop import "${import_args[@]}"

# Exit status of the sqoop run: 0 means success, anything else is a failure.
# It is taken as a plain value; a trailing ">>8" would be an append redirection
# in bash and would create a file named "8".
ret_code=$?

if [ "${ret_code}" -eq 0 ]; then
  echo "${src_table}数据抽取成功"
  exit 0
fi

# NOTE(review): after a failed Hive import a plain table import into a dated
# HDFS directory is still attempted. Its own exit status is ignored and the
# run is reported as failed either way - confirm this fallback is intended.
sqoop import \
  --connect "${db_conn}" \
  --username "${db_user}" \
  --password "${db_passwd}" \
  --table "${source_db}.${src_table}" \
  --target-dir "/user/${target_db}/${src_table}${etl_date}"
echo "${src_table}数据抽取异常"
exit 1

数据抽取 hive