使用dataX将数据从Mysql数据库同步到Oracle数据
最近有需求将数据从mysql同步到oracle,之前有使用kettle将表从oracle同步到mysql,这里使用的插件依然是阿里的dataX
详细见:Githup地址:https://github.com/alibaba/DataX
这里也是根据阿里云的一篇帖子来进行验证
https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F
1 环境需要
JDK(1.8以上,推荐1.8)
Python(推荐Python2.6.X)
Apache Maven 3.x (Compile DataX)
2 #服务器配置
[ ~]# yum search java|grep jdk [ ~]# yum install java-1.8.0-openjdk.x86_64 -y [ ~]# java -version openjdk version "1.8.0_242" OpenJDK Runtime Environment (build 1.8.0_242-b07) OpenJDK 64-Bit Server VM (build 25.242-b07, mixed mode) [root@yhq ~]# python --version Python 2.6.6 下载地址:https://maven.apache.org/download.cgi [root@yhq soft]# ls apache-maven-3.6.3-bin.tar.gz apache-maven-3.6.3-bin.tar.gz [root@yhq soft]# tar -zxvf apache-maven-3.6.3-bin.tar.gz [ soft]# mkdir /opt/maven [ soft]# mv apache-maven-3.6.3/* /opt/maven/ [ soft]# ln -s /opt/maven/b bin/ boot/ [root@yhq soft]# ln -s /opt/maven/bin/mvn /usr/bin/mvn [ soft]# vim /etc/profile.d/maven.sh export M2_HOME=/opt/maven export PATH=${M2_HOME}/bin:${PATH} [root@yhq soft]# mvn -v Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f) Maven home: /opt/maven Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jre Default locale: en_US, platform encoding: UTF-8 OS name: "linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix" #安装datax [ soft]# tar -zxvf datax.tar.gz [ soft]# ls datax bin conf job lib plugin script tmp #自检测试脚本 [ soft]# python datax/bin/datax.py datax/job/job.json DataX (DATAX-OPENSOURCE-3.0), From Alibaba ! Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved. 2020-02-09 23:27:35.717 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl 2020-02-09 23:27:35.763 [main] INFO Engine - the machine info => osInfo: Oracle Corporation 1.8 25.242-b07 jvmInfo: Linux amd64 2.6.32-754.el6.x86_64 cpu num: 2 2020-02-09 23:27:46.974 [job-0] INFO JobContainer - PerfTrace not enable! 2020-02-09 23:27:46.975 [job-0] INFO StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.020s | All Task WaitReaderTime 0.121s | Percentage 100.00% 2020-02-09 23:27:46.976 [job-0] INFO JobContainer - 任务启动时刻 : 2020-02-09 23:27:35 任务结束时刻 : 2020-02-09 23:27:46 任务总计耗时 : 10s 任务平均流量 : 253.91KB/s 记录写入速度 : 10000rec/s 读出记录总数 : 100000 读写失败总数 : 0
#测试使用
mysql 表 CREATE TABLE `datax_test` ( `id` int(11) NOT NULL, `name` varchar(20) DEFAULT NULL, `birthday` date DEFAULT NULL, `memo` varchar(32) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; #插入10w数据
#在oracle里面可以先创建好表结构,或者用kettle同步表
create table datax_test (id number, name varchar2(20), birthday date, memo varchar2(32));
#创建配置文件
#可以通过命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}
# python datax.py -r streamreader -w streamwriter [ job]# vim job1.json { "job": { "setting": { "speed": { "channel": 5 } }, "content": [ { "reader": { "name": "mysqlreader", "parameter": { "username": "system", "password": "****", "column": ["*"], "connection": [ { "table": ["datax_test"], "jdbcUrl": ["jdbc:mysql://ip:3306/vision"] } ] } }, "writer": { "name": "oraclewriter", "parameter": { "username": "vision", "password": "***", "column": ["*"], "connection": [ { "jdbcUrl": "jdbc:oracle:thin:@ip:1521:bol", "table": ["datax_test"] } ] } } } ] } }
#启动数据同步
[ job]# python /data/soft/datax/bin/datax.py job1.json DataX (DATAX-OPENSOURCE-3.0), From Alibaba ! Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved. 2020-02-10 00:12:03.796 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl 2020-02-10 00:12:03.807 [main] INFO Engine - the machine info => osInfo: Oracle Corporation 1.8 25.242-b07 jvmInfo: Linux amd64 2.6.32-754.el6.x86_64 cpu num: 2 totalPhysicalMemory: -0.00G freePhysicalMemory: -0.00G maxFileDescriptorCount: -1 currentOpenFileDescriptorCount: -1 GC Names [PS MarkSweep, PS Scavenge] MEMORY_NAME | allocation_size | init_size PS Eden Space | 256.00MB | 256.00MB Code Cache | 240.00MB | 2.44MB Compressed Class Space | 1,024.00MB | 0.00MB PS Survivor Space | 42.50MB | 42.50MB PS Old Gen | 683.00MB | 683.00MB Metaspace | -0.00MB | 0.00MB 2020-02-10 00:12:03.869 [main] INFO Engine - 2020-02-10 00:13:48.061 [job-0] INFO JobContainer - PerfTrace not enable! 2020-02-10 00:13:48.061 [job-0] INFO StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 42.323s | All Task WaitReaderTime 29.424s | Percentage 100.00% 2020-02-10 00:13:48.069 [job-0] INFO JobContainer - 任务启动时刻 : 2020-02-10 00:12:05 任务结束时刻 : 2020-02-10 00:13:48 任务总计耗时 : 102s 任务平均流量 : 21.96KB/s 记录写入速度 : 1190rec/s 读出记录总数 : 99999 读写失败总数 : 0
#在oracle查看数据
select count(*) from datax_test; #同步正常
#剩下的增量同步,在crontab中定时任务就行。
$ crontab -e
#会进入已有crontab文件编辑界面,继续增加定时任务即可,本示例增加以下内容,并保存
0,10,20,30,40,50 * * * * python /data/soft/datax/bin/datax.py /data/soft/datax/job/mysql2odps.json >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S` 2>&1
测试环境
#测试环境 ==mysql db 5.7.27 centos 6.10 create database vision;==oracle db 11.2.0.4 centos 6.10 select file#,name from v$datafile; create tablespace vision logging datafile ‘/u01/app/oracle/oradata/bol/vision01.dbf‘ size 100m autoextend on next 500m; create user vision identified by **** default tablespace vision ; grant connect,resource,dba to vision;== delimiter // create procedure insrt_t () begin declare i int default 1; declare j int default 1; while i < 100000 do #insert into test values (i,‘a‘); insert into datax_test (id,name,birthday,memo) VALUES (i,‘yhq‘,now(),‘123‘); set i=i+1; set j=j+1; if j=5000 then set j=1; commit; end if; end while; end // delimiter ;