使用dataX将数据从Mysql数据库同步到Oracle数据

最近有需求将数据从mysql同步到oracle,之前有使用kettle将表从oracle同步到mysql,这里使用的插件依然是阿里的dataX

详细见:Githup地址:https://github.com/alibaba/DataX

这里也是根据阿里云的一篇帖子来进行验证

https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F

1 环境需要

    JDK(1.8以上,推荐1.8)
    Python(推荐Python2.6.X)
    Apache Maven 3.x (Compile DataX)

2 #服务器配置

[ ~]# yum search java|grep jdk
[ ~]# yum install java-1.8.0-openjdk.x86_64 -y
[ ~]# java -version
openjdk version "1.8.0_242"
OpenJDK Runtime Environment (build 1.8.0_242-b07)
OpenJDK 64-Bit Server VM (build 25.242-b07, mixed mode)
[root@yhq ~]# python --version
Python 2.6.6
下载地址:https://maven.apache.org/download.cgi
[root@yhq soft]# ls apache-maven-3.6.3-bin.tar.gz 
apache-maven-3.6.3-bin.tar.gz
[root@yhq soft]# tar -zxvf apache-maven-3.6.3-bin.tar.gz 
[ soft]#  mkdir /opt/maven
[ soft]# mv apache-maven-3.6.3/* /opt/maven/
[ soft]# ln -s /opt/maven/b
bin/  boot/ 
[root@yhq soft]# ln -s /opt/maven/bin/mvn /usr/bin/mvn
[ soft]# vim /etc/profile.d/maven.sh
export M2_HOME=/opt/maven
export PATH=${M2_HOME}/bin:${PATH}
[root@yhq soft]# mvn -v
Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f)
Maven home: /opt/maven
Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jre
Default locale: en_US, platform encoding: UTF-8
OS name: "linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix"
#安装datax
[ soft]# tar -zxvf datax.tar.gz
[ soft]# ls datax
bin  conf  job  lib  plugin  script  tmp
#自检测试脚本
[ soft]# python datax/bin/datax.py datax/job/job.json 

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.


2020-02-09 23:27:35.717 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-09 23:27:35.763 [main] INFO  Engine - the machine info  => 

    osInfo:    Oracle Corporation 1.8 25.242-b07
    jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
    cpu num:    2
2020-02-09 23:27:46.974 [job-0] INFO  JobContainer - PerfTrace not enable!
2020-02-09 23:27:46.975 [job-0] INFO  StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.020s |  All Task WaitReaderTime 0.121s | Percentage 100.00%
2020-02-09 23:27:46.976 [job-0] INFO  JobContainer - 
任务启动时刻                    : 2020-02-09 23:27:35
任务结束时刻                    : 2020-02-09 23:27:46
任务总计耗时                    :                 10s
任务平均流量                    :          253.91KB/s
记录写入速度                    :          10000rec/s
读出记录总数                    :              100000
读写失败总数                    :                   0

#测试使用

mysql 表
 CREATE TABLE `datax_test` (
  `id` int(11) NOT NULL,
  `name` varchar(20) DEFAULT NULL,
  `birthday` date DEFAULT NULL,
  `memo` varchar(32) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
#插入10w数据

#在oracle里面可以先创建好表结构,或者用kettle同步表

create table datax_test
(id number,
name varchar2(20),
birthday date,
memo varchar2(32));

#创建配置文件
#可以通过命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}

#  python datax.py -r streamreader -w streamwriter
[ job]# vim job1.json
{
    "job": {
        "setting": {
            "speed": {
                "channel": 5
            }
        },
        "content": [
            {
                "reader": {
                    "name": "mysqlreader",
                    "parameter": {
                        "username": "system",
                        "password": "****",
                        "column": ["*"],
                        "connection": [
                           {
                              "table": ["datax_test"],
                              "jdbcUrl": ["jdbc:mysql://ip:3306/vision"]
                           }
                        ]
                    }
                },
                "writer": {
                    "name": "oraclewriter",
                    "parameter": {
                        "username": "vision",
                        "password": "***",
                        "column": ["*"],
                        "connection": [
                            {
                               "jdbcUrl": "jdbc:oracle:thin:@ip:1521:bol",
                               "table": ["datax_test"]
                            }
                        ]
                      }
                    }
                }
        ]
    }
}

#启动数据同步

[ job]# python /data/soft/datax/bin/datax.py job1.json 

DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.


2020-02-10 00:12:03.796 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-10 00:12:03.807 [main] INFO  Engine - the machine info  => 

    osInfo:    Oracle Corporation 1.8 25.242-b07
    jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
    cpu num:    2

    totalPhysicalMemory:    -0.00G
    freePhysicalMemory:    -0.00G
    maxFileDescriptorCount:    -1
    currentOpenFileDescriptorCount:    -1

    GC Names    [PS MarkSweep, PS Scavenge]

    MEMORY_NAME                    | allocation_size                | init_size                      
    PS Eden Space                  | 256.00MB                       | 256.00MB                       
    Code Cache                     | 240.00MB                       | 2.44MB                         
    Compressed Class Space         | 1,024.00MB                     | 0.00MB                         
    PS Survivor Space              | 42.50MB                        | 42.50MB                        
    PS Old Gen                     | 683.00MB                       | 683.00MB                       
    Metaspace                      | -0.00MB                        | 0.00MB                         


2020-02-10 00:12:03.869 [main] INFO  Engine - 
2020-02-10 00:13:48.061 [job-0] INFO  JobContainer - PerfTrace not enable!
2020-02-10 00:13:48.061 [job-0] INFO  StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 42.323s |  All Task WaitReaderTime 29.424s | Percentage 100.00%
2020-02-10 00:13:48.069 [job-0] INFO  JobContainer - 
任务启动时刻                    : 2020-02-10 00:12:05
任务结束时刻                    : 2020-02-10 00:13:48
任务总计耗时                    :                102s
任务平均流量                    :           21.96KB/s
记录写入速度                    :           1190rec/s
读出记录总数                    :               99999
读写失败总数                    :                   0

#在oracle查看数据
select count(*) from datax_test; #同步正常
#剩下的增量同步,在crontab中定时任务就行。
$ crontab -e
#会进入已有crontab文件编辑界面,继续增加定时任务即可,本示例增加以下内容,并保存
0,10,20,30,40,50 * * * *  python /data/soft/datax/bin/datax.py /data/soft/datax/job/mysql2odps.json  >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S`  2>&1

测试环境

#测试环境
==mysql db 5.7.27 centos 6.10
create database vision;==oracle db 11.2.0.4  centos 6.10
select file#,name from v$datafile;

create tablespace vision
  logging
  datafile ‘/u01/app/oracle/oradata/bol/vision01.dbf‘
  size 100m
  autoextend on
  next 500m;
  
  create user vision identified by ****  default tablespace vision ;
  grant connect,resource,dba to vision;==
delimiter //
create procedure insrt_t ()
begin
declare i int default 1;
declare j int default 1;
while i < 100000 do
#insert into test values (i,‘a‘);
insert into datax_test
(id,name,birthday,memo)
VALUES
(i,‘yhq‘,now(),‘123‘);
set i=i+1;
set j=j+1;
if j=5000 then
set j=1;
commit;
end if;
end while;
end
//
delimiter ;

相关推荐