# 国内镜像下载: https://npm.taobao.org/mirrors/python/2.7.18/Python-2.7.18.tgz export PYTHON_HOME="/usr/local/python27" wget https://www.python.org/ftp/python/2.7.18/Python-2.7.18.tgz tar -zxf Python-2.7.18.tgz -C /usr/local/ apt install gcc g++ make cd /usr/local/Python-2.7.18 ./configure --prefix=/usr/local/python27 make && make install ln -s /usr/local/python27/bin/python2.7 /usr/bin/python ln -s /usr/local/python27/bin/python2.7 /usr/bin/python2 python --version && python2 --version
export DATAX_HOME="/usr/local/datax" $ wget http://datax-opensource.oss-cn-hangzhou.aliyuncs.com/datax.tar.gz $ tar -zxf datax.tar.gz -C /usr/local/ $ cd ${DATAX_HOME}/bin $ python ${DATAX_HOME}/bin/datax.py ${DATAX_HOME}/job/job.json $ ln -s ${DATAX_HOME}/bin/datax.py /usr/local/datax.py
运行测试 描述: 采用 Datax 自带的 job/job.json 进行运行测试验证安装环境。 /usr/local/datax# ./bin/datax.py job/job.json # (1) 显示机器相关信息(CPU/内存、以及JVM相关信息) 2021-10-26 11:20:54.301 [main] INFO Engine - the machine info => osInfo: Eclipse Foundation 16 16.0.2+7 jvmInfo: Linux amd64 5.4.0-88-generic cpu num: 4 totalPhysicalMemory: -0.00G freePhysicalMemory: -0.00G maxFileDescriptorCount: -1 currentOpenFileDescriptorCount: -1 GC Names [G1 Young Generation, G1 Old Generation] MEMORY_NAME | allocation_size | init_size CodeHeap 'profiled nmethods' | 117.21MB | 2.44MB G1 Old Gen | 1,024.00MB | 970.00MB G1 Survivor Space | -0.00MB | 0.00MB CodeHeap 'non-profiled nmethods' | 117.22MB | 2.44MB Compressed Class Space | 1,024.00MB | 0.00MB Metaspace | -0.00MB | 0.00MB G1 Eden Space | -0.00MB | 54.00MB CodeHeap 'non-nmethods' | 5.57MB | 2.44MB # (2) Job 任务执行情况 2021-10-26 11:21:04.364 [job-0] INFO StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.021s | All Task WaitReaderTim # (3) job 任务执行CPU与GC占比信息 2021-10-26 11:21:04.367 [job-0] INFO JobContainer - [total cpu info] => averageCpu | maxDeltaCpu | minDeltaCpu -1.00% | -1.00% | -1.00% [total gc info] => NAME | totalGCCount | maxDeltaGCCount | minDeltaGCCount | totalGCTime | maxDeltaGCTime | minDeltaGCTime G1 Young Generation | 0 | 0 | 0 | 0.000s | 0.000s | 0.000s G1 Old Generation | 0 | 0 | 0 | 0.000s | 0.000s | 0.000s # (4) Job 任务执行完毕总计数据(非常重要) 、可以验证同步的数据是否全部同步成功。 2021-10-26 11:21:04.367 [job-0] INFO StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.021s | All Task WaitReaderTime 0.041s | Percentage 100.00% 2021-10-26 11:21:04.368 [job-0] INFO JobContainer - 任务启动时刻 : 2021-10-26 11:20:54 任务结束时刻 : 2021-10-26 11:21:04 任务总计耗时 : 10s 任务平均流量 : 253.91KB/s 记录写入速度 : 10000rec/s 读出记录总数 : 100000 读写失败总数 : 0 2.基础使用 描述: 
我们可以通过DataX数据源参考指南(https://github.com/alibaba/DataX/wiki/DataX-all-data-channels)来查看每个插件必需或可选的参数。 插件示例获取: $ ./bin/datax.py -r streamreader -w streamwriter # (1) 此处将会显示 读写 插件的使用文档说明 Please refer to the streamreader document: https://github.com/alibaba/DataX/blob/master/streamreader/doc/streamreader.md Please refer to the streamwriter document: https://github.com/alibaba/DataX/blob/master/streamwriter/doc/streamwriter.md # (2) 命令执行示例 python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json # (3) Job 任务配置示例 Json 格式 (以下参数我简单描述) tee job/stream2stream.json <<'EOF' { "job": { "content": [ { // 读插件 "reader": { "name": "streamreader", // 指定插件名称 "parameter": { "column": [ // 字段类型与值 (必须进行指定) { "value": "WeiyiGeek", "type": "string" }, { "value": 2021, "type": "long" }, { "value": "2021-01-01 00:00:00", "type": "date" }, { "value": true, "type": "bool" }, { "value": "test", "type": "bytes" } ], "sliceRecordCount": "10" // 切片记录计数 } }, // 写插件 "writer": { "name": "streamwriter", // 指定使用的插件名称 "parameter": { "encoding": "UTF-8", // 编码格式 "print": true // 是否终端打印 } } } ], "setting": { "speed": { // 同步速度采用的类型 "channel": "2" // 并发数 //"byte": 10485760 // 字节数 } } } } EOF 执行结果: (执行时请删除上述注释, JSON 不支持 // 注释) python bin/datax.py job/stream2stream.json # (1) 两个任务进程 2021-10-26 16:28:33.568 [taskGroup-0] INFO TaskGroupContainer - taskGroupId=[0] start [2] channels for [2] tasks. 
# (2) 每个任务进程执行10条 (即总数20条) 2021-10-26 16:28:33.579 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started 2021-10-26 16:28:33.595 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[1] attemptCount[1] is started WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test WeiyiGeek 2021 2021-01-01 00:00:00 true test # (3) 执行结果信息 2021-10-26 16:28:43.576 [job-0] INFO StandAloneJobContainerCommunicator - Total 20 records, 520 bytes | Speed 52B/s, 2 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.002s | Percentage 100.00% 2021-10-26 16:28:43.576 [job-0] INFO JobContainer - 任务启动时刻 : 2021-10-26 16:28:33 任务结束时刻 : 2021-10-26 16:28:43 任务总计耗时 : 10s 任务平均流量 : 52B/s 记录写入速度 : 2rec/s 读出记录总数 : 20 读写失败总数 : 0
标签:bin,MB,安装,job,usr,local,datax From: https://www.cnblogs.com/Lcch/p/16760659.html