一、關鍵點
要使用Oozie提交的Job,其中的Action(包括Java、Shell等等)必須能在任意一個NodeManager所在的主機上運行,其執行環境、依賴文件(jar等等)、執行用戶、用戶權限、輸入輸出路徑必須在所有NodeManager的主機上配置好
二、NodeManager部署
基於上述描述,首先應該將集羣中的每個NodeManager節點都配置好,都能單獨運行datax
1、上傳datax安裝包
scp datax.tar.gz <NodeManager主機>:/usr/local/
(需對每個NodeManager主機各執行一次;此處不能使用hdfs dfs -put,該命令會把文件寫入HDFS,而下一步是在各主機本地的/usr/local目錄下解壓)
2、解壓到/usr/local
cd /usr/local
tar -xvf datax.tar.gz
3、編寫全局執行腳本
vim /usr/local/datax/datax_start.sh
#!/bin/bash
# Wrapper invoked by the Oozie shell action to run one DataX job.
#   $1 - DataX job configuration (JSON text)
#   $2 - file name under /usr/local/datax/job/ to store that configuration in

if [ "$#" -lt 2 ]; then
    echo "usage: $0 <datax-json-config> <config-file-name>" >&2
    exit 1
fi

# Stop if the config cannot be written, so DataX never runs on a stale file.
set -e

# Quote both arguments: unquoted, JSON containing spaces, '*' or '[...]' would
# be word-split or glob-expanded. printf is used instead of echo so that a
# value starting with '-n' or '-e' is not interpreted as an option.
printf '%s\n' "$1" > "/usr/local/datax/job/$2"

# The script's exit status is DataX's exit status, which is what the Oozie
# shell action uses to choose between its ok and error transitions.
python2.7 /usr/local/datax/bin/datax.py "/usr/local/datax/job/$2"
其中$1 :workflow.xml文件中的第一個argument,datax的配置內容
$2:workflow.xml文件中的第二個argument,配置文件名
4、賦予執行權限並創建軟鏈接(符號鏈接)
chmod +x /usr/local/datax/datax_start.sh
ln -s /usr/local/datax/datax_start.sh /usr/bin/dataxstart
三、測試
dataxstart "{}" "11.json"
四、oozie配置文件
job.properties
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# HDFS URI; substituted into the workflow's <name-node> element and into the
# application path below.
nameNode=hdfs://hadoop-ha
# Substituted into the workflow's <job-tracker> element.
# NOTE(review): "yarn-ha" looks like a ResourceManager HA id - confirm against the cluster's yarn-site.xml.
jobTracker=yarn-ha
# Queue name; the workflow sets it as mapred.job.queue.name for the shell action.
queueName=default
# Path component used only to build oozie.wf.application.path below.
examplesRoot=examples
# HDFS directory of the workflow application (the directory expected to hold workflow.xml).
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/datax
workflow.xml
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<workflow-app xmlns="uri:oozie:workflow:0.4" name="shell-datax">
    <start to="shell-node"/>

    <!--
        Runs the "dataxstart" command as an Oozie shell action. The command is
        resolved on whichever NodeManager host executes the action, so it must
        be installed on every NodeManager host.
    -->
    <action name="shell-node">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <configuration>
                <property>
                    <name>mapred.job.queue.name</name>
                    <value>${queueName}</value>
                </property>
            </configuration>
            <exec>dataxstart</exec>
            <!-- First argument: the DataX job configuration (JSON text). -->
            <argument>{"job": {}}</argument>
            <!-- Second argument: file name the configuration is written to. -->
            <argument>11.json</argument>
            <!--
                <capture-output/> is intentionally omitted: it requires the
                command's stdout to be in Java properties format and within
                Oozie's output size limit, which DataX's log output is not,
                and no node in this workflow reads the captured data.
            -->
        </shell>
        <ok to="end"/>
        <error to="fail"/>
    </action>

    <!--
        The "check-output" decision and "fail-output" kill nodes copied from
        the Oozie shell example were removed: no transition led to them, and
        they tested a "my_output" value that dataxstart never emits.
    -->
    <kill name="fail">
        <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>

    <end name="end"/>
</workflow-app>