DataX 支持 Oracle 增量更新(update)
DataX 介紹
DataX 是阿里巴巴集團內被廣泛使用的離線數據同步工具/平臺,實現包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、DRDS 等各種異構數據源之間高效的數據同步功能。
支持增量 oracle update
我們在使用 DataX 時,希望它能支持 Oracle 的增量導入(update),因此對源碼做了以下改造。改造後的代碼地址:https://gitee.com/cecotw/DataX
鏈接:https://pan.baidu.com/s/1mbEvLsDZZNWMYrTTTeYkAw 密碼:v97c
修改 OracleWriter.java
刪除其中對 writeMode 配置的限制,使 oraclewriter 可以接受 update 寫入模式。
修改WriterUtil.java
在 getWriteTemplate 中增加 Oracle 分支,生成 MERGE INTO 寫入語句:
/**
 * Builds the parameterized SQL template used to write one record.
 *
 * <p>The returned string still contains a {@code %s} placeholder for the table name.
 *
 * <ul>
 *   <li>{@code forceUseUpdate}, or MySql/Tddl with an "update" write mode: an
 *       {@code INSERT INTO ... VALUES(...)} statement followed by whatever
 *       {@code onDuplicateKeyUpdateString} returns.</li>
 *   <li>Oracle: the prefix returned by {@code onMergeIntoDoString} followed by an
 *       {@code INSERT (...) VALUES(...)} clause.</li>
 *   <li>Any other database: {@code <writeMode> INTO ... VALUES(...)}, where an "update"
 *       mode is downgraded to "replace".</li>
 * </ul>
 *
 * @param columnHolders  column names to write, in bind order
 * @param valueHolders   value placeholders matching {@code columnHolders}
 * @param writeMode      configured write mode; must start with insert, replace or update
 *                       (case-insensitive, surrounding whitespace ignored)
 * @param dataBaseType   target database type
 * @param forceUseUpdate when true, always use the INSERT + on-duplicate-key form
 * @return the SQL template
 * @throws DataXException with {@code ILLEGAL_VALUE} when {@code writeMode} is not supported
 */
public static String getWriteTemplate(List<String> columnHolders, List<String> valueHolders, String writeMode, DataBaseType dataBaseType, boolean forceUseUpdate) {
    // Normalize once with a fixed locale: the default-locale toLowerCase() maps "I" to a
    // dotless "ı" under e.g. a Turkish locale, which made "INSERT" fail validation.
    String normalizedMode = writeMode.trim().toLowerCase(java.util.Locale.ROOT);
    boolean isUpdateMode = normalizedMode.startsWith("update");
    boolean isWriteModeLegal = normalizedMode.startsWith("insert")
            || normalizedMode.startsWith("replace")
            || isUpdateMode;
    if (!isWriteModeLegal) {
        throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
                String.format("您所配置的 writeMode:%s 錯誤. 因爲DataX 目前僅支持replace,update 或 insert 方式. 請檢查您的配置並作出修改.", writeMode));
    }

    String joinedColumns = StringUtils.join(columnHolders, ",");
    String joinedValues = StringUtils.join(valueHolders, ",");
    boolean isMySqlFamily = dataBaseType == DataBaseType.MySql || dataBaseType == DataBaseType.Tddl;

    String writeDataSqlTemplate;
    if (forceUseUpdate || (isMySqlFamily && isUpdateMode)) {
        // INSERT followed by the on-duplicate-key clause: only MySql/Tddl reach this
        // branch through "update" mode; other callers must opt in via forceUseUpdate.
        writeDataSqlTemplate = new StringBuilder()
                .append("INSERT INTO %s (").append(joinedColumns)
                .append(") VALUES(").append(joinedValues)
                .append(")")
                .append(onDuplicateKeyUpdateString(columnHolders))
                .toString();
    } else if (dataBaseType == DataBaseType.Oracle) {
        // Oracle always goes through MERGE; the key columns are parsed from writeMode.
        // NOTE(review): this branch is also taken for insert/replace modes, where no
        // key column is parsed and the generated ON clause is empty - confirm whether
        // Oracle insert mode is expected to be supported.
        writeDataSqlTemplate = new StringBuilder()
                .append(onMergeIntoDoString(writeMode, columnHolders, valueHolders))
                .append("INSERT (").append(joinedColumns)
                .append(") VALUES(").append(joinedValues)
                .append(")")
                .toString();
    } else {
        // Safety net: databases without an update form fall back to "replace".
        String effectiveMode = isUpdateMode ? "replace" : writeMode;
        writeDataSqlTemplate = new StringBuilder()
                .append(effectiveMode)
                .append(" INTO %s (").append(joinedColumns)
                .append(") VALUES(").append(joinedValues)
                .append(")")
                .toString();
    }
    return writeDataSqlTemplate;
}
增加onMergeIntoDoString方法:
/**
 * Builds the leading part of an Oracle {@code MERGE INTO} statement, up to and including
 * {@code WHEN NOT MATCHED THEN }. The caller appends the {@code INSERT (...) VALUES(...)}
 * clause. The result still contains a {@code %s} placeholder for the table name.
 *
 * <p>Columns listed in {@code merge} (as parsed by {@code getStrings}) are the match keys:
 * each one is selected from DUAL as a bind parameter and compared in the ON clause. Every
 * other column is assigned in the {@code UPDATE SET} clause. Bind order is therefore:
 * key columns first, then non-key columns, both in {@code columnHolders} order.
 *
 * <p>When every column is a key column there is nothing to update, so the
 * {@code WHEN MATCHED} clause is omitted instead of emitting an empty (invalid)
 * {@code UPDATE SET}. The number of bind parameters is unchanged by this.
 *
 * @param merge         the configured write mode, e.g. {@code "update (seq,userid)"}
 * @param columnHolders column names to write
 * @param valueHolders  unused here; kept for interface compatibility
 * @return the MERGE statement prefix
 */
public static String onMergeIntoDoString(String merge, List<String> columnHolders, List<String> valueHolders) {
    // Parse the key columns once instead of re-wrapping the array for every column.
    List<String> keyColumns = Arrays.asList(getStrings(merge));

    StringBuilder usingSelect = new StringBuilder();
    StringBuilder onCondition = new StringBuilder();
    StringBuilder updateSet = new StringBuilder();
    for (String columnHolder : columnHolders) {
        if (keyColumns.contains(columnHolder)) {
            if (usingSelect.length() > 0) {
                usingSelect.append(",");
                onCondition.append(" AND ");
            }
            usingSelect.append("?").append(" AS ").append(columnHolder);
            onCondition.append("TMP.").append(columnHolder)
                    .append(" = ")
                    .append("A.").append(columnHolder);
        } else {
            if (updateSet.length() > 0) {
                updateSet.append(",");
            }
            updateSet.append(columnHolder).append(" = ").append("?");
        }
    }

    StringBuilder sb = new StringBuilder();
    sb.append("MERGE INTO %s A USING ( SELECT ");
    sb.append(usingSelect);
    sb.append(" FROM DUAL ) TMP ON (");
    // NOTE(review): if no configured key matches a column, the ON clause is empty and
    // the statement is invalid; that case is left unchanged here.
    sb.append(onCondition);
    sb.append(" )");
    if (updateSet.length() > 0) {
        sb.append(" WHEN MATCHED THEN UPDATE SET ");
        sb.append(updateSet);
    }
    sb.append(" WHEN NOT MATCHED THEN ");
    return sb.toString();
}
增加getStrings方法:
/**
 * Extracts the key column names from a write-mode string such as
 * {@code "update (seq,userid)"}.
 *
 * <p>Only a leading {@code update} keyword is stripped, case-insensitively, so column
 * names that merely contain the word (for example {@code update_time}) are preserved.
 * Parentheses and all whitespace are removed before splitting on commas.
 *
 * @param merge the configured write mode; may be {@code null}
 * @return the comma-separated tokens; an empty array when {@code merge} is {@code null}.
 *         A mode without a column list yields the remaining text as a single token
 *         (e.g. {@code "insert"} returns {@code ["insert"]}).
 */
public static String[] getStrings(String merge) {
    if (merge == null) {
        return new String[0];
    }
    final String keyword = "update";
    String spec = merge.trim();
    // Strip the keyword only at the start; a global replace would corrupt column
    // names such as "update_time" and would miss "UPDATE" in upper case.
    if (spec.regionMatches(true, 0, keyword, 0, keyword.length())) {
        spec = spec.substring(keyword.length());
    }
    spec = spec.replace("(", "").replace(")", "").replaceAll("\\s+", "");
    return spec.split(",");
}
修改CommonRdbmsWriter.java
/**
 * Reads records from {@code recordReceiver} and writes them to the target table in
 * batches, closing {@code connection} when done.
 *
 * <p>For Oracle the column list used to fetch column metadata is laid out as: key columns
 * (those named in {@code writeMode}), then the remaining columns, then every configured
 * column again. For other databases it is just the configured columns.
 *
 * @param recordReceiver      source of records; reading stops at the first {@code null}
 * @param taskPluginCollector collector stored on this task
 * @param connection          connection to write through; closed in all cases
 * @throws DataXException with {@code WRITE_DATA_ERROR} wrapping any failure, including
 *                        the column-count mismatch raised for non-Oracle targets
 */
public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector, Connection connection) {
    this.taskPluginCollector = taskPluginCollector;

    List<String> metaColumns = new ArrayList<>();
    if (this.dataBaseType == DataBaseType.Oracle) {
        List<String> keyColumns = Arrays.asList(WriterUtil.getStrings(this.writeMode));
        List<String> nonKeyColumns = new ArrayList<>();
        // Single pass: keys go straight to the front, the rest are appended afterwards.
        for (String name : this.columns) {
            if (keyColumns.contains(name)) {
                metaColumns.add(name);
            } else {
                nonKeyColumns.add(name);
            }
        }
        metaColumns.addAll(nonKeyColumns);
    }
    metaColumns.addAll(this.columns);

    // Column metadata drives the type conversion applied when binding values.
    this.resultSetMetaData = DBUtil.getColumnMetaData(connection,
            this.table, StringUtils.join(metaColumns, ","));

    // Prepare the SQL statement used for writing.
    calcWriteRecordSql();

    List<Record> pending = new ArrayList<Record>(this.batchSize);
    int pendingBytes = 0;
    try {
        for (Record record = recordReceiver.getFromReader();
                record != null;
                record = recordReceiver.getFromReader()) {
            if (record.getColumnNumber() != this.columnNumber && this.dataBaseType != DataBaseType.Oracle) {
                // Source and target column counts differ: fail fast (not checked for Oracle).
                throw DataXException
                        .asDataXException(
                                DBUtilErrorCode.CONF_ERROR,
                                String.format(
                                        "列配置信息有錯誤. 因爲您配置的任務中,源頭讀取字段數:%s 與 目的表要寫入的字段數:%s 不相等. 請檢查您的配置並作出修改.",
                                        record.getColumnNumber(),
                                        this.columnNumber));
            }

            pending.add(record);
            pendingBytes += record.getMemorySize();

            boolean batchFull = pending.size() >= batchSize || pendingBytes >= batchByteSize;
            if (batchFull) {
                doBatchInsert(connection, pending);
                pending.clear();
                pendingBytes = 0;
            }
        }

        // Flush whatever is left after the reader is exhausted.
        if (!pending.isEmpty()) {
            doBatchInsert(connection, pending);
            pending.clear();
        }
    } catch (Exception e) {
        throw DataXException.asDataXException(
                DBUtilErrorCode.WRITE_DATA_ERROR, e);
    } finally {
        pending.clear();
        DBUtil.closeDBResources(null, null, connection);
    }
}
/**
 * Writes {@code buffer} as one JDBC batch inside a single transaction.
 *
 * <p>For Oracle, each record's columns are first rearranged in place into the bind order
 * of the MERGE statement: key columns (those named in {@code writeMode}), then non-key
 * columns, then all columns again in their configured order.
 *
 * <p>If the batch fails with a {@link SQLException} the transaction is rolled back and
 * the buffer is handed to {@code doOneInsert} for row-by-row writing.
 *
 * @param connection connection to write through; auto-commit is switched off
 * @param buffer     records to write
 * @throws SQLException   if the rollback or the row-by-row fallback fails
 * @throws DataXException with {@code WRITE_DATA_ERROR} for any non-SQL failure
 */
protected void doBatchInsert(Connection connection, List<Record> buffer)
        throws SQLException {
    PreparedStatement preparedStatement = null;
    try {
        connection.setAutoCommit(false);
        preparedStatement = connection
                .prepareStatement(this.writeRecordSql);

        // Parse the key columns once per batch; this used to be rebuilt for every
        // column of every record. null means "no reordering" (non-Oracle targets).
        List<String> keyColumns = null;
        if (this.dataBaseType == DataBaseType.Oracle) {
            keyColumns = Arrays.asList(WriterUtil.getStrings(this.writeMode));
        }

        for (Record record : buffer) {
            if (keyColumns != null) {
                int columnCount = this.columns.size();
                List<Column> bindOrder = new ArrayList<>();
                List<Column> nonKeyValues = new ArrayList<>();
                for (int j = 0; j < columnCount; j++) {
                    if (keyColumns.contains(this.columns.get(j))) {
                        bindOrder.add(record.getColumn(j));
                    } else {
                        nonKeyValues.add(record.getColumn(j));
                    }
                }
                bindOrder.addAll(nonKeyValues);
                // Values for the trailing INSERT clause, in configured column order.
                for (int j = 0; j < columnCount; j++) {
                    bindOrder.add(record.getColumn(j));
                }
                // NOTE(review): the record is rewritten in place, so the doOneInsert
                // fallback below receives already-reordered records - confirm that
                // doOneInsert expects this layout.
                for (int j = 0; j < bindOrder.size(); j++) {
                    record.setColumn(j, bindOrder.get(j));
                }
            }
            preparedStatement = fillPreparedStatement(
                    preparedStatement, record);
            preparedStatement.addBatch();
        }

        preparedStatement.executeBatch();
        connection.commit();
    } catch (SQLException e) {
        LOG.warn("回滾此次寫入, 採用每次寫入一行方式提交. 因爲:" + e.getMessage());
        connection.rollback();
        doOneInsert(connection, buffer);
    } catch (Exception e) {
        throw DataXException.asDataXException(
                DBUtilErrorCode.WRITE_DATA_ERROR, e);
    } finally {
        DBUtil.closeDBResources(preparedStatement, null);
    }
}
效果
{
"job": {
"setting": {
"speed": {
"byte": 1048576
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "postgres",
"password": "postgres",
"connection": [
{
"querySql":["SELECT seq,userid,name FROM user"],
"jdbcUrl": [
"jdbc:postgresql://127.0.0.1:5432/postgres"
]
}
]
}
},
"writer": {
"name": "oraclewriter",
"parameter": {
"username": "oracle",
"password": "oracle",
"column": [
"seq",
"userid",
"name"
],
"connection": [
{
"jdbcUrl": "jdbc:oracle:thin:@localhost:1521:oracle",
"table": [
"user1"
]
}
],
"writeMode": "update (seq,userid)"
}
}
}
]
}
}
源碼
- DataX 改造後的代碼,請參考[這兒](https://gitee.com/cecotw/DataX)。