DataX 支持 Oracle 增量寫入(writeMode update)

datax介紹

DataX 是阿里巴巴集團內被廣泛使用的離線數據同步工具/平臺,實現包括 MySQL、Oracle、SqlServer、Postgre、HDFS、Hive、ADS、HBase、TableStore(OTS)、MaxCompute(ODPS)、DRDS 等各種異構數據源之間高效的數據同步功能。

支持增量 oracle update

我們在使用 DataX 時,希望它支持向 Oracle 增量導入數據(存在則更新,不存在則插入)。改造後的代碼地址:https://gitee.com/cecotw/DataX

鏈接:https://pan.baidu.com/s/1mbEvLsDZZNWMYrTTTeYkAw 密碼:v97c

修改 OracleWriter.java

刪除限制
在這裏插入圖片描述

修改WriterUtil.java

添加oracle 數據插入類型轉換:
在這裏插入圖片描述

    /**
     * Builds the SQL template used to write one record.
     *
     * <p>The returned string still contains a {@code %s} placeholder where the
     * table name is expected. Three statement shapes are produced:
     * <ul>
     *   <li>{@code forceUseUpdate}, or MySql/Tddl with an "update" write mode:
     *       {@code INSERT INTO %s (...) VALUES(...)} followed by whatever
     *       {@code onDuplicateKeyUpdateString(columnHolders)} returns;</li>
     *   <li>Oracle: the MERGE head built by {@code onMergeIntoDoString}
     *       followed by {@code INSERT (...) VALUES(...)};</li>
     *   <li>every other database: {@code <writeMode> INTO %s (...) VALUES(...)},
     *       where an "update" mode is replaced by {@code replace}.</li>
     * </ul>
     *
     * <p>NOTE(review): Oracle takes the MERGE path for every legal write mode,
     * not only "update". The MERGE key columns are parsed out of
     * {@code writeMode}, so a mode without a column list gives the MERGE no key
     * columns — confirm whether plain insert should be supported for Oracle.
     *
     * @param columnHolders  column names, joined with "," into the column list
     * @param valueHolders   value placeholders, joined with "," into VALUES(...)
     * @param writeMode      configured write mode; must start (ignoring case and
     *                       surrounding whitespace) with insert, replace or update
     * @param dataBaseType   target database type, selects the statement shape
     * @param forceUseUpdate forces the first statement shape regardless of database
     * @return the SQL template for a single-row write
     * @throws DataXException if {@code writeMode} is not one of the supported modes
     */
    public static String getWriteTemplate(List<String> columnHolders, List<String> valueHolders, String writeMode, DataBaseType dataBaseType, boolean forceUseUpdate) {
        // Normalize once and independently of the JVM default locale: with a
        // Turkish default locale "INSERT".toLowerCase() is not "insert".
        String normalizedMode = writeMode.trim().toLowerCase(java.util.Locale.ROOT);
        boolean updateMode = normalizedMode.startsWith("update");
        boolean isWriteModeLegal = normalizedMode.startsWith("insert")
                || normalizedMode.startsWith("replace")
                || updateMode;

        if (!isWriteModeLegal) {
            throw DataXException.asDataXException(DBUtilErrorCode.ILLEGAL_VALUE,
                    String.format("您所配置的 writeMode:%s 錯誤. 因爲DataX 目前僅支持replace,update 或 insert 方式. 請檢查您的配置並作出修改.", writeMode));
        }

        String columnList = StringUtils.join(columnHolders, ",");
        String valueList = StringUtils.join(valueHolders, ",");

        boolean mySqlFamily = dataBaseType == DataBaseType.MySql || dataBaseType == DataBaseType.Tddl;
        if (forceUseUpdate || (mySqlFamily && updateMode)) {
            // Insert plus the suffix supplied by onDuplicateKeyUpdateString.
            return new StringBuilder()
                    .append("INSERT INTO %s (").append(columnList)
                    .append(") VALUES(").append(valueList)
                    .append(")")
                    .append(onDuplicateKeyUpdateString(columnHolders))
                    .toString();
        }

        if (dataBaseType == DataBaseType.Oracle) {
            // The MERGE head ends with "WHEN NOT MATCHED THEN "; the insert
            // branch of the MERGE is appended here.
            return new StringBuilder()
                    .append(onMergeIntoDoString(writeMode, columnHolders, valueHolders))
                    .append("INSERT (").append(columnList)
                    .append(") VALUES(").append(valueList)
                    .append(")")
                    .toString();
        }

        // Safety net: databases without an update statement shape fall back to
        // "replace" when "update" was configured. Otherwise the configured
        // mode text is used verbatim as the statement verb.
        String statementVerb = updateMode ? "replace" : writeMode;
        return new StringBuilder()
                .append(statementVerb)
                .append(" INTO %s (").append(columnList)
                .append(") VALUES(").append(valueList)
                .append(")")
                .toString();
    }

增加onMergeIntoDoString方法:
在這裏插入圖片描述

    /**
     * Builds the head of an Oracle {@code MERGE INTO} statement, up to and
     * including {@code "WHEN NOT MATCHED THEN "}; the caller appends the
     * insert branch.
     *
     * <p>Columns named in {@code merge} (as parsed by {@link #getStrings})
     * are the match keys: they are selected from DUAL as bind parameters and
     * compared in the ON clause. All remaining columns are assigned in the
     * {@code UPDATE SET} list. The {@code ?} placeholders therefore appear in
     * this order: key columns first, then non-key columns, each group in
     * {@code columnHolders} order.
     *
     * <p>The table name is left as a {@code %s} placeholder.
     *
     * @param merge         write mode text carrying the key columns,
     *                      e.g. {@code "update (seq,userid)"}
     * @param columnHolders all columns being written
     * @param valueHolders  not used by this method; every bind parameter is
     *                      emitted as a plain {@code ?}
     * @return the MERGE head ending in {@code " WHEN NOT MATCHED THEN "}
     * @throws IllegalArgumentException if none of {@code columnHolders} is a
     *                                  key column, because the ON clause would
     *                                  be empty and the statement invalid
     */
    public static String onMergeIntoDoString(String merge, List<String> columnHolders, List<String> valueHolders) {
        // Parsed once; the membership test is case-sensitive, as before.
        List<String> keyColumns = Arrays.asList(getStrings(merge));

        StringBuilder selectList = new StringBuilder();
        StringBuilder onCondition = new StringBuilder();
        StringBuilder updateSet = new StringBuilder();
        for (String columnHolder : columnHolders) {
            if (keyColumns.contains(columnHolder)) {
                if (selectList.length() > 0) {
                    selectList.append(",");
                    onCondition.append(" AND ");
                }
                selectList.append("? AS ").append(columnHolder);
                onCondition.append("TMP.").append(columnHolder)
                        .append(" = ")
                        .append("A.").append(columnHolder);
            } else {
                if (updateSet.length() > 0) {
                    updateSet.append(",");
                }
                updateSet.append(columnHolder).append(" = ").append("?");
            }
        }

        if (selectList.length() == 0) {
            // Without a key the statement would read "SELECT  FROM DUAL ... ON ( )".
            throw new IllegalArgumentException(
                    "Cannot build Oracle MERGE: none of the columns " + columnHolders
                            + " is listed as a key column in writeMode '" + merge
                            + "'. Expected a writeMode such as \"update (keyColumn1,keyColumn2)\".");
        }

        StringBuilder sb = new StringBuilder();
        sb.append("MERGE INTO %s A USING ( SELECT ");
        sb.append(selectList);
        sb.append(" FROM DUAL ) TMP ON (");
        sb.append(onCondition);
        sb.append(" )");
        if (updateSet.length() > 0) {
            sb.append(" WHEN MATCHED THEN UPDATE SET ");
            sb.append(updateSet);
        }
        // When every column is a key there is nothing to update; an empty
        // "UPDATE SET" is a syntax error, so the WHEN MATCHED clause is
        // omitted and matched rows are left untouched.
        sb.append(" WHEN NOT MATCHED THEN ");
        return sb.toString();
    }

增加getStrings方法:
在這裏插入圖片描述

    /**
     * Extracts the key column names from a write mode such as
     * {@code "update (seq,userid)"}.
     *
     * <p>Only a leading {@code update} keyword is removed, ignoring case, so
     * column names that merely contain the word (for example
     * {@code update_time}) are preserved. Parentheses and all whitespace are
     * then dropped and the remainder is split on commas.
     *
     * <p>For a mode without a column list the remaining text is returned as
     * the single element, e.g. {@code "insert"} gives {@code ["insert"]} and
     * {@code "update"} gives {@code [""]}.
     *
     * @param merge the configured write mode text; must not be {@code null}
     * @return the comma-separated tokens left after stripping the keyword,
     *         parentheses and whitespace
     */
    public static String[] getStrings(String merge) {
        final String keyword = "update";
        String columnSpec = merge.trim();
        // Strip the mode keyword only where it acts as the keyword: at the start.
        if (columnSpec.regionMatches(true, 0, keyword, 0, keyword.length())) {
            columnSpec = columnSpec.substring(keyword.length());
        }
        columnSpec = columnSpec.replace("(", "")
                .replace(")", "")
                .replaceAll("\\s+", "");
        return columnSpec.split(",");
    }

修改CommonRdbmsWriter.java

在這裏插入圖片描述

        /**
         * Reads records from {@code recordReceiver} and writes them in batches
         * through {@code connection}.
         *
         * <p>Before writing, column metadata is loaded for the target table
         * and the write SQL is computed via {@code calcWriteRecordSql()}. For
         * Oracle the metadata is requested in this column order: key columns
         * (those named in {@code writeMode}), then non-key columns, then all
         * configured columns again. For other databases only the configured
         * columns are requested.
         *
         * <p>A batch is flushed with {@code doBatchInsert} once it holds
         * {@code batchSize} records or {@code batchByteSize} bytes, and once
         * more at the end for any remainder.
         *
         * <p>The connection is closed before this method returns, on every
         * path, including failures while loading metadata or computing the SQL.
         *
         * @param recordReceiver      source of records; {@code null} from
         *                            {@code getFromReader()} ends the loop
         * @param taskPluginCollector stored on this task
         * @param connection          connection to write through; closed by this method
         * @throws DataXException with {@code CONF_ERROR} when a record's column
         *                        count differs from {@code columnNumber} (not
         *                        checked for Oracle), or {@code WRITE_DATA_ERROR}
         *                        wrapping any failure while receiving or writing
         */
        public void startWriteWithConnection(RecordReceiver recordReceiver, TaskPluginCollector taskPluginCollector, Connection connection) {
            this.taskPluginCollector = taskPluginCollector;
            try {
                List<String> metaColumns = new ArrayList<>();
                if (this.dataBaseType == DataBaseType.Oracle) {
                    List<String> keyColumns = Arrays.asList(WriterUtil.getStrings(this.writeMode));
                    for (String column : this.columns) {
                        if (keyColumns.contains(column)) {
                            metaColumns.add(column);
                        }
                    }
                    for (String column : this.columns) {
                        if (!keyColumns.contains(column)) {
                            metaColumns.add(column);
                        }
                    }
                }
                metaColumns.addAll(this.columns);

                // Target column types, used to convert values when writing.
                this.resultSetMetaData = DBUtil.getColumnMetaData(connection,
                        this.table, StringUtils.join(metaColumns, ","));
                // SQL statement used to write to the database.
                calcWriteRecordSql();

                List<Record> writeBuffer = new ArrayList<Record>(this.batchSize);
                int bufferBytes = 0;
                try {
                    Record record;
                    while ((record = recordReceiver.getFromReader()) != null) {
                        // NOTE(review): the column-count check is skipped for Oracle;
                        // the reason is not visible in this method — confirm.
                        if (record.getColumnNumber() != this.columnNumber && this.dataBaseType != DataBaseType.Oracle) {
                            // The reader produced a different number of columns than
                            // the writer is configured for: fail immediately.
                            throw DataXException
                                    .asDataXException(
                                            DBUtilErrorCode.CONF_ERROR,
                                            String.format(
                                                    "列配置信息有錯誤. 因爲您配置的任務中,源頭讀取字段數:%s 與 目的表要寫入的字段數:%s 不相等. 請檢查您的配置並作出修改.",
                                                    record.getColumnNumber(),
                                                    this.columnNumber));
                        }

                        writeBuffer.add(record);
                        bufferBytes += record.getMemorySize();

                        if (writeBuffer.size() >= batchSize || bufferBytes >= batchByteSize) {
                            doBatchInsert(connection, writeBuffer);
                            writeBuffer.clear();
                            bufferBytes = 0;
                        }
                    }
                    if (!writeBuffer.isEmpty()) {
                        doBatchInsert(connection, writeBuffer);
                        writeBuffer.clear();
                    }
                } catch (Exception e) {
                    throw DataXException.asDataXException(
                            DBUtilErrorCode.WRITE_DATA_ERROR, e);
                } finally {
                    writeBuffer.clear();
                }
            } finally {
                // The outer finally also covers the metadata lookup and SQL
                // calculation, so the connection is released if they throw.
                DBUtil.closeDBResources(null, null, connection);
            }
        }

在這裏插入圖片描述

        /**
         * Writes {@code buffer} as one JDBC batch inside a single transaction.
         *
         * <p>For Oracle, every record is first rearranged in place so that its
         * columns line up with the statement's bind parameters (see
         * {@code reorderForOracleMerge}). The whole buffer is rearranged before
         * the statement is prepared or filled, so that whenever the
         * row-by-row fallback runs, all records in the buffer have the same layout.
         *
         * <p>On {@link SQLException} the transaction is rolled back and the
         * same buffer is handed to {@code doOneInsert}.
         *
         * @param connection connection to write through; auto-commit is turned off
         * @param buffer     records to write; Oracle records are modified in place
         * @throws SQLException if the rollback itself fails
         * @throws DataXException with {@code WRITE_DATA_ERROR} for any
         *                        non-SQL failure
         */
        protected void doBatchInsert(Connection connection, List<Record> buffer)
                throws SQLException {
            PreparedStatement preparedStatement = null;
            try {
                if (this.dataBaseType == DataBaseType.Oracle) {
                    reorderForOracleMerge(buffer);
                }

                connection.setAutoCommit(false);
                preparedStatement = connection
                        .prepareStatement(this.writeRecordSql);

                for (Record record : buffer) {
                    preparedStatement = fillPreparedStatement(
                            preparedStatement, record);
                    preparedStatement.addBatch();
                }
                preparedStatement.executeBatch();
                connection.commit();
            } catch (SQLException e) {
                LOG.warn("回滾此次寫入, 採用每次寫入一行方式提交. 因爲:" + e.getMessage());
                connection.rollback();
                // NOTE(review): doOneInsert is not visible here; it receives the
                // records as already rearranged above — confirm it does not
                // rearrange them a second time.
                doOneInsert(connection, buffer);
            } catch (Exception e) {
                throw DataXException.asDataXException(
                        DBUtilErrorCode.WRITE_DATA_ERROR, e);
            } finally {
                DBUtil.closeDBResources(preparedStatement, null);
            }
        }

        /**
         * Rewrites each record so its columns appear as: key columns (those
         * named in {@code writeMode}), then non-key columns, then all columns
         * in their configured order. Each record ends up with twice the
         * configured number of columns.
         */
        private void reorderForOracleMerge(List<Record> buffer) {
            List<String> keyColumns = Arrays.asList(WriterUtil.getStrings(this.writeMode));
            int columnCount = this.columns.size();

            // The source index for every target position is the same for all
            // records, so it is computed once per batch.
            int[] sourceIndexes = new int[columnCount * 2];
            int next = 0;
            for (int j = 0; j < columnCount; j++) {
                if (keyColumns.contains(this.columns.get(j))) {
                    sourceIndexes[next++] = j;
                }
            }
            for (int j = 0; j < columnCount; j++) {
                if (!keyColumns.contains(this.columns.get(j))) {
                    sourceIndexes[next++] = j;
                }
            }
            for (int j = 0; j < columnCount; j++) {
                sourceIndexes[next++] = j;
            }

            for (Record record : buffer) {
                // Snapshot first: writing back overwrites the positions being read.
                List<Column> reordered = new ArrayList<Column>(sourceIndexes.length);
                for (int sourceIndex : sourceIndexes) {
                    reordered.add(record.getColumn(sourceIndex));
                }
                for (int j = 0; j < reordered.size(); j++) {
                    record.setColumn(j, reordered.get(j));
                }
            }
        }

效果

{
    "job": {
        "setting": {
            "speed": {
                 "byte": 1048576
            },
            "errorLimit": {
                "record": 0,
                "percentage": 0.02
            }
        },
        "content": [
            {
                "reader": {
                    "name": "postgresqlreader",
                    "parameter": {
                        "username": "postgres",
                        "password": "postgres",
                        "connection": [
                            {
                                "querySql": ["SELECT seq,userid,name FROM user"],
                                "jdbcUrl": [
                                    "jdbc:postgresql://127.0.0.1:5432/postgres"
                                ]
                            }
                        ]
                    }
                },
                "writer": {
                    "name": "oraclewriter",
                    "parameter": {
                        "username": "oracle",
                        "password": "oracle",
                        "column": [
                            "seq",
                            "userid",
                            "name"
                        ],
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:oracle:thin:@localhost:1521:oracle",
                                "table": [
                                    "user1"
                                ]
                            }
                        ],
                        "writeMode": "update (seq,userid)"
                    }
                }
            }
        ]
    }
}

源碼

  • DataX 改造後的完整代碼請參考:https://gitee.com/cecotw/DataX
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章