本例僅對單表的增量導入進行了測試。Solr 的增量採集依賴系統時間,所以我們必須保證數據庫的系統時間(包括時區)與 Solr 所在 JVM 的時間一致。
開始之前下面的提醒很重要:
DataImportHandler is a data import tool for Solr which makes importing data from Databases, XML files and
HTTP data sources quick and easy.
Important Note
--------------
Although Solr strives to be agnostic of the Locale where the server is
running, some code paths in DataImportHandler are known to depend on the
System default Locale, Timezone, or Charset. It is recommended that when
running Solr you set the following system properties:
-Duser.language=xx -Duser.country=YY -Duser.timezone=ZZZ
where xx, YY, and ZZZ are consistent with any database server's configuration.
- 所以我們需要在 /etc/default/solr.in.sh 中設置時區:
# By default the start script uses UTC; override the timezone if needed.
# The value ends up in the JVM's user.timezone property, so it must be an ID that
# java.util.TimeZone understands: a custom "GMT+8"-style offset or a region ID
# such as "Asia/Shanghai". "UTC+8" is not a valid java.util.TimeZone ID.
SOLR_TIMEZONE="GMT+8"
- 準備數據庫表
-- Add the change-tracking column that the Solr delta query filters on.
ALTER TABLE RMS_RESOURCEINFO
    ADD (SOLR_LAST_DATE TIMESTAMP);

-- Index the new column; the delta query uses it in its WHERE clause.
CREATE INDEX RMS_RESOURCEINFO_INDEX_SOLR
    ON RMS_RESOURCEINFO (SOLR_LAST_DATE ASC);

-- Switch both triggers off for the duration of the backfill below.
-- NOTE(review): presumably these triggers fire on updates of RMS_RESOURCEINFO; confirm.
ALTER TRIGGER "PLS"."RMS_RESOURCEINFO_SOLR" DISABLE;
ALTER TRIGGER "PLS"."RMS_RESOURCEINFO_UPDATE" DISABLE;

-- Backfill: intentionally no WHERE clause, every existing row is stamped with the current time.
UPDATE RMS_RESOURCEINFO
SET SOLR_LAST_DATE = CURRENT_TIMESTAMP;
COMMIT;

-- Switch the triggers back on once the backfill is committed.
ALTER TRIGGER "PLS"."RMS_RESOURCEINFO_SOLR" ENABLE;
ALTER TRIGGER "PLS"."RMS_RESOURCEINFO_UPDATE" ENABLE;
- 配置data-config.xml
<dataConfig>
  <!-- Store the last index time in a custom properties file, using the given date format. -->
  <propertyWriter dateFormat="yyyy-MM-dd HH:mm:ss"
                  type="SimplePropertiesWriter"
                  filename="my_dih.properties"
                  locale="zh-CN" />

  <!-- Oracle JDBC connection. The password is stored encrypted; encryptKeyFile names the key file used for it. -->
  <dataSource type="JdbcDataSource"
              driver="oracle.jdbc.driver.OracleDriver"
              url="jdbc:oracle:thin:@//"
              user=""
              password="U2FsdGVkX1/PqBuNUFBIcmLKTb+y41YB6J7b6tAm8Xw="
              encryptKeyFile="/var/solr/data/dih-encryptionkey"
  />

  <document>
    <!-- Disabled sample entity, kept for reference. -->
    <!-- <entity name="id"
    query="select id,name,section,subject from CLASS_TYPE">
    <field column="ID" name="id"/>
    <field column="NAME" name="solr_name"/>
    <field column="SECTION" name="solr_section"/>
    <field column="SUBJECT" name="subject_s"/>
    </entity>-->

    <!--
      Single-table entity for RMS_RESOURCEINFO.
        query            : used by full-import, selects every row.
        deltaQuery       : returns the R_CODE of rows whose SOLR_LAST_DATE is later than
                           the last index time. The Oracle mask 'yyyy-mm-dd hh24:mi:ss'
                           has to mirror the propertyWriter dateFormat above.
        deltaImportQuery : loads one changed row by the R_CODE that deltaQuery returned.
        pk               : declared explicitly because this entity is used for delta imports;
                           it is the column referenced as dataimporter.delta.R_CODE.
      The comparison operator in deltaQuery is written as an XML entity so the attribute
      value stays safe for strict XML tooling.
    -->
    <entity name="info"
            pk="R_CODE"
            transformer="DateFormatTransformer"
            query="select R_CODE,R_TITLE,R_KS_ID,R_DESC,R_TYPECODE,R_FORMAT,SOLR_LAST_DATE FROM RMS_RESOURCEINFO"
            deltaImportQuery="select R_CODE,R_TITLE,R_KS_ID,R_DESC,R_TYPECODE,R_FORMAT,SOLR_LAST_DATE FROM RMS_RESOURCEINFO where R_CODE='${dataimporter.delta.R_CODE}'"
            deltaQuery="SELECT R_CODE FROM RMS_RESOURCEINFO WHERE SOLR_LAST_DATE&gt;TO_DATE('${dataimporter.last_index_time}','yyyy-mm-dd hh24:mi:ss')">
      <!-- R_CODE is mapped to the Solr field "id". -->
      <field column="R_CODE" name="id"/>
      <field column="R_TITLE" name="rtitle_txt_cjk"/>
      <field column="R_KS_ID" name="ksid_s"/>
      <field column="R_DESC" name="rdesc_txt_cjk"/>
      <field column="R_TYPECODE" name="rtypecode_s"/>
      <field column="R_FORMAT" name="rformat_s"/>
      <!-- Parsed by DateFormatTransformer with the pattern given in dateTimeFormat. -->
      <field column="SOLR_LAST_DATE" dateTimeFormat="yyyy-MM-dd HH:mm:ss" name="lastDate_dt"/>
    </entity>
  </document>
</dataConfig>
- 執行 delta-import、full-import 命令,並注意 my_dih.properties 中時間的變化。這裏我自定義了屬性文件名,因此需要手動創建 my_dih.properties,並執行 chown solr:solr my_dih.properties。