來自官網總結
https://docs.spring.io/spring-batch/3.0.x/reference/htmlsingle/
Spring Batch架構
Spring Batch組件
- 運行模型 JobLauncher Job Step
- 重試機制 RetryTemplate
- 輸入輸出 ItemReader ItemWriter
批處理實現類型
- Normal processing in a batch window 普通批處理
- Concurrent batch / on-line processing 在線實時批處理
- Parallel Processing 並行批處理
- Partitioning 分區批處理
分區機制
- Fixed and Even Break-Up of Record Set 固定且均等地分解記錄集
- Breakup by a Key Column 關鍵字
- Assigned to a batch instance via a partitioning table 分區表
- Assigned to a batch instance by a portion of the value 依賴關鍵字取值的部分來拆分
- Breakup by Views 視圖分解
- Addition of a Processing Indicator 增加處理標識列(標記記錄是否已被處理)
- Extract Table to a Flat File 將表數據導出爲平面文件(flat file)
- Use of a Hashing Column 使用hash列
樣例
JOB樣例
JOB運行狀態三種 STARTED FAILED COMPLETED
<job id="footballJob" job-repository="specialRepository">
<step id="playerload" parent="s1" next="gameLoad"/>
<step id="gameLoad" parent="s3" next="playerSummarization"/>
<step id="playerSummarization" parent="s3"/>
<listeners>
<listener ref="sampleListener"/>
</listeners>
</job>
step樣例
<job id="ioSampleJob">
<step id="step1">
<tasklet>
<chunk reader="itemReader" writer="itemWriter" commit-interval="2"/>
</tasklet>
</step>
</job>
JobRepository 樣例以及事務控制
<!-- isolation-level-for-create 默認爲 SERIALIZABLE,也可以改爲 REPEATABLE_READ 等 -->
<!-- table-prefix 默認爲 BATCH_,可以改 -->
<job-repository id="jobRepository"
data-source="dataSource"
transaction-manager="transactionManager"
isolation-level-for-create="SERIALIZABLE"
table-prefix="BATCH_"
max-varchar-length="1000"/>
–事務
<aop:config>
<aop:advisor
pointcut="execution(* org.springframework.batch.core..*Repository+.*(..))"
advice-ref="txAdvice" />
</aop:config>
<tx:advice id="txAdvice" transaction-manager="transactionManager">
<tx:attributes>
<tx:method name="*" />
</tx:attributes>
</tx:advice>
更改項目的數據庫類型
<bean id="jobRepository" class="org...JobRepositoryFactoryBean">
<property name="databaseType" value="db2"/>
<property name="dataSource" ref="dataSource"/>
</bean>
同步模型
<bean id="jobLauncher"
class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository" />
</bean>
異步模型
<bean id="jobLauncher"
class="org.springframework.batch.core.launch.support.SimpleJobLauncher">
<property name="jobRepository" ref="jobRepository" />
<property name="taskExecutor">
<bean class="org.springframework.core.task.SimpleAsyncTaskExecutor" />
</property>
</bean>
JobExplorer JOB閱讀器
查詢已經存在的execution p:tablePrefix可指定前綴
<bean id="jobExplorer" class="org.spr...JobExplorerFactoryBean"
p:dataSource-ref="dataSource" p:tablePrefix="BATCH_" />
RepeatTemplate 循環執行
RepeatOperations 實現類RepeatTemplate
RepeatContext RepeatStatus CONTINUABLE/ FINISHED
並行處理 TaskExecutorRepeatTemplate SynchronousTaskExecutor RepeatCallback
多線程
略
並行
一種方案是 使用 split 元素包裹多個 flow,各個 flow 並行執行
<job id="job1">
<split id="split1" task-executor="taskExecutor" next="step4">
<flow>
<step id="step1" parent="s1" next="step2"/>
<step id="step2" parent="s2"/>
</flow>
<flow>
<step id="step3" parent="s3"/>
</flow>
</split>
<step id="step4" parent="s4"/>
</job>
<beans:bean id="taskExecutor" class="org.spr...SimpleAsyncTaskExecutor"/>
重試機制
RetryOperations 接口實現類 RetryTemplate
重試的策略4種:
RetryPolicy :SimpleRetryPolicy TimeoutRetryPolicy ExceptionClassifierRetryPolicy;另有 BackoffPolicy(退避策略,控制兩次重試之間的等待,是獨立於 RetryPolicy 的接口)
開啓重試
<aop:config>
<aop:pointcut id="transactional"
expression="execution(* com..*Service.remoteCall(..))" />
<aop:advisor pointcut-ref="transactional"
advice-ref="retryAdvice" order="-1"/>
</aop:config>
<bean id="retryAdvice"
class="org.springframework.retry.interceptor.RetryOperationsInterceptor"/>
測試
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "/simple-job-launcher-context.xml",
"/jobs/skipSampleJob.xml" })
@TestExecutionListeners( { DependencyInjectionTestExecutionListener.class,
StepScopeTestExecutionListener.class })
public class SkipSampleFunctionalTests { ... }
##一般batch模式
<step id="simpleStep">
<tasklet>
<chunk reader="reader" writer="writer" commit-interval="10"
chunk-completion-policy="completionPolicy"/>
</tasklet>
</step>
<bean id="completionPolicy" class="org.example...SpecialCompletionPolicy"/>
其他小技巧
Footer
可用於在每個文件處理完成之後追加總結性的信息,例如記錄數、行數等。
//寫文件 Footer
<bean id="itemWriter" class="org.spr...FlatFileItemWriter">
<property name="resource" ref="outputResource" />
<property name="lineAggregator" ref="lineAggregator"/>
<property name="headerCallback" ref="headerCallback" />
<property name="footerCallback" ref="footerCallback" />
</bean>
例如
public void writeFooter(Writer writer) throws IOException {
writer.write("Total Amount Processed: " + totalAmount);
}
執行cmd命令
<bean class="org.springframework.batch.core.step.tasklet.SystemCommandTasklet">
<property name="command" value="echo hello" />
<!-- 5 second timeout for the command to complete -->
<property name="timeout" value="5000" />
</bean>
對JSR-352的支持
主要有兩個功能 Partitioned 和 Batchlet
- Split - Running multiple steps in parallel.
- Multiple threads - Executing a single step via multiple threads.
- Partitioning - Dividing the data up for parallel processing (master/slave).
- Remote Chunking - Executing the processor piece of logic remotely.
直接寫代碼可參考
JobOperator operator = BatchRuntime.getJobOperator();
operator.start("myJob", new Properties());
文件流寫入batch
<int:channel id="inboundFileChannel"/>
<int:channel id="outboundJobRequestChannel"/>
<int:channel id="jobLaunchReplyChannel"/>
<int-file:inbound-channel-adapter id="filePoller"
channel="inboundFileChannel"
directory="file:/tmp/myfiles/"
filename-pattern="*.csv">
<int:poller fixed-rate="1000"/>
</int-file:inbound-channel-adapter>
<int:transformer input-channel="inboundFileChannel"
output-channel="outboundJobRequestChannel">
<bean class="io.spring.sbi.FileMessageToJobRequest">
<property name="job" ref="personJob"/>
<property name="fileParameterName" value="input.file.name"/>
</bean>
</int:transformer>
<batch-int:job-launching-gateway request-channel="outboundJobRequestChannel"
reply-channel="jobLaunchReplyChannel"/>
<int:logging-channel-adapter channel="jobLaunchReplyChannel"/>
附1 使用Batch的註解
@BatchProperty and @Inject 兩個一起才起作用
@EnableBatchConfiguration
@EnableBatchProcessing
附2 Batch內部註解
spring batch中主要由job step chunk組成,chunk可以看做是更細粒度的step,對於三大模塊,都建立了監聽器 JobExecutionListener、 StepExecutionListener 、ChunkListener。
其中 ChunkListener、StepExecutionListener 的父接口是 StepListener
附3 最後的彩蛋
因爲用到了DB-DB的方式,看了下源碼,目前還不支持直接配置多數據源 但是可以自己繼承 AbstractBatchConfiguration 來實現自己的多數據源配置
protected BatchConfigurer getConfigurer(Collection<BatchConfigurer> configurers) throws Exception {
if (this.configurer != null) {
return this.configurer;
}
if (configurers == null || configurers.isEmpty()) {
if (dataSources == null || dataSources.isEmpty()) {
DefaultBatchConfigurer configurer = new DefaultBatchConfigurer();
configurer.initialize();
this.configurer = configurer;
return configurer;
} else if(dataSources != null && dataSources.size() == 1) {
DataSource dataSource = dataSources.iterator().next();
DefaultBatchConfigurer configurer = new DefaultBatchConfigurer(dataSource);
configurer.initialize();
this.configurer = configurer;
return configurer;
} else {
throw new IllegalStateException("To use the default BatchConfigurer the context must contain no more than" +
"one DataSource, found " + dataSources.size());
}
}
if (configurers.size() > 1) {
throw new IllegalStateException(
"To use a custom BatchConfigurer the context must contain precisely one, found "
+ configurers.size());
}
this.configurer = configurers.iterator().next();
return this.configurer;
}
雖然參數定義的是 Collection<BatchConfigurer>,但並未給出傳入多個 configurer 的使用方式,並且只要數量大於 1 就直接拋出 IllegalStateException:
if (configurers.size() > 1)
這是要逗我嗎?
給出重寫的思路:重寫的時候,把if (configurers.size() > 1)這一段去掉,再加上調用的配置prefix 應該就可以實現多數據源了。