Spring Batch
用來處理大量數據操作的一個框架,
主要用來讀取大量數據,然後進行一定處理後輸出成指定的形式。
名稱 | 用途 |
---|---|
Job Repository | 用來註冊Job容器 |
Job Launcher | 用來啓動Job接口 |
Job | 實際要執行的任務,包含一個或多個Step |
Step | 包含Item Reader Processor Writer |
Item Reader | 讀取數據的接口 |
Item Processor | 處理數據的接口 |
Item Writer | 輸出數據的接口 |
主要組成部分註冊成 Spring Bean
//@Configuration
@EnableBatchProcessing
數據讀取:Spring Batch 爲我們提供了大量的 ItemReader的實現
數據處理和校驗都是通過 ItemProcessor接口完成
實現 ItemProcessor 並重寫 process 方法:接收 ItemReader 讀取到的數據,處理後返回給 ItemWriter
1. pom
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-batch</artifactId>
<exclusions>
<exclusion>
<groupId>org.hsqldb</groupId>
<artifactId>hsqldb</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-jdbc</artifactId>
</dependency>
<dependency>
<groupId>com.oracle</groupId>
<artifactId>ojdbc6</artifactId>
<version>11.2.0.2.0</version>
</dependency>
<dependency>
<groupId>org.hibernate</groupId>
<artifactId>hibernate-validator</artifactId>
</dependency> <!-- 作爲校驗使用 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
2. 測試 people.csv
汪某某,11,漢族,合肥
張某某,12,漢族,上海
李某某,13,非漢族,武漢
劉某,14,非漢族,南京
歐陽某某,115,漢族,北京
3. 數據表定義
src/main/resources/schema.sql
-- PERSON: target table that receives one row per imported CSV record.
-- NOTE(review): the writer's insert statement in this document draws ids from
-- hibernate_sequence.nextval; that sequence is not created by this script --
-- confirm it already exists in the target schema.
create table PERSON
(
id NUMBER not null primary key,
name VARCHAR2(20),
age NUMBER,
nation VARCHAR2(20),
address VARCHAR2(20)
);
4. 領域模型類
/**
 * Domain model for one record of the imported CSV file
 * (columns: name, age, nation, address).
 *
 * <p>Getters and setters are provided because the other snippets rely on them:
 * the item processor calls {@code getNation()} / {@code setNation(...)}, and the
 * reader and writer address the fields by bean property name.
 */
public class Person {
    /** Person's name; Bean Validation restricts it to 2-4 characters. */
    @Size(max=4,min=2)
    private String name;
    /** Age as read from the CSV file. */
    private int age;
    /** Nation; free text on input, replaced by a code during processing. */
    private String nation;
    /** Address as read from the CSV file. */
    private String address;

    /** @return the person's name */
    public String getName() {
        return name;
    }

    /** @param name the person's name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the person's age */
    public int getAge() {
        return age;
    }

    /** @param age the person's age */
    public void setAge(int age) {
        this.age = age;
    }

    /** @return the nation value (raw text or code) */
    public String getNation() {
        return nation;
    }

    /** @param nation the nation value (raw text or code) */
    public void setNation(String nation) {
        this.nation = nation;
    }

    /** @return the person's address */
    public String getAddress() {
        return address;
    }

    /** @param address the person's address */
    public void setAddress(String address) {
        this.address = address;
    }
}
5. 數據處理及校驗
/**
 * Validates each {@link Person} through the configured validator and then
 * replaces its nation text with a code: {@code "01"} for "漢族", {@code "02"}
 * for anything else.
 */
public class CsvItemProcessor extends ValidatingItemProcessor<Person> {

    /**
     * Validates and normalizes one item.
     *
     * @param item the person read from the input
     * @return the same person with its nation replaced by a code, or
     *         {@code null} when the parent processor filtered the item out
     * @throws ValidationException if validation fails and filtering is not enabled
     */
    @Override
    public Person process(Person item) throws ValidationException {
        // Delegating to the parent is what invokes the custom validator.
        Person validated = super.process(item);
        if (validated == null) {
            // The parent returns null when it filters an invalid item; pass that
            // on so the rejected item is skipped instead of being written.
            return null;
        }
        // Constant-first equals: a missing nation maps to "02" rather than
        // failing with a NullPointerException.
        if ("漢族".equals(validated.getNation())) {
            validated.setNation("01");
        } else {
            validated.setNation("02");
        }
        return validated;
    }
}
/**
 * Adapter that lets a JSR-303 (Bean Validation) validator be used as a Spring
 * Batch {@code Validator}.
 *
 * @param <T> type of the items being validated
 */
public class CsvBeanValidator<T> implements Validator<T>,InitializingBean {

    /** Underlying JSR-303 validator; created in {@link #afterPropertiesSet()}. */
    private javax.validation.Validator validator;

    /** Builds the JSR-303 validator from the default validator factory. */
    @Override
    public void afterPropertiesSet() throws Exception {
        validator = Validation.buildDefaultValidatorFactory().usingContext().getValidator();
    }

    /**
     * Runs Bean Validation on the given value.
     *
     * @param value the item to check
     * @throws ValidationException if any constraint is violated; the message
     *         contains every violation message, each followed by a newline
     */
    @Override
    public void validate(T value) throws ValidationException {
        Set<ConstraintViolation<T>> violations = validator.validate(value);
        if (violations.isEmpty()) {
            return;
        }
        StringBuilder details = new StringBuilder();
        for (ConstraintViolation<T> violation : violations) {
            details.append(violation.getMessage()).append("\n");
        }
        throw new ValidationException(details.toString());
    }
}
6. job監聽
/**
 * Job listener that prints a start message, an end message and the elapsed
 * wall-clock time of the job to standard output.
 */
public class CsvJobListener implements JobExecutionListener {

    /** Timestamp (ms since epoch) captured when the job starts. */
    long startTime;
    /** Timestamp (ms since epoch) captured when the job ends. */
    long endTime;

    /** Records the start time and announces that processing has begun. */
    @Override
    public void beforeJob(JobExecution jobExecution) {
        this.startTime = System.currentTimeMillis();
        System.out.println("任務處理開始");
    }

    /** Records the end time and reports the elapsed time in milliseconds. */
    @Override
    public void afterJob(JobExecution jobExecution) {
        this.endTime = System.currentTimeMillis();
        long elapsedMillis = this.endTime - this.startTime;
        System.out.println("任務處理結束");
        System.out.println("耗時:" + elapsedMillis + "ms");
    }
}
7. 配置
//@Configuration
@EnableBatchProcessing // enables Spring Batch support
/**
 * Batch configuration that imports people.csv into the PERSON table:
 * reader -> processor (validation + nation code) -> JDBC writer, wired into a
 * single-step job with a timing listener.
 */
public class CsvBatchConfig {

    /**
     * Reads people.csv from the classpath and maps every line to a Person.
     * The collaborators are configured with plain setter calls rather than
     * double-brace initialization, which would create hidden anonymous subclasses.
     */
    @Bean
    public ItemReader<Person> reader() throws Exception {
        // Column names, in file order; they must match the Person property names.
        DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
        tokenizer.setNames(new String[] { "name","age", "nation" ,"address"});

        // Copies the tokenized fields onto a Person by property name.
        BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<Person>();
        fieldSetMapper.setTargetType(Person.class);

        DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<Person>();
        lineMapper.setLineTokenizer(tokenizer);
        lineMapper.setFieldSetMapper(fieldSetMapper);

        FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>();
        reader.setResource(new ClassPathResource("people.csv")); // location of the CSV file
        reader.setLineMapper(lineMapper);
        return reader;
    }

    /** Processor that validates each Person and converts its nation to a code. */
    @Bean
    public ItemProcessor<Person, Person> processor() {
        CsvItemProcessor processor = new CsvItemProcessor();
        processor.setValidator(csvBeanValidator()); // attach the custom validator
        return processor;
    }

    /**
     * JDBC batch writer that inserts each Person into the person table.
     *
     * @param dataSource the data source supplied by the container
     */
    @Bean
    public ItemWriter<Person> writer(DataSource dataSource) {
        JdbcBatchItemWriter<Person> writer = new JdbcBatchItemWriter<Person>();
        // Named parameters (:name, :age, ...) are resolved from Person bean properties.
        writer.setItemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<Person>());
        String sql = "insert into person " + "(id,name,age,nation,address) "
                + "values(hibernate_sequence.nextval, :name, :age, :nation,:address)";
        writer.setSql(sql); // statement executed for every item in the batch
        writer.setDataSource(dataSource);
        return writer;
    }

    /** Job repository backed by the given data source, configured for Oracle. */
    @Bean
    public JobRepository jobRepository(DataSource dataSource, PlatformTransactionManager transactionManager)
            throws Exception {
        JobRepositoryFactoryBean jobRepositoryFactoryBean = new JobRepositoryFactoryBean();
        jobRepositoryFactoryBean.setDataSource(dataSource);
        jobRepositoryFactoryBean.setTransactionManager(transactionManager);
        jobRepositoryFactoryBean.setDatabaseType("oracle");
        return jobRepositoryFactoryBean.getObject();
    }

    /** Launcher used to start jobs; it needs the job repository. */
    @Bean
    public SimpleJobLauncher jobLauncher(DataSource dataSource, PlatformTransactionManager transactionManager)
            throws Exception {
        SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
        jobLauncher.setJobRepository(jobRepository(dataSource, transactionManager));
        return jobLauncher;
    }

    /** The import job: a single step plus the timing listener. */
    @Bean
    public Job importJob(JobBuilderFactory jobs, Step s1) {
        return jobs.get("importJob")
                .incrementer(new RunIdIncrementer())
                .flow(s1) // assign the step to the job
                .end()
                .listener(csvJobListener()) // register the listener on the job
                .build();
    }

    /** The single step: read, process and write Person items in chunks. */
    @Bean
    public Step step1(StepBuilderFactory stepBuilderFactory, ItemReader<Person> reader, ItemWriter<Person> writer,
            ItemProcessor<Person,Person> processor) {
        return stepBuilderFactory
                .get("step1")
                .<Person, Person>chunk(65000) // commit once per 65000 items
                .reader(reader)
                .processor(processor)
                .writer(writer)
                .build();
    }

    /** Listener that prints start/end messages and the elapsed time. */
    @Bean
    public CsvJobListener csvJobListener() {
        return new CsvJobListener();
    }

    /** Bean Validation based validator used by the processor. */
    @Bean
    public Validator<Person> csvBeanValidator() {
        return new CsvBeanValidator<Person>();
    }
}
8.手動觸發任務
//@Configuration  (comment out this annotation to disable this configuration)
@EnableBatchProcessing
public class CsvBatchConfig {
}
新建 TriggerBatchConfig。內容與 CsvBatchConfig 保持一致
除了 ItemReader 這個bean
/**
 * Step-scoped reader whose input file is taken from the job parameter
 * {@code input.file.name} and resolved on the classpath.
 * Declared as FlatFileItemReader rather than ItemReader.
 * The collaborators are configured with plain setter calls rather than
 * double-brace initialization, which would create hidden anonymous subclasses.
 *
 * @param pathToFile classpath-relative path of the CSV file to read
 */
@Bean
@StepScope
public FlatFileItemReader<Person> reader(@Value("#{jobParameters['input.file.name']}") String pathToFile) throws Exception {
    // Column names, in file order; they must match the Person property names.
    DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
    tokenizer.setNames(new String[] { "name","age", "nation" ,"address"});

    // Copies the tokenized fields onto a Person by property name.
    BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<Person>();
    fieldSetMapper.setTargetType(Person.class);

    DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<Person>();
    lineMapper.setLineTokenizer(tokenizer);
    lineMapper.setFieldSetMapper(fieldSetMapper);

    FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>();
    reader.setResource(new ClassPathResource(pathToFile));
    reader.setLineMapper(lineMapper);
    return reader;
}
9. 定義控制器
/**
 * REST endpoint that triggers the CSV import job on demand.
 */
@RestController
public class DemoController {
    @Autowired
    JobLauncher jobLauncher;
    @Autowired
    Job importJob;
    /**
     * Parameters of the most recently submitted job. Retained for backward
     * compatibility only; the job launch itself uses a request-local value so
     * that concurrent requests cannot overwrite each other's parameters.
     */
    public JobParameters jobParameters;

    /**
     * Launches the import job for {@code <fileName>.csv}.
     *
     * @param fileName base name (without extension) of the CSV file to import
     * @return the literal {@code "ok"} once the launcher call returns
     * @throws IllegalArgumentException if {@code fileName} is null, blank, or contains ".."
     * @throws Exception if the job cannot be launched
     */
    @RequestMapping("/read")
    public String imp(String fileName) throws Exception{
        // fileName comes straight from the HTTP request, so validate it before
        // it is turned into a resource path.
        if (fileName == null || fileName.trim().isEmpty()) {
            throw new IllegalArgumentException("fileName must not be empty");
        }
        if (fileName.contains("..")) {
            throw new IllegalArgumentException("fileName must not contain '..': " + fileName);
        }
        String path = fileName+".csv";
        // The timestamp makes every parameter set unique, so each request
        // starts a new job instance.
        JobParameters params = new JobParametersBuilder()
                .addLong("time", System.currentTimeMillis())
                .addString("input.file.name", path)
                .toJobParameters();
        this.jobParameters = params;
        jobLauncher.run(importJob, params);
        return "ok";
    }
}
10. 配置文件
spring.datasource.driverClassName=oracle.jdbc.OracleDriver
spring.datasource.url=jdbc\:oracle\:thin\:@192.168.31.183\:49161\:xe
spring.datasource.username=system
spring.datasource.password=oracle
# 關閉自動執行job的配置
spring.batch.job.enabled=false
logging.level.org.springframework.web = DEBUG
http://localhost:8080/read?fileName=people
表裏就會生成處理好的,對應的數據,就像 hadoop一樣。