EE颠覆者第九章2 spring Batch

spring Batch

用来处理大量数据操作的一个框架,

主要用来读取大量数据,然后进行一定处理后输出成指定的形式。

名称 用途
Job Repository 用来注册 Job 的容器
Job Launcher 用来启动 Job 的接口
Job 实际要执行的任务,包含一个或多个Step
Step 包含Item Reader Processor Writer
Item Reader 读取数据的接口
Item Processor 处理数据的接口
Item Writer 输出数据的接口

主要组成部分注册成 Spring Bean

//@Configuration
@EnableBatchProcessing

数据读取:Spring Batch 为我们提供了大量的 ItemReader的实现

数据处理和校验都是通过 ItemProcessor接口完成

实现 ItemProcessor 接口并重写 process 方法:方法的输入参数是从 ItemReader 读取到的数据,返回值会交给 ItemWriter。

1. pom

		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-batch</artifactId>
			<exclusions>
				<exclusion>
					<groupId>org.hsqldb</groupId>
					<artifactId>hsqldb</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>

		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-jdbc</artifactId>
		</dependency>


		<dependency>
			<groupId>com.oracle</groupId>
			<artifactId>ojdbc6</artifactId>
			<version>11.2.0.2.0</version>
		</dependency>

		<dependency>
			<groupId>org.hibernate</groupId>
			<artifactId>hibernate-validator</artifactId>
		</dependency> <!-- 作为校验使用 -->
	
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>

2. 测试 people.csv

汪某某,11,汉族,合肥
张某某,12,汉族,上海
李某某,13,非汉族,武汉
刘某,14,非汉族,南京
欧阳某某,115,汉族,北京

3. 数据表定义

src/main/resources/schema.sql
-- Target table for the rows imported from people.csv (one row per person).
create table PERSON
(
  -- Primary key. The batch writer's insert statement fills it from hibernate_sequence.nextval.
  -- NOTE(review): that sequence is not created in this script -- confirm it already exists.
  id          NUMBER not null primary key,
  -- The four columns below match the CSV columns name, age, nation, address.
  name        VARCHAR2(20),
  age         NUMBER,
  -- Holds the code written by the item processor ("01" or "02"), not the raw CSV text.
  nation      VARCHAR2(20),
  address     VARCHAR2(20)
);

4. 领域模型类

/**
 * Domain model for one record of the imported CSV file.
 *
 * <p>Every field has a JavaBean getter and setter. The item processor in this
 * file calls {@code getNation()} / {@code setNation(...)}, and the reader and
 * writer configured in this file address the fields by property name
 * ({@code name}, {@code age}, {@code nation}, {@code address}).
 */
public class Person {

	/** Person's name; the constraint limits it to between 2 and 4 characters. */
	@Size(max=4,min=2)
	private String name;

	/** Age as read from the second CSV column. */
	private int age;

	/** Nation; raw text from the CSV until the item processor replaces it with a code. */
	private String nation;

	/** Address as read from the fourth CSV column. */
	private String address;

	/** @return the person's name */
	public String getName() {
		return name;
	}

	/** @param name the person's name */
	public void setName(String name) {
		this.name = name;
	}

	/** @return the person's age */
	public int getAge() {
		return age;
	}

	/** @param age the person's age */
	public void setAge(int age) {
		this.age = age;
	}

	/** @return the nation text or code currently stored */
	public String getNation() {
		return nation;
	}

	/** @param nation the nation text or code to store */
	public void setNation(String nation) {
		this.nation = nation;
	}

	/** @return the person's address */
	public String getAddress() {
		return address;
	}

	/** @param address the person's address */
	public void setAddress(String address) {
		this.address = address;
	}
}

5. 数据处理及校验

/**
 * Item processor that first validates a {@link Person} through the superclass
 * and then replaces its {@code nation} text with a two-character code.
 */
public class CsvItemProcessor  extends ValidatingItemProcessor<Person>{

	/** Nation text that is mapped to {@link #HAN_CODE}. */
	private static final String HAN = "汉族";

	/** Code stored when the nation equals {@link #HAN}. */
	private static final String HAN_CODE = "01";

	/** Code stored for every other nation value. */
	private static final String OTHER_CODE = "02";

	/**
	 * Validates the item, then rewrites its nation to {@code "01"} (Han) or
	 * {@code "02"} (anything else).
	 *
	 * @param item the person read from the input
	 * @return the same instance with its nation replaced by a code, or
	 *         {@code null} when the superclass filtered the item out
	 * @throws ValidationException if validation fails and the superclass is
	 *         not configured to filter
	 * @throws NullPointerException if the item's nation is {@code null}
	 */
	@Override
	public Person process(Person item) throws ValidationException {
		// Calling the superclass is what triggers the configured validator.
		Person validated = super.process(item);
		if (validated == null) {
			// In filter mode the superclass signals "drop this item" with null.
			// Propagate it so an invalid item is not transformed and written.
			return null;
		}

		if (validated.getNation().equals(HAN)) {
			validated.setNation(HAN_CODE);
		} else {
			validated.setNation(OTHER_CODE);
		}
		return validated;
	}

}
/**
 * Adapter that lets the batch framework's {@code Validator} contract delegate
 * to a JSR-303 (Bean Validation) validator.
 *
 * @param <T> type of the items being validated
 */
public class CsvBeanValidator<T> implements Validator<T>,InitializingBean {

    /** JSR-303 validator; assigned in {@link #afterPropertiesSet()}. */
    private javax.validation.Validator validator;

    /**
     * Creates the JSR-303 validator from the default validator factory.
     */
    @Override
    public void afterPropertiesSet() throws Exception {
        validator = Validation.buildDefaultValidatorFactory()
                .usingContext()
                .getValidator();
    }

    /**
     * Validates {@code value} and fails if any constraint is violated.
     *
     * @param value the item to validate
     * @throws ValidationException if there is at least one violation; the
     *         message holds every violation message, each followed by a newline
     */
    @Override
    public void validate(T value) throws ValidationException {
        Set<ConstraintViolation<T>> violations = validator.validate(value);
        if (violations.isEmpty()) {
            return;
        }

        StringBuilder details = new StringBuilder();
        for (ConstraintViolation<T> violation : violations) {
            details.append(violation.getMessage()).append("\n");
        }
        throw new ValidationException(details.toString());
    }

}

6. job监听

/**
 * Job listener that prints a message when a job starts and ends, together
 * with the elapsed time between the two callbacks.
 */
public class CsvJobListener implements JobExecutionListener {

    /** Value of {@code System.currentTimeMillis()} captured in {@link #beforeJob}. */
    long startTime;

    /** Value of {@code System.currentTimeMillis()} captured in {@link #afterJob}. */
    long endTime;

    /** Records the start time and prints the "job started" message. */
    @Override
    public void beforeJob(JobExecution jobExecution) {
        this.startTime = System.currentTimeMillis();
        System.out.println("任务处理开始");
    }

    /** Records the end time and prints the "job finished" and elapsed-time messages. */
    @Override
    public void afterJob(JobExecution jobExecution) {
        this.endTime = System.currentTimeMillis();
        long elapsedMillis = this.endTime - this.startTime;
        System.out.println("任务处理结束");
        System.out.println("耗时:" + elapsedMillis + "ms");
    }

}

7. 配置

//@Configuration
@EnableBatchProcessing  // enables Spring Batch support
/**
 * Batch configuration that imports {@code people.csv} into the {@code person}
 * table: a flat-file reader, a validating processor, a JDBC batch writer, and
 * the job/step/repository/launcher beans that tie them together.
 */
public class CsvBatchConfig {

	/**
	 * Reader for the classpath resource {@code people.csv}.
	 *
	 * <p>Each line is split into the columns {@code name, age, nation, address}
	 * and mapped onto a {@link Person}. The mapper objects are configured with
	 * plain setter calls rather than double-brace initialization, so no
	 * anonymous subclasses are created.
	 *
	 * @return the configured reader
	 */
	@Bean
	public ItemReader<Person> reader() throws Exception {
		// Splits a line into named fields.
		DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
		tokenizer.setNames(new String[] { "name","age", "nation" ,"address"});

		// Binds the named fields to a Person instance.
		BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<Person>();
		fieldSetMapper.setTargetType(Person.class);

		DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<Person>();
		lineMapper.setLineTokenizer(tokenizer);
		lineMapper.setFieldSetMapper(fieldSetMapper);

		FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>();
		reader.setResource(new ClassPathResource("people.csv")); // location of the csv file
		reader.setLineMapper(lineMapper);
		return reader;
	}

	/**
	 * Processor that validates and transforms each {@link Person}.
	 *
	 * @return a {@link CsvItemProcessor} wired with {@link #csvBeanValidator()}
	 */
	@Bean
	public ItemProcessor<Person, Person> processor() {
		CsvItemProcessor processor = new CsvItemProcessor();
		processor.setValidator(csvBeanValidator());
		return processor;
	}

	/**
	 * JDBC batch writer that inserts each {@link Person} into the
	 * {@code person} table.
	 *
	 * @param dataSource the data source to write to
	 * @return the configured writer
	 */
	@Bean
	public ItemWriter<Person> writer(DataSource dataSource) {
		JdbcBatchItemWriter<Person> writer = new JdbcBatchItemWriter<Person>();
		// Named SQL parameters (:name, :age, ...) are resolved from the item's bean properties.
		writer.setItemSqlParameterSourceProvider(new BeanPropertyItemSqlParameterSourceProvider<Person>());
		String sql = "insert into person " + "(id,name,age,nation,address) "
				+ "values(hibernate_sequence.nextval, :name, :age, :nation,:address)";
		writer.setSql(sql);
		writer.setDataSource(dataSource);
		return writer;
	}

	/**
	 * Job repository backed by the given data source, with the database type
	 * set to {@code "oracle"}.
	 *
	 * @param dataSource the data source the repository uses
	 * @param transactionManager the transaction manager the repository uses
	 * @return the job repository produced by the factory bean
	 */
	@Bean
	public JobRepository jobRepository(DataSource dataSource, PlatformTransactionManager transactionManager)
			throws Exception {
		JobRepositoryFactoryBean jobRepositoryFactoryBean = new JobRepositoryFactoryBean();
		jobRepositoryFactoryBean.setDataSource(dataSource);
		jobRepositoryFactoryBean.setTransactionManager(transactionManager);
		jobRepositoryFactoryBean.setDatabaseType("oracle");
		return jobRepositoryFactoryBean.getObject();
	}

	/**
	 * Job launcher that uses {@link #jobRepository(DataSource, PlatformTransactionManager)}.
	 *
	 * @param dataSource passed through to the job repository
	 * @param transactionManager passed through to the job repository
	 * @return the configured launcher
	 */
	@Bean
	public SimpleJobLauncher jobLauncher(DataSource dataSource, PlatformTransactionManager transactionManager)
			throws Exception {
		SimpleJobLauncher jobLauncher = new SimpleJobLauncher();
		jobLauncher.setJobRepository(jobRepository(dataSource, transactionManager));
		return jobLauncher;
	}

	/**
	 * The import job: a single-step flow with a run-id incrementer and the
	 * CSV job listener attached.
	 *
	 * @param jobs factory used to build the job
	 * @param s1 the step the job executes
	 * @return the job named {@code importJob}
	 */
	@Bean
	public Job importJob(JobBuilderFactory jobs, Step s1) {
		return jobs.get("importJob")
				.incrementer(new RunIdIncrementer())
				.flow(s1) // the step this job runs
				.end()
				.listener(csvJobListener()) // attach the listener to the job
				.build();
	}

	/**
	 * The single step of the import job, wiring reader, processor and writer.
	 *
	 * @param stepBuilderFactory factory used to build the step
	 * @param reader source of {@link Person} items
	 * @param writer destination of processed items
	 * @param processor validation and transformation applied to each item
	 * @return the step named {@code step1}
	 */
	@Bean
	public Step step1(StepBuilderFactory stepBuilderFactory, ItemReader<Person> reader, ItemWriter<Person> writer,
			ItemProcessor<Person,Person> processor) {
		return stepBuilderFactory
				.get("step1")
				.<Person, Person>chunk(65000) // chunk size: 65000 items per commit
				.reader(reader)
				.processor(processor)
				.writer(writer)
				.build();
	}

	/** @return the listener that reports job start, end and elapsed time */
	@Bean
	public CsvJobListener csvJobListener() {
		return new CsvJobListener();
	}

	/** @return the validator used by {@link #processor()} */
	@Bean
	public Validator<Person> csvBeanValidator() {
		return new CsvBeanValidator<Person>();
	}

}

8.手动触发任务

//@Configuration   <- commented out for the manual-trigger variant (original note: "comment out this configuration")
@EnableBatchProcessing
public class CsvBatchConfig {
}

新建 TriggerBatchConfig,内容与 CsvBatchConfig 保持一致

除了 ItemReader 这个bean

	/**
	 * Step-scoped reader whose input file is taken from the job parameter
	 * {@code input.file.name} and resolved as a classpath resource.
	 *
	 * <p>Each line is split into the columns {@code name, age, nation, address}
	 * and mapped onto a {@link Person}. The mapper objects are configured with
	 * plain setter calls rather than double-brace initialization, so no
	 * anonymous subclasses are created.
	 *
	 * <p>The return type is the concrete {@code FlatFileItemReader}, not
	 * {@code ItemReader}, as the original note stresses.
	 * NOTE(review): presumably this keeps the step-scoped proxy usable as an
	 * item stream -- confirm against the Spring Batch documentation.
	 *
	 * @param pathToFile classpath location of the CSV file, from the job parameters
	 * @return the configured reader
	 */
	@Bean
	@StepScope
	public FlatFileItemReader<Person> reader(@Value("#{jobParameters['input.file.name']}") String pathToFile) throws Exception {
		// Splits a line into named fields.
		DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer();
		tokenizer.setNames(new String[] { "name","age", "nation" ,"address"});

		// Binds the named fields to a Person instance.
		BeanWrapperFieldSetMapper<Person> fieldSetMapper = new BeanWrapperFieldSetMapper<Person>();
		fieldSetMapper.setTargetType(Person.class);

		DefaultLineMapper<Person> lineMapper = new DefaultLineMapper<Person>();
		lineMapper.setLineTokenizer(tokenizer);
		lineMapper.setFieldSetMapper(fieldSetMapper);

		FlatFileItemReader<Person> reader = new FlatFileItemReader<Person>();
		reader.setResource(new ClassPathResource(pathToFile));
		reader.setLineMapper(lineMapper);
		return reader;
	}

9. 定义控制器

/**
 * REST controller that launches the CSV import job on demand.
 */
@RestController
public class DemoController {

	@Autowired
	JobLauncher jobLauncher;

	@Autowired
	Job importJob;

	/**
	 * Parameters of the most recently requested launch.
	 *
	 * <p>Kept (and still assigned) only for backward compatibility. The launch
	 * itself no longer reads this field, because it is shared by all requests.
	 */
	public JobParameters   jobParameters;

	/**
	 * Launches the import job for the file {@code fileName + ".csv"}.
	 *
	 * <p>NOTE(review): {@code fileName} comes straight from the request and is
	 * not validated here; a missing parameter yields the path {@code "null.csv"}.
	 *
	 * @param fileName base name of the CSV file, without the extension
	 * @return the literal {@code "ok"} once {@code jobLauncher.run} has returned
	 * @throws Exception whatever {@code jobLauncher.run} throws
	 */
	@RequestMapping("/read")
	public String imp(String fileName) throws Exception{
		String path = fileName+".csv";

		// Build into a local so a concurrent request cannot swap the parameters
		// between construction and launch.
		JobParameters params = new JobParametersBuilder()
				.addLong("time", System.currentTimeMillis()) // presumably makes each launch's parameters distinct
				.addString("input.file.name", path)
				.toJobParameters();

		jobParameters = params;
		jobLauncher.run(importJob, params);
		return "ok";
	}

}

10. 配置文件

spring.datasource.driverClassName=oracle.jdbc.OracleDriver
spring.datasource.url=jdbc\:oracle\:thin\:@192.168.31.183\:49161\:xe
spring.datasource.username=system
spring.datasource.password=oracle

# 关闭自动执行job的配置
spring.batch.job.enabled=false

logging.level.org.springframework.web = DEBUG

http://localhost:8080/read?fileName=people

表里就会生成处理好的,对应的数据,就像 hadoop一样。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章