突发奇想想要得到彩票的开奖信息,就写了这个程序,爬取双色球历史开奖,爬取大乐透历史开奖。
数据抓取地址:http://datachart.500.com/
获取彩票开奖信息的工具类,提供以下功能:
1、获取传入期号之后开奖的双色球数据
2、获取全部双色球的开奖数据
3、获取传入期号之后开奖的大乐透数据
4、获取全部大乐透的开奖数据
模块零:实体类
package entity.lottery;
import java.util.Date;
import java.util.List;
/**
 * Entity for one row of the Super Lotto (大乐透) draw-result table.
 *
 * Created by yy on 2017/9/26.
 */
public class BigLottoBall {
    private int Period_Id;            // draw (period) number
    private int RedBall_One;          // red ball 1
    private int RedBall_Tow;          // red ball 2
    private int RedBall_Three;        // red ball 3
    private int RedBall_Four;         // red ball 4
    private int RedBall_Fives;        // red ball 5
    private int BlueBall_One;         // blue ball 1
    private int BlueBall_Tow;         // blue ball 2
    private long Prize_Pool_Bonus;    // prize pool amount
    private int First_Prize_Number;   // number of first-prize winning bets
    private int First_Prize_Bonus;    // first-prize payout
    private int Second_Prize_Number;  // number of second-prize winning bets
    private int Second_Prize_Bonus;   // second-prize payout
    private int Total_Bet_Amount;     // total amount bet
    private String Lottery_Date;      // draw date, stored as the text it was given

    /** Creates an entity with every field at its default value. */
    public BigLottoBall() {
    }

    /**
     * Populates the entity from the textual fields of one table row.
     *
     * The list is read positionally: period, five red balls, two blue balls,
     * prize pool, first-prize count and payout, second-prize count and payout,
     * total bet amount, draw date. A list whose size is not exactly 15 is
     * ignored and all fields keep their default values.
     *
     * @param infoData the row's fields, in table order
     * @throws NumberFormatException if a numeric field is not a valid number
     */
    public BigLottoBall(List<String> infoData) {
        if (infoData.size() != 15) {
            // Not a complete row: leave the entity untouched.
            return;
        }
        Period_Id = Integer.parseInt(infoData.get(0));
        RedBall_One = Integer.parseInt(infoData.get(1));
        RedBall_Tow = Integer.parseInt(infoData.get(2));
        RedBall_Three = Integer.parseInt(infoData.get(3));
        RedBall_Four = Integer.parseInt(infoData.get(4));
        RedBall_Fives = Integer.parseInt(infoData.get(5));
        BlueBall_One = Integer.parseInt(infoData.get(6));
        BlueBall_Tow = Integer.parseInt(infoData.get(7));
        Prize_Pool_Bonus = Long.parseLong(infoData.get(8));
        First_Prize_Number = Integer.parseInt(infoData.get(9));
        First_Prize_Bonus = Integer.parseInt(infoData.get(10));
        Second_Prize_Number = Integer.parseInt(infoData.get(11));
        Second_Prize_Bonus = Integer.parseInt(infoData.get(12));
        Total_Bet_Amount = Integer.parseInt(infoData.get(13));
        Lottery_Date = infoData.get(14);
    }

    /** Renders every field as {@code name=value}, in declaration order. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("BigLottoBall{");
        text.append("Period_Id=").append(Period_Id);
        text.append(", RedBall_One=").append(RedBall_One);
        text.append(", RedBall_Tow=").append(RedBall_Tow);
        text.append(", RedBall_Three=").append(RedBall_Three);
        text.append(", RedBall_Four=").append(RedBall_Four);
        text.append(", RedBall_Fives=").append(RedBall_Fives);
        text.append(", BlueBall_One=").append(BlueBall_One);
        text.append(", BlueBall_Tow=").append(BlueBall_Tow);
        text.append(", Prize_Pool_Bonus=").append(Prize_Pool_Bonus);
        text.append(", First_Prize_Number=").append(First_Prize_Number);
        text.append(", First_Prize_Bonus=").append(First_Prize_Bonus);
        text.append(", Second_Prize_Number=").append(Second_Prize_Number);
        text.append(", Second_Prize_Bonus=").append(Second_Prize_Bonus);
        text.append(", Total_Bet_Amount=").append(Total_Bet_Amount);
        text.append(", Lottery_Date='").append(Lottery_Date).append('\'');
        text.append('}');
        return text.toString();
    }
    // getters and setters omitted
}
package entity.lottery;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
/**
 * Entity for one row of the Double Color Ball (双色球) draw-result table.
 *
 * Created by yy on 2017/9/26.
 */
public class DoubleColorBall {
    private int Period_Id;            // draw (period) number
    private int RedBall_One;          // red ball 1
    private int RedBall_Tow;          // red ball 2
    private int RedBall_Three;        // red ball 3
    private int RedBall_Four;         // red ball 4
    private int RedBall_Fives;        // red ball 5
    private int RedBall_Six;          // red ball 6
    private int BlueBall_One;         // blue ball 1
    private long Prize_Pool_Bonus;    // prize pool amount
    private int First_Prize_Number;   // number of first-prize winning bets
    private int First_Prize_Bonus;    // first-prize payout
    private int Second_Prize_Number;  // number of second-prize winning bets
    private int Second_Prize_Bonus;   // second-prize payout
    private int Total_Bet_Amount;     // total amount bet
    private String Lottery_Date;      // draw date, stored as the text it was given

    /** Creates an entity with every field at its default value. */
    public DoubleColorBall() {
    }

    /**
     * Populates the entity from the textual fields extracted for one table row.
     *
     * The list is read positionally: period, six red balls, one blue ball,
     * prize pool, first-prize count and payout, second-prize count and payout,
     * total bet amount, draw date. A list whose size is not exactly 15 is
     * ignored and all fields keep their default values.
     *
     * @param infoData the row's fields, in table order
     * @throws NumberFormatException if a numeric field is not a valid number
     */
    public DoubleColorBall(List<String> infoData) {
        if (infoData.size() == 15) {
            this.Period_Id = Integer.parseInt(infoData.get(0));
            this.RedBall_One = Integer.parseInt(infoData.get(1));
            this.RedBall_Tow = Integer.parseInt(infoData.get(2));
            this.RedBall_Three = Integer.parseInt(infoData.get(3));
            this.RedBall_Four = Integer.parseInt(infoData.get(4));
            this.RedBall_Fives = Integer.parseInt(infoData.get(5));
            this.RedBall_Six = Integer.parseInt(infoData.get(6));
            this.BlueBall_One = Integer.parseInt(infoData.get(7));
            this.Prize_Pool_Bonus = Long.parseLong(infoData.get(8));
            this.First_Prize_Number = Integer.parseInt(infoData.get(9));
            this.First_Prize_Bonus = Integer.parseInt(infoData.get(10));
            this.Second_Prize_Number = Integer.parseInt(infoData.get(11));
            this.Second_Prize_Bonus = Integer.parseInt(infoData.get(12));
            this.Total_Bet_Amount = Integer.parseInt(infoData.get(13));
            this.Lottery_Date = infoData.get(14);
        }
    }

    /** Renders every field as {@code name=value}, in declaration order. */
    @Override
    public String toString() {
        return "DoubleColorBall{" +
                "Period_Id=" + Period_Id +
                ", RedBall_One=" + RedBall_One +
                ", RedBall_Tow=" + RedBall_Tow +
                ", RedBall_Three=" + RedBall_Three +
                ", RedBall_Four=" + RedBall_Four +
                // Label previously read "edBall_Fives"; it now matches the field name.
                ", RedBall_Fives=" + RedBall_Fives +
                ", RedBall_Six=" + RedBall_Six +
                ", BlueBall_One=" + BlueBall_One +
                ", Prize_Pool_Bonus=" + Prize_Pool_Bonus +
                ", First_Prize_Number=" + First_Prize_Number +
                ", First_Prize_Bonus=" + First_Prize_Bonus +
                ", Second_Prize_Number=" + Second_Prize_Number +
                ", Second_Prize_Bonus=" + Second_Prize_Bonus +
                ", Total_Bet_Amount=" + Total_Bet_Amount +
                ", Lottery_Date='" + Lottery_Date + '\'' +
                '}';
    }
    // getters and setters omitted
}
模块一:获取网页数据
如果方法一(getString)返回乱码,请改用方法二(unGZIPGetString)。
package tool;
import org.apache.commons.lang3.StringUtils;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;
/**
 * Utility for downloading the body of a URL as text.
 *
 * Created by yy on 2017/10/17.
 */
public class GetUrlData {
    /**
     * Downloads {@code url} and returns the response body decoded as UTF-8.
     *
     * The body is read line by line and the line terminators are not
     * re-inserted, so the whole page comes back as a single line of text.
     *
     * @param url the URL to request
     * @return the response text, or {@code null} when {@code url} is blank
     *         (same convention as {@link #unGZIPGetString(String)})
     * @throws Exception wrapping any failure while connecting or reading
     */
    public static String getString(String url) throws Exception {
        // A blank URL used to fall through to data.toString() on a null builder.
        if (StringUtils.isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            // try-with-resources closes the streams even when reading fails and
            // never touches a stream that was not opened, so the original
            // failure is no longer hidden by a NullPointerException on close.
            try (java.io.InputStream raw = connection.getInputStream();
                 BufferedReader reader = new BufferedReader(new InputStreamReader(raw, "UTF-8"))) {
                StringBuilder data = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    data.append(line);
                }
                return data.toString();
            }
        } catch (Exception e) {
            throw new Exception("读取Url数据失败:" + url, e);
        }
    }

    /**
     * Downloads {@code url}, gunzips the response body and decodes it as UTF-8.
     *
     * Use this when {@link #getString(String)} returns garbled text.
     *
     * @param url the URL to request
     * @return the decompressed response text, or {@code null} when {@code url} is blank
     * @throws Exception wrapping any failure while connecting, reading or decompressing
     */
    public static String unGZIPGetString(String url) throws Exception {
        if (StringUtils.isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            // The raw stream is declared separately so it is closed even if the
            // GZIP header check in the GZIPInputStream constructor fails.
            try (java.io.InputStream raw = connection.getInputStream();
                 GZIPInputStream gzip = new GZIPInputStream(raw)) {
                byte[] buffer = new byte[256];
                int n;
                while ((n = gzip.read(buffer)) >= 0) {
                    out.write(buffer, 0, n);
                }
            }
            return out.toString("UTF-8");
        } catch (Exception e) {
            throw new Exception("读取Url数据失败:" + url, e);
        }
    }

    /** Opens a connection to {@code url} with the request headers both download methods send. */
    private static URLConnection openConnection(String url) throws Exception {
        URL urlInfo = new URL(url);
        URLConnection connection = urlInfo.openConnection();
        connection.addRequestProperty("Host", urlInfo.getHost());
        connection.addRequestProperty("Connection", "keep-alive");
        connection.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        connection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36");
        connection.connect();
        return connection;
    }
}
模块二:获取彩票开奖数据
package tool;
import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by yy on 2017/10/16.
* 获取彩票开奖信息工具类
* 1、获取传入期号之后开奖的双色球数据
* 2、获取全部双色球的开奖数据
* 3、获取传入期号之后大乐透的开奖数据
* 4、获取全部大乐透开奖信息数据
*/
public class GetLottery {
//大乐透请求url
private static String bigUrl="http://datachart.500.com/dlt/history/newinc/history.php?limit=10000&sort=0";
//双色球请求url
private static String douUrl="http://datachart.500.com/ssq/history/newinc/history.php?limit=100000&sort=0";
//匹配开奖正则
private static String listRegex="--><(.*?)<\/tr>";
//匹配开奖明细正则
private static String infoRegex=">[,\\d-]+?<";
/**
* 获取全部双色球的开奖信息数据
* @return list对象
* @throws Exception
*/
public static List<DoubleColorBall> getAllDCB()throws Exception{
List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
//获取url信息
String data= GetUrlData.getString(GetLottery.douUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍历获取的数据
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//获取具体的数据
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//创建对象
list.add(new DoubleColorBall(infoData));
}
return list;
}
/**
* 根据传进来的的开奖期号获取这个开奖期号自后的开奖信息
* @param period_Id 开奖期号
* @return List对象
* @throws Exception
*/
public static List<DoubleColorBall> getBeforeDCB(int period_Id)throws Exception{
List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
//获取url信息
String data= GetUrlData.getString(GetLottery.douUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍历获取的数据
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//获取具体的数据
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//要是获取的期号小于或者等于传进来的期号则跳出
if (Integer.parseInt(infoData.get(0))<=period_Id){
break;
}else{
//创建对象
list.add(new DoubleColorBall(infoData));
}
}
return list;
}
/**
* 获取全部大乐透的开奖信息
* @return List对象
* @throws Exception
*/
public static List<BigLottoBall> getAllBLB()throws Exception{
List<BigLottoBall> list=new ArrayList<BigLottoBall>();
//获取url信息
String data= GetUrlData.getString(GetLottery.bigUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍历获取的数据
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//获取具体的数据
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//创建对象
list.add(new BigLottoBall(infoData));
}
return list;
}
/**
* 根据传入的期号来获取这个期号之前的开奖信息
* @param period_Id 开奖期号
* @return
* @throws Exception
*/
public static List<BigLottoBall> getBeforeBLB(int period_Id)throws Exception{
List<BigLottoBall> list=new ArrayList<BigLottoBall>();
//获取url信息
String data= GetUrlData.getString(GetLottery.bigUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍历获取的数据
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//获取具体的数据
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//要是获取的期号小于或者等于传进来的期号则跳出
if (Integer.parseInt(infoData.get(0))<=period_Id){
break;
}else{
//创建对象
list.add(new BigLottoBall(infoData));
}
}
return list;
}
}
模块三:测试
import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;
import tool.GetLottery;
import java.util.List;
/**
 * Manual smoke test: downloads draw results, prints each one, then prints the
 * elapsed time in milliseconds.
 *
 * Created by yy on 2017/10/16.
 */
public class TestGetUrl {
    /**
     * Runs one of the {@code GetLottery} queries against the live site.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long star = System.currentTimeMillis();
        try {
            // Other queries that can be swapped in for the call below:
            //   List<DoubleColorBall> list = GetLottery.getAllDCB();          // all Double Color Ball draws
            //   List<BigLottoBall> list = GetLottery.getAllBLB();             // all Super Lotto draws
            //   List<DoubleColorBall> list = GetLottery.getBeforeDCB(16120);  // Double Color Ball draws after 16120
            //   List<DoubleColorBall> list = GetLottery.getBeforeDCB(17120);  // Double Color Ball draws after 17120
            //   List<BigLottoBall> list = GetLottery.getBeforeBLB(16120);     // Super Lotto draws after 16120
            // The raw page itself is fetched by tool.GetUrlData.getString(url);
            // GetLottery has no getString method.

            // Super Lotto draws after period 17110
            List<BigLottoBall> list = GetLottery.getBeforeBLB(17110);
            for (BigLottoBall draw : list) {
                System.out.println(draw.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println(System.currentTimeMillis() - star);
    }
}
模块四:测试结果
BigLottoBall{Period_Id=17121, RedBall_One=1, RedBall_Tow=6, RedBall_Three=12, RedBall_Four=26, RedBall_Fives=31, BlueBall_One=1, BlueBall_Tow=7, Prize_Pool_Bonus=4271765199, First_Prize_Number=0, First_Prize_Bonus=0, Second_Prize_Number=59, Second_Prize_Bonus=149524, Total_Bet_Amount=199583003, Lottery_Date='2017-10-16'}
BigLottoBall{Period_Id=17120, RedBall_One=8, RedBall_Tow=15, RedBall_Three=24, RedBall_Four=26, RedBall_Fives=27, BlueBall_One=5, BlueBall_Tow=6, Prize_Pool_Bonus=4223045066, First_Prize_Number=7, First_Prize_Bonus=6747509, Second_Prize_Number=107, Second_Prize_Bonus=63449, Total_Bet_Amount=216330390, Lottery_Date='2017-10-14'}
BigLottoBall{Period_Id=17119, RedBall_One=5, RedBall_Tow=7, RedBall_Three=13, RedBall_Four=29, RedBall_Fives=35, BlueBall_One=3, BlueBall_Tow=8, Prize_Pool_Bonus=4247472952, First_Prize_Number=5, First_Prize_Bonus=8296832, Second_Prize_Number=43, Second_Prize_Bonus=216912, Total_Bet_Amount=197391000, Lottery_Date='2017-10-11'}
BigLottoBall{Period_Id=17118, RedBall_One=2, RedBall_Tow=7, RedBall_Three=16, RedBall_Four=20, RedBall_Fives=33, BlueBall_One=3, BlueBall_Tow=11, Prize_Pool_Bonus=4252457951, First_Prize_Number=11, First_Prize_Bonus=6287618, Second_Prize_Number=57, Second_Prize_Bonus=166932, Total_Bet_Amount=195121929, Lottery_Date='2017-10-09'}
BigLottoBall{Period_Id=17117, RedBall_One=5, RedBall_Tow=7, RedBall_Three=9, RedBall_Four=24, RedBall_Fives=32, BlueBall_One=8, BlueBall_Tow=10, Prize_Pool_Bonus=4307581838, First_Prize_Number=5, First_Prize_Bonus=8668433, Second_Prize_Number=40, Second_Prize_Bonus=220578, Total_Bet_Amount=207881694, Lottery_Date='2017-10-07'}
BigLottoBall{Period_Id=17116, RedBall_One=2, RedBall_Tow=27, RedBall_Three=30, RedBall_Four=32, RedBall_Fives=33, BlueBall_One=1, BlueBall_Tow=3, Prize_Pool_Bonus=4309435912, First_Prize_Number=2, First_Prize_Bonus=10000000, Second_Prize_Number=36, Second_Prize_Bonus=242412, Total_Bet_Amount=172984540, Lottery_Date='2017-10-04'}
BigLottoBall{Period_Id=17115, RedBall_One=14, RedBall_Tow=19, RedBall_Three=20, RedBall_Four=25, RedBall_Fives=31, BlueBall_One=6, BlueBall_Tow=8, Prize_Pool_Bonus=4288831779, First_Prize_Number=3, First_Prize_Bonus=9023329, Second_Prize_Number=172, Second_Prize_Bonus=38436, Total_Bet_Amount=175963547, Lottery_Date='2017-10-02'}
BigLottoBall{Period_Id=17114, RedBall_One=6, RedBall_Tow=7, RedBall_Three=12, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=1, BlueBall_Tow=12, Prize_Pool_Bonus=4288325250, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=55, Second_Prize_Bonus=207498, Total_Bet_Amount=221457669, Lottery_Date='2017-09-30'}
BigLottoBall{Period_Id=17113, RedBall_One=5, RedBall_Tow=8, RedBall_Three=17, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=4, BlueBall_Tow=12, Prize_Pool_Bonus=4238842403, First_Prize_Number=10, First_Prize_Bonus=6601551, Second_Prize_Number=117, Second_Prize_Bonus=73142, Total_Bet_Amount=202181819, Lottery_Date='2017-09-27'}
BigLottoBall{Period_Id=17112, RedBall_One=5, RedBall_Tow=6, RedBall_Three=20, RedBall_Four=31, RedBall_Fives=32, BlueBall_One=6, BlueBall_Tow=12, Prize_Pool_Bonus=4270236000, First_Prize_Number=6, First_Prize_Bonus=7219221, Second_Prize_Number=88, Second_Prize_Bonus=105157, Total_Bet_Amount=199283910, Lottery_Date='2017-09-25'}
BigLottoBall{Period_Id=17111, RedBall_One=2, RedBall_Tow=14, RedBall_Three=17, RedBall_Four=26, RedBall_Fives=34, BlueBall_One=8, BlueBall_Tow=12, Prize_Pool_Bonus=4289815824, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=43, Second_Prize_Bonus=251070, Total_Bet_Amount=220140489, Lottery_Date='2017-09-23'}
总结:
1、提供的工具类可以直接抓取并解析对应的彩票开奖数据,以上测试结果也是正确的;读取全部开奖数据的耗时一般在1000毫秒以内(视具体网络环境可能有所浮动)。
2、2018年使用时发现返回结果出现乱码,原因是被爬取的站点启用了GZIP压缩,因此特意在工具类中补充了GZIP解压方法(unGZIPGetString)。