java 爬取彩票开奖信息,爬取双色球历史开奖,爬取大乐透历史开奖

突发奇想想要得到彩票的开奖信息,就写了这个程序,爬取双色球历史开奖,爬取大乐透历史开奖。

数据抓取地址:http://datachart.500.com/

获取彩票开奖信息工具类
* 1、获取传入期号之后开奖的双色球数据
* 2、获取全部双色球的开奖数据
* 3、获取传入期号之后大乐透的开奖数据
* 4、获取全部大乐透开奖信息数据


模块零:实体类

package entity.lottery;

import java.util.Date;
import java.util.List;

/**
 * Entity for one row of the Da Le Tou (大乐透, "Big Lotto") draw-result table.
 *
 * Created by yy on 2017/9/26.
 */
public class BigLottoBall {

    /** Number of values a parsed row must hold for the list constructor to populate the fields. */
    private static final int EXPECTED_FIELD_COUNT = 15;

    private int Period_Id;            // draw (period) number
    private int RedBall_One;          // red ball 1
    private int RedBall_Tow;          // red ball 2
    private int RedBall_Three;        // red ball 3
    private int RedBall_Four;         // red ball 4
    private int RedBall_Fives;        // red ball 5
    private int BlueBall_One;         // blue ball 1
    private int BlueBall_Tow;         // blue ball 2
    private long Prize_Pool_Bonus;    // prize pool amount
    private int First_Prize_Number;   // number of first-prize winning bets
    private int First_Prize_Bonus;    // first-prize amount
    private int Second_Prize_Number;  // number of second-prize winning bets
    private int Second_Prize_Bonus;   // second-prize amount
    private int Total_Bet_Amount;     // total amount bet
    private String Lottery_Date;      // draw date, stored exactly as received

    /** Creates an entity with every field left at its default value. */
    public BigLottoBall() {
    }

    /**
     * Populates the entity from the values extracted from one table row.
     *
     * The values are read positionally, in the same order as the fields are
     * declared. When the list does not contain exactly 15 values, nothing is
     * parsed and every field keeps its default value.
     *
     * @param infoData values of one row, in column order
     * @throws NumberFormatException if one of the numeric values cannot be parsed
     */
    public BigLottoBall(List<String> infoData) {
        if (infoData.size() != EXPECTED_FIELD_COUNT) {
            return;
        }
        this.Period_Id = Integer.parseInt(infoData.get(0));
        this.RedBall_One = Integer.parseInt(infoData.get(1));
        this.RedBall_Tow = Integer.parseInt(infoData.get(2));
        this.RedBall_Three = Integer.parseInt(infoData.get(3));
        this.RedBall_Four = Integer.parseInt(infoData.get(4));
        this.RedBall_Fives = Integer.parseInt(infoData.get(5));
        this.BlueBall_One = Integer.parseInt(infoData.get(6));
        this.BlueBall_Tow = Integer.parseInt(infoData.get(7));
        this.Prize_Pool_Bonus = Long.parseLong(infoData.get(8));
        this.First_Prize_Number = Integer.parseInt(infoData.get(9));
        this.First_Prize_Bonus = Integer.parseInt(infoData.get(10));
        this.Second_Prize_Number = Integer.parseInt(infoData.get(11));
        this.Second_Prize_Bonus = Integer.parseInt(infoData.get(12));
        this.Total_Bet_Amount = Integer.parseInt(infoData.get(13));
        this.Lottery_Date = infoData.get(14);
    }

    /**
     * Renders every field as {@code name=value}, in declaration order.
     *
     * @return a single-line description of this draw
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("BigLottoBall{");
        text.append("Period_Id=").append(Period_Id);
        text.append(", RedBall_One=").append(RedBall_One);
        text.append(", RedBall_Tow=").append(RedBall_Tow);
        text.append(", RedBall_Three=").append(RedBall_Three);
        text.append(", RedBall_Four=").append(RedBall_Four);
        text.append(", RedBall_Fives=").append(RedBall_Fives);
        text.append(", BlueBall_One=").append(BlueBall_One);
        text.append(", BlueBall_Tow=").append(BlueBall_Tow);
        text.append(", Prize_Pool_Bonus=").append(Prize_Pool_Bonus);
        text.append(", First_Prize_Number=").append(First_Prize_Number);
        text.append(", First_Prize_Bonus=").append(First_Prize_Bonus);
        text.append(", Second_Prize_Number=").append(Second_Prize_Number);
        text.append(", Second_Prize_Bonus=").append(Second_Prize_Bonus);
        text.append(", Total_Bet_Amount=").append(Total_Bet_Amount);
        text.append(", Lottery_Date='").append(Lottery_Date).append('\'');
        text.append('}');
        return text.toString();
    }
    // getters and setters omitted

}
package entity.lottery;

import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;

/**
 * Entity for one row of the Shuang Se Qiu (双色球, "Double Color Ball") draw-result table.
 *
 * Created by yy on 2017/9/26.
 */
public class DoubleColorBall {

    private int Period_Id;//draw (period) number
    private int RedBall_One;//red ball 1
    private int RedBall_Tow;//red ball 2
    private int RedBall_Three;//red ball 3
    private int RedBall_Four;//red ball 4
    private int RedBall_Fives;//red ball 5
    private int RedBall_Six;//red ball 6
    private int BlueBall_One;//blue ball 1
    private long Prize_Pool_Bonus;//prize pool amount
    private int First_Prize_Number;//number of first-prize winning bets
    private int First_Prize_Bonus;//first-prize amount
    private int Second_Prize_Number;//number of second-prize winning bets
    private int Second_Prize_Bonus;//second-prize amount
    private int Total_Bet_Amount;//total amount bet
    private String Lottery_Date;//draw date, stored exactly as received

    /** Creates an entity with every field left at its default value. */
    public DoubleColorBall(){}

    /**
     * Populates the entity from the values the regex extraction produced for one table row.
     *
     * The values are read positionally, in the same order as the fields are
     * declared. When the list does not contain exactly 15 values, nothing is
     * parsed and every field keeps its default value.
     *
     * @param infoData values of one row, in column order
     * @throws NumberFormatException if one of the numeric values cannot be parsed
     */
    public DoubleColorBall(List<String> infoData) {
        if (infoData.size()==15){
            this.Period_Id=Integer.parseInt(infoData.get(0));
            this.RedBall_One=Integer.parseInt(infoData.get(1));
            this.RedBall_Tow=Integer.parseInt(infoData.get(2));
            this.RedBall_Three=Integer.parseInt(infoData.get(3));
            this.RedBall_Four=Integer.parseInt(infoData.get(4));
            this.RedBall_Fives=Integer.parseInt(infoData.get(5));
            this.RedBall_Six=Integer.parseInt(infoData.get(6));
            this.BlueBall_One=Integer.parseInt(infoData.get(7));
            this.Prize_Pool_Bonus=Long.parseLong(infoData.get(8));
            this.First_Prize_Number=Integer.parseInt(infoData.get(9));
            this.First_Prize_Bonus=Integer.parseInt(infoData.get(10));
            this.Second_Prize_Number=Integer.parseInt(infoData.get(11));
            this.Second_Prize_Bonus=Integer.parseInt(infoData.get(12));
            this.Total_Bet_Amount=Integer.parseInt(infoData.get(13));
            this.Lottery_Date=infoData.get(14);
        }
    }

    /**
     * Renders every field as {@code name=value}, in declaration order.
     *
     * @return a single-line description of this draw
     */
    @Override
    public String toString() {
        return "DoubleColorBall{" +
                "Period_Id=" + Period_Id +
                ", RedBall_One=" + RedBall_One +
                ", RedBall_Tow=" + RedBall_Tow +
                ", RedBall_Three=" + RedBall_Three +
                ", RedBall_Four=" + RedBall_Four +
                // Label previously read "edBall_Fives"; it now matches the field name.
                ", RedBall_Fives=" + RedBall_Fives +
                ", RedBall_Six=" + RedBall_Six +
                ", BlueBall_One=" + BlueBall_One +
                ", Prize_Pool_Bonus=" + Prize_Pool_Bonus +
                ", First_Prize_Number=" + First_Prize_Number +
                ", First_Prize_Bonus=" + First_Prize_Bonus +
                ", Second_Prize_Number=" + Second_Prize_Number +
                ", Second_Prize_Bonus=" + Second_Prize_Bonus +
                ", Total_Bet_Amount=" + Total_Bet_Amount +
                ", Lottery_Date='" + Lottery_Date + '\'' +
                '}';
    }
//getters and setters omitted
}

模块一:获取网页数据

如果方法一(getString)返回乱码,请改用方法二(unGZIPGetString)。

package tool;

import org.apache.commons.lang3.StringUtils;

import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;

/**
 * Utility class for downloading the body of a URL as text.
 *
 * Created by yy on 2017/10/17.
 */
public class GetUrlData {
    /**
     * Requests the given URL and returns the response body decoded as UTF-8.
     *
     * The body is read line by line and the lines are concatenated WITHOUT
     * line separators, so the result is a single line of text.
     *
     * @param url the URL to request
     * @return the response body with line breaks removed, or {@code null} when
     *         {@code url} is null or blank (same contract as {@link #unGZIPGetString(String)})
     * @throws Exception if the URL is malformed or the request/read fails; the
     *                   original failure is kept as the cause
     */
    public static String getString(String url)throws Exception{
        if (isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            // Closing the BufferedReader also closes the wrapped reader and stream,
            // and happens even when reading fails part-way.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                StringBuilder data = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    data.append(line);
                }
                return data.toString();
            }
        } catch (Exception e) {
            throw new Exception("读取Url数据失败:"+url,e);
        }
    }

    /**
     * Requests the given URL, GZIP-decompresses the response body and returns
     * it decoded as UTF-8.
     *
     * Use this method when {@link #getString(String)} returns garbled text.
     *
     * @param url the URL to request
     * @return the decompressed response body, or {@code null} when {@code url}
     *         is null or blank
     * @throws Exception if the URL is malformed, the request fails or the body
     *                   is not valid GZIP data; the original failure is kept as the cause
     */
    public static String unGZIPGetString(String url) throws Exception{
        if (isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            // The raw stream is declared separately so it is closed even if the
            // GZIPInputStream constructor rejects the data.
            try (java.io.InputStream raw = connection.getInputStream();
                 GZIPInputStream gzip = new GZIPInputStream(raw)) {
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                byte[] buffer = new byte[256];
                int n;
                while ((n = gzip.read(buffer)) >= 0) {
                    out.write(buffer, 0, n);
                }
                return out.toString("UTF-8");
            }
        } catch (Exception e) {
            throw new Exception("读取Url数据失败:"+url,e);
        }
    }

    /** Opens a connection to {@code url} with browser-like request headers and connects it. */
    private static URLConnection openConnection(String url) throws Exception {
        URL urlInfo = new URL(url);
        URLConnection connection = urlInfo.openConnection();
        connection.addRequestProperty("Host", urlInfo.getHost());
        connection.addRequestProperty("Connection", "keep-alive");
        connection.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        connection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36");
        connection.connect();
        return connection;
    }

    /** Returns true when {@code text} is null, empty or consists only of whitespace. */
    private static boolean isBlank(String text) {
        if (text == null) {
            return true;
        }
        for (int i = 0; i < text.length(); i++) {
            if (!Character.isWhitespace(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }
}


模块二:获取彩票开奖数据

package tool;

import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by yy on 2017/10/16.
 * 获取彩票开奖信息工具类
 * 1、获取传入期号之后开奖的双色球数据
 * 2、获取全部双色球的开奖数据
 * 3、获取传入期号之后大乐透的开奖数据
 * 4、获取全部大乐透开奖信息数据
 */
public class GetLottery {
    //大乐透请求url
    private static  String bigUrl="http://datachart.500.com/dlt/history/newinc/history.php?limit=10000&sort=0";
    //双色球请求url
    private static  String douUrl="http://datachart.500.com/ssq/history/newinc/history.php?limit=100000&sort=0";
    //匹配开奖正则
    private static  String listRegex="--><(.*?)<\/tr>";
    //匹配开奖明细正则
    private static  String infoRegex=">[,\\d-]+?<";

    /**
     * 获取全部双色球的开奖信息数据
     * @return list对象
     * @throws Exception
     */
    public static List<DoubleColorBall> getAllDCB()throws Exception{
        List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
        //获取url信息
        String data= GetUrlData.getString(GetLottery.douUrl);

        Pattern pattern = Pattern.compile(GetLottery.listRegex);
        Matcher matcher =pattern.matcher(data);
        //遍历获取的数据
        while (matcher.find()){
            String info=matcher.group();
            List<String> infoData=new ArrayList<String>();
            Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
            Matcher matcherInfo =patternInfo.matcher(info);
            //获取具体的数据
            while (matcherInfo.find()){
                infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
            }
            //创建对象
            list.add(new DoubleColorBall(infoData));
        }
        return list;

    }

    /**
     * 根据传进来的的开奖期号获取这个开奖期号自后的开奖信息
     * @param period_Id 开奖期号
     * @return List对象
     * @throws Exception
     */
    public static List<DoubleColorBall> getBeforeDCB(int period_Id)throws Exception{
        List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
        //获取url信息
        String data= GetUrlData.getString(GetLottery.douUrl);

        Pattern pattern = Pattern.compile(GetLottery.listRegex);
        Matcher matcher =pattern.matcher(data);
        //遍历获取的数据
        while (matcher.find()){
            String info=matcher.group();
            List<String> infoData=new ArrayList<String>();
            Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
            Matcher matcherInfo =patternInfo.matcher(info);
            //获取具体的数据
            while (matcherInfo.find()){
                infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
            }
            //要是获取的期号小于或者等于传进来的期号则跳出
            if (Integer.parseInt(infoData.get(0))<=period_Id){
                break;
            }else{
                //创建对象
                list.add(new DoubleColorBall(infoData));
            }
        }
        return list;
    }

    /**
     * 获取全部大乐透的开奖信息
     * @return List对象
     * @throws Exception
     */
    public static List<BigLottoBall> getAllBLB()throws Exception{
        List<BigLottoBall> list=new ArrayList<BigLottoBall>();
        //获取url信息
        String data= GetUrlData.getString(GetLottery.bigUrl);

        Pattern pattern = Pattern.compile(GetLottery.listRegex);
        Matcher matcher =pattern.matcher(data);
        //遍历获取的数据
        while (matcher.find()){
            String info=matcher.group();
            List<String> infoData=new ArrayList<String>();
            Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
            Matcher matcherInfo =patternInfo.matcher(info);
            //获取具体的数据
            while (matcherInfo.find()){
                infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
            }
            //创建对象
            list.add(new BigLottoBall(infoData));
        }
        return list;

    }

    /**
     * 根据传入的期号来获取这个期号之前的开奖信息
     * @param period_Id 开奖期号
     * @return
     * @throws Exception
     */
    public static List<BigLottoBall> getBeforeBLB(int period_Id)throws Exception{
        List<BigLottoBall> list=new ArrayList<BigLottoBall>();
        //获取url信息
        String data= GetUrlData.getString(GetLottery.bigUrl);

        Pattern pattern = Pattern.compile(GetLottery.listRegex);
        Matcher matcher =pattern.matcher(data);
        //遍历获取的数据
        while (matcher.find()){
            String info=matcher.group();
            List<String> infoData=new ArrayList<String>();
            Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
            Matcher matcherInfo =patternInfo.matcher(info);
            //获取具体的数据
            while (matcherInfo.find()){
                infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
            }
            //要是获取的期号小于或者等于传进来的期号则跳出
            if (Integer.parseInt(infoData.get(0))<=period_Id){
                break;
            }else{
                //创建对象
                list.add(new BigLottoBall(infoData));
            }
        }
        return list;
    }
}

模块三:测试

import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;
import tool.GetLottery;

import java.util.List;

/**
 * Manual smoke test for the lottery scraping utilities: runs one query,
 * prints every returned draw and finally prints the elapsed milliseconds.
 *
 * Created by yy on 2017/10/16.
 */
public class TestGetUrl {
    public static void main(String[] args) {
        final long startedAt = System.currentTimeMillis();
        String url = "http://datachart.500.com/ssq/history/newinc/history.php?limit=100000&sort=0";
        try {
            // Alternative calls that can be swapped in for the active one below:
            //   raw page text:                          System.out.println(GetUrlData.getString(url));
            //   all Shuang Se Qiu draws:                List<DoubleColorBall> list = GetLottery.getAllDCB();
            //   all Da Le Tou draws:                    List<BigLottoBall> list = GetLottery.getAllBLB();
            //   Shuang Se Qiu draws after period 16120: List<DoubleColorBall> list = GetLottery.getBeforeDCB(16120);
            //   Shuang Se Qiu draws after period 17120: List<DoubleColorBall> list = GetLottery.getBeforeDCB(17120);
            //   Da Le Tou draws after period 16120:     List<BigLottoBall> list = GetLottery.getBeforeBLB(16120);

            // Active case: Da Le Tou draws after period 17110.
            List<BigLottoBall> draws = GetLottery.getBeforeBLB(17110);

            for (BigLottoBall draw : draws) {
                System.out.println(draw.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println(elapsedMillis);
    }
}

模块四:测试结果

BigLottoBall{Period_Id=17121, RedBall_One=1, RedBall_Tow=6, RedBall_Three=12, RedBall_Four=26, RedBall_Fives=31, BlueBall_One=1, BlueBall_Tow=7, Prize_Pool_Bonus=4271765199, First_Prize_Number=0, First_Prize_Bonus=0, Second_Prize_Number=59, Second_Prize_Bonus=149524, Total_Bet_Amount=199583003, Lottery_Date='2017-10-16'}
BigLottoBall{Period_Id=17120, RedBall_One=8, RedBall_Tow=15, RedBall_Three=24, RedBall_Four=26, RedBall_Fives=27, BlueBall_One=5, BlueBall_Tow=6, Prize_Pool_Bonus=4223045066, First_Prize_Number=7, First_Prize_Bonus=6747509, Second_Prize_Number=107, Second_Prize_Bonus=63449, Total_Bet_Amount=216330390, Lottery_Date='2017-10-14'}
BigLottoBall{Period_Id=17119, RedBall_One=5, RedBall_Tow=7, RedBall_Three=13, RedBall_Four=29, RedBall_Fives=35, BlueBall_One=3, BlueBall_Tow=8, Prize_Pool_Bonus=4247472952, First_Prize_Number=5, First_Prize_Bonus=8296832, Second_Prize_Number=43, Second_Prize_Bonus=216912, Total_Bet_Amount=197391000, Lottery_Date='2017-10-11'}
BigLottoBall{Period_Id=17118, RedBall_One=2, RedBall_Tow=7, RedBall_Three=16, RedBall_Four=20, RedBall_Fives=33, BlueBall_One=3, BlueBall_Tow=11, Prize_Pool_Bonus=4252457951, First_Prize_Number=11, First_Prize_Bonus=6287618, Second_Prize_Number=57, Second_Prize_Bonus=166932, Total_Bet_Amount=195121929, Lottery_Date='2017-10-09'}
BigLottoBall{Period_Id=17117, RedBall_One=5, RedBall_Tow=7, RedBall_Three=9, RedBall_Four=24, RedBall_Fives=32, BlueBall_One=8, BlueBall_Tow=10, Prize_Pool_Bonus=4307581838, First_Prize_Number=5, First_Prize_Bonus=8668433, Second_Prize_Number=40, Second_Prize_Bonus=220578, Total_Bet_Amount=207881694, Lottery_Date='2017-10-07'}
BigLottoBall{Period_Id=17116, RedBall_One=2, RedBall_Tow=27, RedBall_Three=30, RedBall_Four=32, RedBall_Fives=33, BlueBall_One=1, BlueBall_Tow=3, Prize_Pool_Bonus=4309435912, First_Prize_Number=2, First_Prize_Bonus=10000000, Second_Prize_Number=36, Second_Prize_Bonus=242412, Total_Bet_Amount=172984540, Lottery_Date='2017-10-04'}
BigLottoBall{Period_Id=17115, RedBall_One=14, RedBall_Tow=19, RedBall_Three=20, RedBall_Four=25, RedBall_Fives=31, BlueBall_One=6, BlueBall_Tow=8, Prize_Pool_Bonus=4288831779, First_Prize_Number=3, First_Prize_Bonus=9023329, Second_Prize_Number=172, Second_Prize_Bonus=38436, Total_Bet_Amount=175963547, Lottery_Date='2017-10-02'}
BigLottoBall{Period_Id=17114, RedBall_One=6, RedBall_Tow=7, RedBall_Three=12, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=1, BlueBall_Tow=12, Prize_Pool_Bonus=4288325250, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=55, Second_Prize_Bonus=207498, Total_Bet_Amount=221457669, Lottery_Date='2017-09-30'}
BigLottoBall{Period_Id=17113, RedBall_One=5, RedBall_Tow=8, RedBall_Three=17, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=4, BlueBall_Tow=12, Prize_Pool_Bonus=4238842403, First_Prize_Number=10, First_Prize_Bonus=6601551, Second_Prize_Number=117, Second_Prize_Bonus=73142, Total_Bet_Amount=202181819, Lottery_Date='2017-09-27'}
BigLottoBall{Period_Id=17112, RedBall_One=5, RedBall_Tow=6, RedBall_Three=20, RedBall_Four=31, RedBall_Fives=32, BlueBall_One=6, BlueBall_Tow=12, Prize_Pool_Bonus=4270236000, First_Prize_Number=6, First_Prize_Bonus=7219221, Second_Prize_Number=88, Second_Prize_Bonus=105157, Total_Bet_Amount=199283910, Lottery_Date='2017-09-25'}
BigLottoBall{Period_Id=17111, RedBall_One=2, RedBall_Tow=14, RedBall_Three=17, RedBall_Four=26, RedBall_Fives=34, BlueBall_One=8, BlueBall_Tow=12, Prize_Pool_Bonus=4289815824, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=43, Second_Prize_Bonus=251070, Total_Bet_Amount=220140489, Lottery_Date='2017-09-23'}

总结:

1、提供的工具类可以直接获取对应的彩票开奖数据,以上的测试结果也是正确的;使用工具类读取全部开奖数据的耗时一般在1000毫秒以内(根据具体的网络环境可能有所浮动)

2、2018年使用时候发现出现乱码,特意在工具类提供了GZIP的解压工具方法(unGZIPGetString),因为原先的爬取站点使用了GZIP压缩


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章