突發奇想,想要得到彩票的開獎信息,就寫了這個程序,用來爬取雙色球和大樂透的歷史開獎數據。
數據抓取地址:http://datachart.500.com/
獲取彩票開獎信息工具類
* 1、獲取傳入期號之後開獎的雙色球數據
* 2、獲取全部雙色球的開獎數據
* 3、獲取傳入期號之後大樂透的開獎數據
* 4、獲取全部大樂透開獎信息數據
模塊零:實體類
package entity.lottery;
import java.util.Date;
import java.util.List;
/**
 * Entity for one row of the Da Le Tou ("Big Lotto") draw-result table.
 * Created by yy on 2017/9/26.
 */
public class BigLottoBall {
    /** Number of values the list constructor needs before it populates anything. */
    private static final int FIELD_COUNT = 15;

    private int Period_Id;            // draw period number
    private int RedBall_One;          // red ball 1
    private int RedBall_Tow;          // red ball 2
    private int RedBall_Three;        // red ball 3
    private int RedBall_Four;         // red ball 4
    private int RedBall_Fives;        // red ball 5
    private int BlueBall_One;         // blue ball 1
    private int BlueBall_Tow;         // blue ball 2
    private long Prize_Pool_Bonus;    // prize pool amount
    private int First_Prize_Number;   // number of first-prize bets
    private int First_Prize_Bonus;    // first-prize bonus
    private int Second_Prize_Number;  // number of second-prize bets
    private int Second_Prize_Bonus;   // second-prize bonus
    private int Total_Bet_Amount;     // total bet amount
    private String Lottery_Date;      // draw date, stored as the text that was passed in

    /** Creates an instance with every field at its default value. */
    public BigLottoBall() {}

    /**
     * Builds a draw from its textual column values.
     * <p>Values are consumed in this order: period, red balls 1-5, blue balls 1-2,
     * prize pool, first-prize count and bonus, second-prize count and bonus,
     * total bet amount, draw date. If the list does not contain exactly 15 values
     * nothing is assigned and all fields keep their defaults.
     *
     * @param infoData the column values of one draw
     * @throws NumberFormatException if one of the numeric columns is not a valid number
     */
    public BigLottoBall(List<String> infoData) {
        if (infoData.size() != FIELD_COUNT) {
            return;
        }
        int next = 0;
        Period_Id = Integer.parseInt(infoData.get(next++));
        RedBall_One = Integer.parseInt(infoData.get(next++));
        RedBall_Tow = Integer.parseInt(infoData.get(next++));
        RedBall_Three = Integer.parseInt(infoData.get(next++));
        RedBall_Four = Integer.parseInt(infoData.get(next++));
        RedBall_Fives = Integer.parseInt(infoData.get(next++));
        BlueBall_One = Integer.parseInt(infoData.get(next++));
        BlueBall_Tow = Integer.parseInt(infoData.get(next++));
        Prize_Pool_Bonus = Long.parseLong(infoData.get(next++));
        First_Prize_Number = Integer.parseInt(infoData.get(next++));
        First_Prize_Bonus = Integer.parseInt(infoData.get(next++));
        Second_Prize_Number = Integer.parseInt(infoData.get(next++));
        Second_Prize_Bonus = Integer.parseInt(infoData.get(next++));
        Total_Bet_Amount = Integer.parseInt(infoData.get(next++));
        Lottery_Date = infoData.get(next);
    }

    /** Renders every field as {@code BigLottoBall{name=value, ...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("BigLottoBall{");
        text.append("Period_Id=").append(Period_Id);
        text.append(", RedBall_One=").append(RedBall_One);
        text.append(", RedBall_Tow=").append(RedBall_Tow);
        text.append(", RedBall_Three=").append(RedBall_Three);
        text.append(", RedBall_Four=").append(RedBall_Four);
        text.append(", RedBall_Fives=").append(RedBall_Fives);
        text.append(", BlueBall_One=").append(BlueBall_One);
        text.append(", BlueBall_Tow=").append(BlueBall_Tow);
        text.append(", Prize_Pool_Bonus=").append(Prize_Pool_Bonus);
        text.append(", First_Prize_Number=").append(First_Prize_Number);
        text.append(", First_Prize_Bonus=").append(First_Prize_Bonus);
        text.append(", Second_Prize_Number=").append(Second_Prize_Number);
        text.append(", Second_Prize_Bonus=").append(Second_Prize_Bonus);
        text.append(", Total_Bet_Amount=").append(Total_Bet_Amount);
        text.append(", Lottery_Date='").append(Lottery_Date).append('\'');
        text.append('}');
        return text.toString();
    }
    // getters and setters omitted
}
package entity.lottery;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
/**
 * Entity for one row of the Shuang Se Qiu ("Double Color Ball") draw-result table.
 * Created by yy on 2017/9/26.
 */
public class DoubleColorBall {
    private int Period_Id;            // draw period number
    private int RedBall_One;          // red ball 1
    private int RedBall_Tow;          // red ball 2
    private int RedBall_Three;        // red ball 3
    private int RedBall_Four;         // red ball 4
    private int RedBall_Fives;        // red ball 5
    private int RedBall_Six;          // red ball 6
    private int BlueBall_One;         // blue ball 1
    private long Prize_Pool_Bonus;    // prize pool amount
    private int First_Prize_Number;   // number of first-prize bets
    private int First_Prize_Bonus;    // first-prize bonus
    private int Second_Prize_Number;  // number of second-prize bets
    private int Second_Prize_Bonus;   // second-prize bonus
    private int Total_Bet_Amount;     // total bet amount
    private String Lottery_Date;      // draw date, stored as the text that was passed in

    /** Creates an instance with every field at its default value. */
    public DoubleColorBall() {}

    /**
     * Builds a draw from the column values extracted by the regular expressions.
     * <p>Expected order: period, red balls 1-6, blue ball, prize pool, first-prize
     * count and bonus, second-prize count and bonus, total bet amount, draw date.
     * If the list does not contain exactly 15 values nothing is assigned and all
     * fields keep their defaults.
     *
     * @param infoData the column values of one draw
     * @throws NumberFormatException if one of the numeric columns is not a valid number
     */
    public DoubleColorBall(List<String> infoData) {
        if (infoData.size() == 15) {
            this.Period_Id = Integer.parseInt(infoData.get(0));
            this.RedBall_One = Integer.parseInt(infoData.get(1));
            this.RedBall_Tow = Integer.parseInt(infoData.get(2));
            this.RedBall_Three = Integer.parseInt(infoData.get(3));
            this.RedBall_Four = Integer.parseInt(infoData.get(4));
            this.RedBall_Fives = Integer.parseInt(infoData.get(5));
            this.RedBall_Six = Integer.parseInt(infoData.get(6));
            this.BlueBall_One = Integer.parseInt(infoData.get(7));
            this.Prize_Pool_Bonus = Long.parseLong(infoData.get(8));
            this.First_Prize_Number = Integer.parseInt(infoData.get(9));
            this.First_Prize_Bonus = Integer.parseInt(infoData.get(10));
            this.Second_Prize_Number = Integer.parseInt(infoData.get(11));
            this.Second_Prize_Bonus = Integer.parseInt(infoData.get(12));
            this.Total_Bet_Amount = Integer.parseInt(infoData.get(13));
            this.Lottery_Date = infoData.get(14);
        }
    }

    /** Renders every field as {@code DoubleColorBall{name=value, ...}}. */
    @Override
    public String toString() {
        return "DoubleColorBall{" +
                "Period_Id=" + Period_Id +
                ", RedBall_One=" + RedBall_One +
                ", RedBall_Tow=" + RedBall_Tow +
                ", RedBall_Three=" + RedBall_Three +
                ", RedBall_Four=" + RedBall_Four +
                // Label previously read "edBall_Fives" (missing leading 'R').
                ", RedBall_Fives=" + RedBall_Fives +
                ", RedBall_Six=" + RedBall_Six +
                ", BlueBall_One=" + BlueBall_One +
                ", Prize_Pool_Bonus=" + Prize_Pool_Bonus +
                ", First_Prize_Number=" + First_Prize_Number +
                ", First_Prize_Bonus=" + First_Prize_Bonus +
                ", Second_Prize_Number=" + Second_Prize_Number +
                ", Second_Prize_Bonus=" + Second_Prize_Bonus +
                ", Total_Bet_Amount=" + Total_Bet_Amount +
                ", Lottery_Date='" + Lottery_Date + '\'' +
                '}';
    }
    // getters and setters omitted
}
模塊一:獲取網頁數據
方法一(getString)要是返回亂碼,請改用方法二(unGZIPGetString)。
package tool;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.zip.GZIPInputStream;
import org.apache.commons.lang3.StringUtils;
/**
 * Utility class for downloading the content behind a URL as text.
 * Created by yy on 2017/10/17.
 */
public class GetUrlData {
    /**
     * Requests {@code url} and returns the response body as one string.
     * <p>The body is decoded as UTF-8 and read line by line; line terminators are
     * not kept, so the result is all lines concatenated without separators.
     *
     * @param url the URL to request
     * @return the concatenated response lines, or {@code null} when {@code url} is
     *         null or blank (same convention as {@link #unGZIPGetString(String)})
     * @throws Exception if the URL cannot be opened or read; the underlying failure
     *         is attached as the cause
     */
    public static String getString(String url) throws Exception {
        if (StringUtils.isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            StringBuilder data = new StringBuilder();
            // try-with-resources closes the reader (and the wrapped stream) even when
            // reading fails, and never touches a reader that was not created.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    data.append(line);
                }
            }
            return data.toString();
        } catch (Exception e) {
            throw new Exception("讀取Url數據失敗:" + url, e);
        }
    }

    /**
     * Requests {@code url}, GZIP-decompresses the response body and returns it
     * decoded as UTF-8. Use this when {@link #getString(String)} returns garbled text.
     * <p>Unlike {@code getString}, the decompressed text is returned unchanged,
     * including any line terminators.
     *
     * @param url the URL to request
     * @return the decompressed body, or {@code null} when {@code url} is null or blank
     * @throws Exception if the URL cannot be opened or read, or the body is not valid
     *         GZIP data; the underlying failure is attached as the cause
     */
    public static String unGZIPGetString(String url) throws Exception {
        if (StringUtils.isBlank(url)) {
            return null;
        }
        try {
            URLConnection connection = openConnection(url);
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            // The raw stream is declared separately so it is closed even if the
            // GZIPInputStream constructor rejects the header.
            try (InputStream raw = connection.getInputStream();
                 GZIPInputStream gzip = new GZIPInputStream(raw)) {
                byte[] buffer = new byte[8192];
                int n;
                while ((n = gzip.read(buffer)) >= 0) {
                    out.write(buffer, 0, n);
                }
            }
            return out.toString("UTF-8");
        } catch (Exception e) {
            throw new Exception("讀取Url數據失敗:" + url, e);
        }
    }

    /** Opens a connection to {@code url} with the request headers both fetch methods send. */
    private static URLConnection openConnection(String url) throws IOException {
        URL urlInfo = new URL(url);
        URLConnection connection = urlInfo.openConnection();
        connection.addRequestProperty("Host", urlInfo.getHost());
        connection.addRequestProperty("Connection", "keep-alive");
        connection.addRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
        connection.addRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36");
        connection.connect();
        return connection;
    }
}
模塊二:獲取彩票開獎數據
package tool;
import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by yy on 2017/10/16.
* 獲取彩票開獎信息工具類
* 1、獲取傳入期號之後開獎的雙色球數據
* 2、獲取全部雙色球的開獎數據
* 3、獲取傳入期號之後大樂透的開獎數據
* 4、獲取全部大樂透開獎信息數據
*/
public class GetLottery {
//大樂透請求url
private static String bigUrl="http://datachart.500.com/dlt/history/newinc/history.php?limit=10000&sort=0";
//雙色球請求url
private static String douUrl="http://datachart.500.com/ssq/history/newinc/history.php?limit=100000&sort=0";
//匹配開獎正則
private static String listRegex="--><(.*?)<\/tr>";
//匹配開獎明細正則
private static String infoRegex=">[,\\d-]+?<";
/**
* 獲取全部雙色球的開獎信息數據
* @return list對象
* @throws Exception
*/
public static List<DoubleColorBall> getAllDCB()throws Exception{
List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
//獲取url信息
String data= GetUrlData.getString(GetLottery.douUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍歷獲取的數據
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//獲取具體的數據
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//創建對象
list.add(new DoubleColorBall(infoData));
}
return list;
}
/**
* 根據傳進來的的開獎期號獲取這個開獎期號自後的開獎信息
* @param period_Id 開獎期號
* @return List對象
* @throws Exception
*/
public static List<DoubleColorBall> getBeforeDCB(int period_Id)throws Exception{
List<DoubleColorBall> list=new ArrayList<DoubleColorBall>();
//獲取url信息
String data= GetUrlData.getString(GetLottery.douUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍歷獲取的數據
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//獲取具體的數據
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//要是獲取的期號小於或者等於傳進來的期號則跳出
if (Integer.parseInt(infoData.get(0))<=period_Id){
break;
}else{
//創建對象
list.add(new DoubleColorBall(infoData));
}
}
return list;
}
/**
* 獲取全部大樂透的開獎信息
* @return List對象
* @throws Exception
*/
public static List<BigLottoBall> getAllBLB()throws Exception{
List<BigLottoBall> list=new ArrayList<BigLottoBall>();
//獲取url信息
String data= GetUrlData.getString(GetLottery.bigUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍歷獲取的數據
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//獲取具體的數據
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//創建對象
list.add(new BigLottoBall(infoData));
}
return list;
}
/**
* 根據傳入的期號來獲取這個期號之前的開獎信息
* @param period_Id 開獎期號
* @return
* @throws Exception
*/
public static List<BigLottoBall> getBeforeBLB(int period_Id)throws Exception{
List<BigLottoBall> list=new ArrayList<BigLottoBall>();
//獲取url信息
String data= GetUrlData.getString(GetLottery.bigUrl);
Pattern pattern = Pattern.compile(GetLottery.listRegex);
Matcher matcher =pattern.matcher(data);
//遍歷獲取的數據
while (matcher.find()){
String info=matcher.group();
List<String> infoData=new ArrayList<String>();
Pattern patternInfo = Pattern.compile(GetLottery.infoRegex);
Matcher matcherInfo =patternInfo.matcher(info);
//獲取具體的數據
while (matcherInfo.find()){
infoData.add(matcherInfo.group().replaceAll(">|<|,",""));
}
//要是獲取的期號小於或者等於傳進來的期號則跳出
if (Integer.parseInt(infoData.get(0))<=period_Id){
break;
}else{
//創建對象
list.add(new BigLottoBall(infoData));
}
}
return list;
}
}
模塊三:測試
import entity.lottery.BigLottoBall;
import entity.lottery.DoubleColorBall;
import tool.GetLottery;
import java.util.List;
/**
 * Manual check of the lottery scraping tools: prints the fetched draws and the
 * elapsed time in milliseconds.
 * Created by yy on 2017/10/16.
 */
public class TestGetUrl {
    public static void main(String[] args) {
        final long startedAt = System.currentTimeMillis();
        // Only used by the raw-download example below.
        String url = "http://datachart.500.com/ssq/history/newinc/history.php?limit=100000&sort=0";
        try {
            // Alternative calls; swap one in for the active line below to try it.
            // Raw page download:
            //System.out.println(tool.GetUrlData.getString(url));
            // All Double Color Ball draws:
            //List<DoubleColorBall> draws = GetLottery.getAllDCB();
            // All Big Lotto draws:
            //List<BigLottoBall> draws = GetLottery.getAllBLB();
            // Double Color Ball draws after period 16120:
            //List<DoubleColorBall> draws = GetLottery.getBeforeDCB(16120);
            // Double Color Ball draws after period 17120:
            //List<DoubleColorBall> draws = GetLottery.getBeforeDCB(17120);
            // Big Lotto draws after period 16120:
            //List<BigLottoBall> draws = GetLottery.getBeforeBLB(16120);

            // Big Lotto draws after period 17110:
            List<BigLottoBall> draws = GetLottery.getBeforeBLB(17110);
            for (BigLottoBall draw : draws) {
                System.out.println(draw.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        long elapsedMillis = System.currentTimeMillis() - startedAt;
        System.out.println(elapsedMillis);
    }
}
模塊四:測試結果
BigLottoBall{Period_Id=17121, RedBall_One=1, RedBall_Tow=6, RedBall_Three=12, RedBall_Four=26, RedBall_Fives=31, BlueBall_One=1, BlueBall_Tow=7, Prize_Pool_Bonus=4271765199, First_Prize_Number=0, First_Prize_Bonus=0, Second_Prize_Number=59, Second_Prize_Bonus=149524, Total_Bet_Amount=199583003, Lottery_Date='2017-10-16'}
BigLottoBall{Period_Id=17120, RedBall_One=8, RedBall_Tow=15, RedBall_Three=24, RedBall_Four=26, RedBall_Fives=27, BlueBall_One=5, BlueBall_Tow=6, Prize_Pool_Bonus=4223045066, First_Prize_Number=7, First_Prize_Bonus=6747509, Second_Prize_Number=107, Second_Prize_Bonus=63449, Total_Bet_Amount=216330390, Lottery_Date='2017-10-14'}
BigLottoBall{Period_Id=17119, RedBall_One=5, RedBall_Tow=7, RedBall_Three=13, RedBall_Four=29, RedBall_Fives=35, BlueBall_One=3, BlueBall_Tow=8, Prize_Pool_Bonus=4247472952, First_Prize_Number=5, First_Prize_Bonus=8296832, Second_Prize_Number=43, Second_Prize_Bonus=216912, Total_Bet_Amount=197391000, Lottery_Date='2017-10-11'}
BigLottoBall{Period_Id=17118, RedBall_One=2, RedBall_Tow=7, RedBall_Three=16, RedBall_Four=20, RedBall_Fives=33, BlueBall_One=3, BlueBall_Tow=11, Prize_Pool_Bonus=4252457951, First_Prize_Number=11, First_Prize_Bonus=6287618, Second_Prize_Number=57, Second_Prize_Bonus=166932, Total_Bet_Amount=195121929, Lottery_Date='2017-10-09'}
BigLottoBall{Period_Id=17117, RedBall_One=5, RedBall_Tow=7, RedBall_Three=9, RedBall_Four=24, RedBall_Fives=32, BlueBall_One=8, BlueBall_Tow=10, Prize_Pool_Bonus=4307581838, First_Prize_Number=5, First_Prize_Bonus=8668433, Second_Prize_Number=40, Second_Prize_Bonus=220578, Total_Bet_Amount=207881694, Lottery_Date='2017-10-07'}
BigLottoBall{Period_Id=17116, RedBall_One=2, RedBall_Tow=27, RedBall_Three=30, RedBall_Four=32, RedBall_Fives=33, BlueBall_One=1, BlueBall_Tow=3, Prize_Pool_Bonus=4309435912, First_Prize_Number=2, First_Prize_Bonus=10000000, Second_Prize_Number=36, Second_Prize_Bonus=242412, Total_Bet_Amount=172984540, Lottery_Date='2017-10-04'}
BigLottoBall{Period_Id=17115, RedBall_One=14, RedBall_Tow=19, RedBall_Three=20, RedBall_Four=25, RedBall_Fives=31, BlueBall_One=6, BlueBall_Tow=8, Prize_Pool_Bonus=4288831779, First_Prize_Number=3, First_Prize_Bonus=9023329, Second_Prize_Number=172, Second_Prize_Bonus=38436, Total_Bet_Amount=175963547, Lottery_Date='2017-10-02'}
BigLottoBall{Period_Id=17114, RedBall_One=6, RedBall_Tow=7, RedBall_Three=12, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=1, BlueBall_Tow=12, Prize_Pool_Bonus=4288325250, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=55, Second_Prize_Bonus=207498, Total_Bet_Amount=221457669, Lottery_Date='2017-09-30'}
BigLottoBall{Period_Id=17113, RedBall_One=5, RedBall_Tow=8, RedBall_Three=17, RedBall_Four=18, RedBall_Fives=23, BlueBall_One=4, BlueBall_Tow=12, Prize_Pool_Bonus=4238842403, First_Prize_Number=10, First_Prize_Bonus=6601551, Second_Prize_Number=117, Second_Prize_Bonus=73142, Total_Bet_Amount=202181819, Lottery_Date='2017-09-27'}
BigLottoBall{Period_Id=17112, RedBall_One=5, RedBall_Tow=6, RedBall_Three=20, RedBall_Four=31, RedBall_Fives=32, BlueBall_One=6, BlueBall_Tow=12, Prize_Pool_Bonus=4270236000, First_Prize_Number=6, First_Prize_Bonus=7219221, Second_Prize_Number=88, Second_Prize_Bonus=105157, Total_Bet_Amount=199283910, Lottery_Date='2017-09-25'}
BigLottoBall{Period_Id=17111, RedBall_One=2, RedBall_Tow=14, RedBall_Three=17, RedBall_Four=26, RedBall_Fives=34, BlueBall_One=8, BlueBall_Tow=12, Prize_Pool_Bonus=4289815824, First_Prize_Number=1, First_Prize_Bonus=10000000, Second_Prize_Number=43, Second_Prize_Bonus=251070, Total_Bet_Amount=220140489, Lottery_Date='2017-09-23'}
總結:
1、提供的工具類可以直接抓取到對應的彩票開獎數據,以上的測試結果也是正確的;使用工具類讀取全部數據的耗時一般在1000毫秒以內(根據具體的網絡環境可能有所浮動)
2、2018年使用時候發現出現亂碼,特意在工具類提供了GZIP的解壓工具方法(unGZIPGetString),因爲原先的爬取站點使用了GZIP壓縮