從搜狐下載每日交易數據的爬蟲程序

網易不行有搜狐,搜狐提供的每日股票交易數據可比網易的強多了,近四千支股票4月的交易數據八萬餘條一氣呵成.看來以後要靠它當主力.

程序:

package com.ufo.hy.agumaster.crawler.daytransact;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.ufo.hy.agumaster.entity.DayTransact;

/**
 * Downloads daily trading records for one stock from Sohu's historical-quote
 * endpoint and converts each returned row into a {@link DayTransact}.
 *
 * <p>Usage: call {@link #download(String, String, String, String)} and then read
 * the result with {@link #getDtList()}. Each call to {@code download} replaces
 * the previously held list.
 */
public class SohuDTCrawler {
    /** JSON parser reused across calls instead of being rebuilt for every download. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /** Number of columns read from every row of the "hq" array. */
    private static final int ROW_FIELD_COUNT = 10;

    /** Records produced by the most recent download; empty until one succeeds. */
    private List<DayTransact> dtList = new ArrayList<>();

    /**
     * Returns the records collected by the most recent {@link #download} call.
     *
     * @return the record list; never {@code null}, empty if nothing was downloaded
     */
    public List<DayTransact> getDtList(){
        return dtList;
    }

    /**
     * Fetches the daily records of one stock for a date range and stores them
     * so they can be read through {@link #getDtList()}.
     *
     * <p>This is best-effort: any failure (network, unexpected payload, number
     * format) is reported on standard error and ends the download; rows that
     * were converted before the failure stay in the list.
     *
     * @param originalCode stock code, used in the request and copied into every record
     * @param name         stock name copied into every record
     * @param fromDate     start date as passed to the endpoint (main uses "yyyyMMdd")
     * @param toDate       end date as passed to the endpoint (main uses "yyyyMMdd")
     */
    public void download(String originalCode,String name,String fromDate,String toDate) {
        dtList=new ArrayList<>();
        try {
            // Read the raw response body. Parsing the JSONP text as an HTML document
            // and calling text() would normalize whitespace and drop tag-like content.
            // NOTE(review): the "query" parameter and "auth" cookie look like leftovers
            // from a jsoup sample; kept so the request stays unchanged - confirm and remove.
            String body=Jsoup.connect(getReqUrl(originalCode,fromDate,toDate)).ignoreContentType(true)
                    .data("query", "Java")
                    .userAgent("Mozilla")
                    .cookie("auth", "token")
                    .timeout(30000)
                    .execute()
                    .body();

            JsonNode root = MAPPER.readTree(unwrapJsonp(body));
            // The callback argument is normally a one-element array wrapping the
            // result object; accept a bare object as well.
            if (root.isArray()) {
                root = root.path(0);
            }

            // path() yields a "missing" node when "hq" is absent, so the loop is simply skipped.
            for (JsonNode transNode : root.path("hq")) {
                dtList.add(toDayTransact(transNode, originalCode, name));
            }
        }catch(Exception ex) {
            System.err.println("Download failed for code " + originalCode
                    + " (" + fromDate + " - " + toDate + ")");
            ex.printStackTrace();
        }
    }

    /**
     * Strips the JSONP callback wrapper, returning the text between the first
     * '(' and the last ')'. A body that already starts with '{' or '[' is
     * returned as is.
     */
    private static String unwrapJsonp(String body) {
        String text = body.trim();
        if (text.startsWith("{") || text.startsWith("[")) {
            return text;
        }
        int open = text.indexOf('(');
        int close = text.lastIndexOf(')');
        if (open < 0 || close <= open) {
            throw new IllegalArgumentException("Response is not a JSONP call: " + text);
        }
        return text.substring(open + 1, close);
    }

    /**
     * Converts one row of the "hq" array into a record. Column order as read
     * here: 0 day, 1 open, 2 close, 3 change, 4 change percent, 5 low, 6 high,
     * 7 volume, 8 turnover amount, 9 turnover rate.
     */
    private static DayTransact toDayTransact(JsonNode row, String code, String name) {
        if (row.size() < ROW_FIELD_COUNT) {
            throw new IllegalArgumentException(
                    "Expected at least " + ROW_FIELD_COUNT + " fields but got: " + row);
        }

        DayTransact dt=new DayTransact();
        dt.setCode(code);
        dt.setName(name);
        dt.setDay(row.get(0).asText());
        dt.setTopen(Double.parseDouble(row.get(1).asText()));
        dt.setTclose(Double.parseDouble(row.get(2).asText()));
        dt.setChg(Double.parseDouble(row.get(3).asText()));
        // The two percentage columns carry a trailing '%' that must be removed before parsing.
        dt.setPchg(Double.parseDouble(row.get(4).asText().replace("%", "")));
        dt.setLow(Double.parseDouble(row.get(5).asText()));
        dt.setHigh(Double.parseDouble(row.get(6).asText()));
        dt.setVoturnover(Long.parseLong(row.get(7).asText()));
        dt.setVaturnover(Double.parseDouble(row.get(8).asText()));
        dt.setTurnover(Double.parseDouble(row.get(9).asText().replace("%", "")));
        return dt;
    }

    /**
     * Builds the request URL for the given stock code and date range. The
     * callback name and JSONP response type are fixed by this URL.
     */
    private String getReqUrl(String code,String startDate,String endDate) {
        return "http://q.stock.sohu.com/hisHq?code=cn_"+code+"&start="+startDate+"&end="+endDate+"&stat=1&order=D&period=d&callback=historySearchHandler&rt=jsonp";
    }

    /** Small manual check: downloads a few days for one stock and prints each record. */
    public static void main(String[] args) {
        SohuDTCrawler n=new SohuDTCrawler();
        n.download("002101","廣東鴻圖", "20200401", "20200410");

        for(DayTransact dt:n.getDtList()) {
            System.out.println(dt);
        }
    }
}

用到的實體類:

package com.ufo.hy.agumaster.entity;

import com.fasterxml.jackson.databind.JsonNode;

/**
 * Entity holding one day's trading data for a single stock.
 *
 * @author ufo
 */
public class DayTransact {
    private long    id;            // ID
    private String  day;           // trade date
    private String  code;          // stock code
    private String  name;          // stock name
    private double  tclose;        // closing price
    private double  high;          // highest price
    private double  low;           // lowest price
    private double  topen;         // opening price
    private double  lclose;        // previous day's closing price
    private double  chg;           // price change amount
    private double  pchg;          // price change percentage
    private double  turnover;      // turnover rate
    private long    voturnover;    // trade volume
    private double  vaturnover;    // trade amount
    private double  tcap;          // total market capitalization
    private double  mcap;          // circulating market capitalization

    /** Creates an empty record; all fields keep their default values until set. */
    public DayTransact() {

    }

    /**
     * Currently a no-op: the node is not read and no field is populated.
     *
     * @param transNode ignored
     */
    public DayTransact(JsonNode transNode) {

    }

    /**
     * Builds a record from a 15-column string array.
     *
     * <p>Columns read: 0 day, 3 tclose, 4 high, 5 low, 6 topen, 7 lclose, 8 chg,
     * 9 pchg, 10 turnover, 11 voturnover, 12 vaturnover, 13 tcap, 14 mcap.
     * Columns 1 and 2 are not read, so {@code code} and {@code name} stay unset.
     *
     * @param arr the 15 column values
     * @throws ArrayIndexOutOfBoundsException if {@code arr} does not have exactly 15 elements
     * @throws NumberFormatException if a numeric column cannot be parsed; the message names
     *         the field, the offending value and the whole data line, and the original
     *         parse failure is attached as the cause
     */
    public DayTransact(String[] arr) {
        if(arr.length!=15) {
            throw new ArrayIndexOutOfBoundsException("Array size should be 15 but now it is "+arr.length);
        }

        day=arr[0];

        // Parsed in column order so the first bad column is the one reported.
        tclose=parseDoubleField(arr, 3, "tclose");
        high=parseDoubleField(arr, 4, "high");
        low=parseDoubleField(arr, 5, "low");
        topen=parseDoubleField(arr, 6, "topen");
        lclose=parseDoubleField(arr, 7, "lclose");
        chg=parseDoubleField(arr, 8, "chg");
        pchg=parseDoubleField(arr, 9, "pchg");
        turnover=parseDoubleField(arr, 10, "turnover");
        voturnover=parseLongField(arr, 11, "voturnover");
        vaturnover=parseDoubleField(arr, 12, "vaturnover");
        tcap=parseDoubleField(arr, 13, "tcap");
        mcap=parseDoubleField(arr, 14, "mcap");
    }

    /** Parses column {@code index} as a double, reporting the field name on failure. */
    private static double parseDoubleField(String[] arr, int index, String field) {
        try {
            return Double.parseDouble(arr[index]);
        }catch(NumberFormatException ex) {
            throw fieldError(arr, index, field, ex);
        }
    }

    /** Parses column {@code index} as a long, reporting the field name on failure. */
    private static long parseLongField(String[] arr, int index, String field) {
        try {
            return Long.parseLong(arr[index]);
        }catch(NumberFormatException ex) {
            throw fieldError(arr, index, field, ex);
        }
    }

    /** Builds the descriptive parse error and keeps the original failure as its cause. */
    private static NumberFormatException fieldError(String[] arr, int index, String field, NumberFormatException cause) {
        NumberFormatException error=new NumberFormatException(
                "Can not get "+field+" from string:"+arr[index]+" dataLine:"+String.join(",", arr));
        // NumberFormatException has no (message, cause) constructor, so attach it afterwards.
        error.initCause(cause);
        return error;
    }

    /** Returns every field on one line, each prefixed with its Chinese label and field name. */
    @Override
    public String toString() {
        StringBuilder sb=new StringBuilder();
        sb.append("id:").append(id);
        sb.append(" 日期day:").append(day);
        sb.append(" 代號code:").append(code);
        sb.append(" 名稱name:").append(name);
        sb.append(" 收盤價tclose:").append(tclose);
        sb.append(" 最高價high:").append(high);
        sb.append(" 最低價low:").append(low);
        sb.append(" 開盤價topen:").append(topen);
        sb.append(" 前日收盤價lclose:").append(lclose);
        sb.append(" 漲跌額chg:").append(chg);
        sb.append(" 漲跌幅pchg:").append(pchg);
        sb.append(" 換手率turnover:").append(turnover);
        sb.append(" 成交量voturnover:").append(voturnover);
        sb.append(" 成交金額vaturnover:").append(vaturnover);
        sb.append(" 總市值tcap:").append(tcap);
        sb.append(" 流通市值mcap:").append(mcap);

        return sb.toString();
    }

    public long getId() {
        return id;
    }
    public void setId(long id) {
        this.id = id;
    }
    public String getDay() {
        return day;
    }
    public void setDay(String day) {
        this.day = day;
    }
    public String getCode() {
        return code;
    }
    public void setCode(String code) {
        this.code = code;
    }
    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public double getTclose() {
        return tclose;
    }
    public void setTclose(double tclose) {
        this.tclose = tclose;
    }
    public double getHigh() {
        return high;
    }
    public void setHigh(double high) {
        this.high = high;
    }
    public double getLow() {
        return low;
    }
    public void setLow(double low) {
        this.low = low;
    }
    public double getTopen() {
        return topen;
    }
    public void setTopen(double topen) {
        this.topen = topen;
    }
    public double getLclose() {
        return lclose;
    }
    public void setLclose(double lclose) {
        this.lclose = lclose;
    }
    public double getChg() {
        return chg;
    }
    public void setChg(double chg) {
        this.chg = chg;
    }
    public double getPchg() {
        return pchg;
    }
    public void setPchg(double pchg) {
        this.pchg = pchg;
    }
    public double getTurnover() {
        return turnover;
    }
    public void setTurnover(double turnover) {
        this.turnover = turnover;
    }
    public long getVoturnover() {
        return voturnover;
    }
    public void setVoturnover(long voturnover) {
        this.voturnover = voturnover;
    }
    public double getVaturnover() {
        return vaturnover;
    }
    public void setVaturnover(double vaturnover) {
        this.vaturnover = vaturnover;
    }
    public double getTcap() {
        return tcap;
    }
    public void setTcap(double tcap) {
        this.tcap = tcap;
    }
    public double getMcap() {
        return mcap;
    }
    public void setMcap(double mcap) {
        this.mcap = mcap;
    }
}

執行情況:

id:0 日期day:2020-04-10 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:7.95 最高價high:8.23 最低價low:7.81 開盤價topen:8.11 前日收盤價lclose:0.0 漲跌額chg:-0.15 漲跌幅pchg:-1.85 換手率turnover:2.2 成交量voturnover:93679 成交金額vaturnover:7500.99 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-09 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:8.1 最高價high:8.18 最低價low:7.94 開盤價topen:8.13 前日收盤價lclose:0.0 漲跌額chg:0.0 漲跌幅pchg:0.0 換手率turnover:2.75 成交量voturnover:116902 成交金額vaturnover:9441.65 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-08 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:8.1 最高價high:8.27 最低價low:8.03 開盤價topen:8.06 前日收盤價lclose:0.0 漲跌額chg:-0.13 漲跌幅pchg:-1.58 換手率turnover:2.75 成交量voturnover:116971 成交金額vaturnover:9499.67 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-07 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:8.23 最高價high:8.28 最低價low:7.9 開盤價topen:8.04 前日收盤價lclose:0.0 漲跌額chg:0.33 漲跌幅pchg:4.18 換手率turnover:3.76 成交量voturnover:159804 成交金額vaturnover:12937.74 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-03 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:7.9 最高價high:8.11 最低價low:7.82 開盤價topen:8.11 前日收盤價lclose:0.0 漲跌額chg:-0.3 漲跌幅pchg:-3.66 換手率turnover:3.24 成交量voturnover:138091 成交金額vaturnover:10978.95 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-02 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:8.2 最高價high:8.2 最低價low:7.58 開盤價topen:7.7 前日收盤價lclose:0.0 漲跌額chg:0.45 漲跌幅pchg:5.81 換手率turnover:4.54 成交量voturnover:193364 成交金額vaturnover:15326.84 總市值tcap:0.0 流通市值mcap:0.0
id:0 日期day:2020-04-01 代號code:002101 名稱name:廣東鴻圖 收盤價tclose:7.75 最高價high:8.07 最低價low:7.62 開盤價topen:7.62 前日收盤價lclose:0.0 漲跌額chg:0.31 漲跌幅pchg:4.17 換手率turnover:3.7 成交量voturnover:157608 成交金額vaturnover:12279.22 總市值tcap:0.0 流通市值mcap:0.0

希望此程序對你也有用.

--2020年5月7日--

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章