- 以美國adidas官網爲例。
- 輸入url,抓取商品信息(標題、描述、圖片等);抓取屬性信息(顏色、尺碼、價格、庫存、skuId)。
- 思路很簡單,就是打開頁面,找出各項所需內容所在的標籤並加以解析。
獲取頁面
/**
 * Sends a POST request and parses the response body into a jsoup {@link Document}.
 *
 * <p>The body is decoded as UTF-8. The response entity is consumed in a
 * {@code finally} block so that it is released even when decoding or parsing fails.
 *
 * @param url        target URL, passed through to {@code getHttpPostResponse}
 * @param referrer   referrer value, passed through to {@code getHttpPostResponse}
 * @param params     form parameters, passed through to {@code getHttpPostResponse}
 * @param httpClient client used to execute the request
 * @return the parsed document
 * @throws IOException if the request fails or the body cannot be read
 */
public static Document getHttpPostResponseWithDocument(String url, String referrer, List<NameValuePair> params, DecompressingHttpClient httpClient) throws IOException {
    HttpResponse response = getHttpPostResponse(url, referrer, params, httpClient);
    try {
        return Jsoup.parse(EntityUtils.toString(response.getEntity(), "UTF-8"));
    } finally {
        // Always release the entity, including when toString/parse throws.
        EntityUtils.consume(response.getEntity());
    }
}
/**
 * Executes a GET request against {@code url} and returns the raw response.
 *
 * <p>The request is first passed to {@code setHeaders}; a {@code Referer} header is
 * then added only when {@code referrer} is not blank.
 *
 * @param url        target URL
 * @param referrer   value for the {@code Referer} header; skipped when blank
 * @param httpClient client used to execute the request
 * @return the response returned by {@code httpClient.execute}
 * @throws IOException if executing the request fails
 */
public static HttpResponse getHttpGetResponse(String url, String referrer, DecompressingHttpClient httpClient) throws IOException {
    final HttpGet request = new HttpGet(url);
    setHeaders(request);

    final boolean hasReferrer = !StringUtils.isBlank(referrer);
    if (hasReferrer) {
        request.setHeader("Referer", referrer);
    }

    HttpResponse response = httpClient.execute(request);
    return response;
}
判斷是否有貨
/**
 * Reports whether the page held in {@code doc} looks purchasable.
 *
 * <p>The page counts as in stock when at least one {@code .addtocart} element exists
 * and the serialized markup of those elements contains the text
 * {@code add-to-cart-button}.
 *
 * @return {@code true} when both conditions hold, {@code false} otherwise
 */
public boolean isInStock() {
    Elements cartArea = doc.select(".addtocart");
    boolean hasCartArea = cartArea != null && !cartArea.isEmpty();
    // Plain substring test on the rendered HTML, not a CSS selector match.
    return hasCartArea && cartArea.toString().contains("add-to-cart-button");
}
顏色獲取
public ExecInfo parse(String url, Map<String, String> colorMap) {
ExecResult<Document> execResult = getOneSkuInfoPage(url)
if (!execResult.isSucc()) {
LogUtils.info(execResult.getMsg())
}
if(!isInStock()) {
LogUtils.info("out of stock!")
return ExecInfo.fail("out of stock!")
}
Elements curColorElements = doc.select(".product-color")
if(null == curColorElements || curColorElements.isEmpty()) {
return ExecInfo.fail("獲取當前商品顏色信息失敗")
} else {
Pattern COLOR_PATTERN = Pattern.compile("<span class=\"product-color-clear\">([^<]*)</span>")
Pattern SKU_PATTERN = Pattern.compile("\\(([0-9A-Za-z]*)\\)")
Matcher color_matcher = COLOR_PATTERN.matcher(curColorElements.toString())
Matcher sku_matcher = SKU_PATTERN.matcher(curColorElements.toString())
if(color_matcher.find() && sku_matcher.find()) {
LogUtils.info("CURRENT COLOR: " + sku_matcher.group(1) + ", " + color_matcher.group(1))
}
}
//Elements elements = doc.select("#colorVariationsCarousel")
Elements elements = doc.select(".color-variation-row")
if(null != elements && !elements.isEmpty()) {
for (Element element : elements) {
Elements colorElements = element.select(".color-variations-thumb-color")
for (Element colorElement : colorElements) {
//LogUtils.info(colorElement.toString())
Pattern SKU_PATTERN = Pattern.compile("data-articleno=\"([0-9A-Za-z]*)")
Pattern TITLE_PATTERN = Pattern.compile("title=\"([^\"]*)")
Matcher sku_matcher = SKU_PATTERN.matcher(colorElement.toString())
Matcher title_matcher = TITLE_PATTERN.matcher(colorElement.toString())
if (sku_matcher.find() && title_matcher.find()) {
colorMap.put(sku_matcher.group(1), title_matcher.group(1))
}
}
}
}
LogUtils.info(colorMap.toString())
return ExecInfo.succ()
}