1.1 環境及技術
- java8
- jsoup
- 文件流
1.2 分析訪問路徑
總體上說沒有什麼技術難度, 最耗時的部分是需要反覆試驗, 弄清圖片鏈接中各個參數的作用。
以下面的鏈接爲例:
https://cn.bing.com/images/async?q=%E5%B9%BB%E6%83%B3%E4%B9%A1&first=277&count=35&relp=35&scenario=ImageBasicHover&datsrc=N_I&layout=RowBased&mmasync=1&dgState=x*0_y*0_h*0_c*6_i*246_r*46&IG=AEDE9443E91045D58E57983441FB36E3&SFX=8&iid=images.5754
這裏面關鍵的參數是q,first.
q是搜索的關鍵詞;
first是指從第幾張圖片開始加載;
mmasync=1 這個參數和值是固定的, 必須帶上, 否則會有圖片無法加載。
在程序中最關鍵的就是這條鏈接了。
1.3 程序
package com.nikolazhang.spider;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.util.Date;
import java.util.Iterator;
import java.util.Scanner;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.nikolazhang.util.HttpRequestUtil;
/**
 * Command-line tool that downloads image search results from Bing.
 *
 * <p>It reads one line from standard input containing a target directory, a
 * search keyword and the number of images wanted, then repeatedly requests
 * Bing's asynchronous image endpoint and saves every image URL found in the
 * returned {@code <img>} tags to the target directory.
 */
public class SpiderPicFromBing {
    /** Attributes of an {@code <img>} tag that are inspected for the image URL, in lookup order. */
    private final static String[] strs = {"src", "data-src"};

    /**
     * Program entry point: prints the banner, reads the three space-separated
     * parameters from standard input and starts the download.
     *
     * @param args command-line arguments (unused; input is read from stdin)
     * @throws IOException declared for backward compatibility with existing callers
     */
    public static void main(String[] args) throws IOException {
        String[] params;
        // try-with-resources so the scanner is always released.
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.println("=================================================");
            System.out.println("= catch pictures from Bing =");
            System.out.println("= 【2019年1月20日 下午1:08:47 NikolaZhang】 =");
            System.out.println("=================================================");
            System.out.println("請輸入參數: [文件路徑] [關鍵詞] [下載數量]後回車");
            System.out.println("參數使用空格分割, 如: F:\\火影\\ 火影忍者 200");
            if (!scanner.hasNextLine()) {
                System.out.println("參數錯誤: 未讀取到輸入");
                return;
            }
            // Split on any run of whitespace so repeated spaces do not create empty parameters.
            params = scanner.nextLine().trim().split("\\s+");
        }
        if (params.length < 3) {
            // Previously this case crashed with ArrayIndexOutOfBoundsException.
            System.out.println("參數錯誤: 需要 [文件路徑] [關鍵詞] [下載數量] 三個參數");
            return;
        }
        System.out.println("開始下載----------");
        System.out.println(params[0]);
        System.out.println(params[1]);
        System.out.println(params[2]);
        visitHtml(params);
    }

    /**
     * Downloads images page by page until the requested number has been
     * reached, a page fails, or a page yields no images.
     *
     * @param args {@code args[0]} is the local directory, {@code args[1]} the
     *             search keyword and {@code args[2]} the number of images to download
     */
    private static void visitHtml(String[] args) {
        if (args == null || args.length < 3) {
            System.out.println("參數錯誤: 需要 [文件路徑] [關鍵詞] [下載數量] 三個參數");
            return;
        }
        int count;
        try {
            count = Integer.parseInt(args[2].trim());
        } catch (NumberFormatException e) {
            System.out.println("參數錯誤: 下載數量必須是整數: " + args[2]);
            return;
        }
        int start = 1;
        int downloaded = 0;
        while (downloaded < count) {
            int res = downloadPic(args, start, count - downloaded);
            if (res == -1) {
                // Actually stop, as the message says, instead of retrying the same failing request.
                System.out.println("***********下載出錯!程序退出!");
                return;
            }
            if (res == 0) {
                // No image on this page: stop rather than keep issuing empty requests.
                System.out.println("沒有獲取到更多圖片, 停止下載");
                return;
            }
            downloaded += res;
            // NOTE(review): the "+ 2" offset is kept from the original code; it looks
            // empirically chosen for Bing's "first" parameter -- confirm.
            start += res + 2;
        }
    }

    /**
     * Downloads every image found on one result page.
     *
     * @param args  {@code args[0]} is the local directory, {@code args[1]} the search keyword
     * @param start value passed as the {@code first} query parameter
     * @return the number of images fetched, or {@code -1} if an I/O error occurred
     */
    private static int downloadPic(String[] args, int start) {
        return downloadPic(args, start, Integer.MAX_VALUE);
    }

    /**
     * Downloads at most {@code limit} images found on one result page.
     *
     * @param args  {@code args[0]} is the local directory, {@code args[1]} the search keyword
     * @param start value passed as the {@code first} query parameter
     * @param limit maximum number of images to fetch from this page
     * @return the number of images fetched, or {@code -1} if an I/O error occurred
     */
    private static int downloadPic(String[] args, int start, int limit) {
        String filepath = args[0];
        String params = args[1];
        int i = 0;
        try {
            // Encode the keyword so spaces, '&' and non-ASCII text cannot corrupt the query string.
            String url = "https://cn.bing.com/images/async?q=" + URLEncoder.encode(params, "UTF-8")
                    + "&first=" + start + "&mmasync=1";
            Document doc = Jsoup.connect(url).get();
            Elements imgTags = doc.getElementsByTag("img");
            File dir = new File(filepath);
            if (!dir.isDirectory() && !dir.mkdirs()) {
                throw new IOException("無法創建目錄: " + filepath);
            }
            for (Element img : imgTags) {
                if (i >= limit) {
                    break;
                }
                String attr = getImgUrl(img);
                if (!"".equals(attr) && attr.startsWith("https")) {
                    System.out.println("獲取圖片: " + attr);
                    InputStream requestIO = HttpRequestUtil.httpRequestIO(attr);
                    if (requestIO == null) {
                        // Skip instead of failing with a NullPointerException while reading.
                        System.out.println("無法獲取圖片: " + attr);
                        continue;
                    }
                    // Join via File so a directory without a trailing separator still works.
                    String localpath = new File(dir, System.currentTimeMillis() + ".png").getPath();
                    saveImageToDisk(requestIO, localpath);
                    i++;
                }
            }
            System.out.println("==== INFORMATION =========================");
            System.out.println("下載路徑: " + url);
            System.out.println("存儲路徑: " + filepath);
            System.out.println("獲取資源: " + params);
            System.out.println("獲取圖片數量: " + i);
            System.out.println("===== END ========================");
        } catch (IOException e) {
            e.printStackTrace();
            return -1;
        }
        return i;
    }

    /**
     * Extracts the image URL from an {@code <img>} element.
     *
     * <p>The first attribute in {@link #strs} whose value contains an
     * {@code '&'} wins; the value is cut at the first {@code '&'}.
     *
     * @param img the element to inspect
     * @return the text before the first {@code '&'}, or an empty string if no
     *         inspected attribute contains {@code '&'}
     */
    private static String getImgUrl(Element img) {
        for (String str : strs) {
            String attr = img.attr(str);
            if (attr == null) {
                continue;
            }
            int amp = attr.indexOf('&');
            if (amp != -1) {
                // substring is safe where split("&")[0] threw on values made only of '&'.
                return attr.substring(0, amp);
            }
        }
        return "";
    }

    /**
     * Copies the stream to the given file and closes both. I/O errors are
     * printed and otherwise ignored so one bad image does not stop the run.
     *
     * @param inputStream source of the image bytes; a {@code null} stream is skipped
     * @param filepath    destination file path
     */
    private static void saveImageToDisk(InputStream inputStream, String filepath) {
        if (inputStream == null) {
            return;
        }
        // Both resources are closed automatically, also when opening the output file fails.
        try (InputStream in = inputStream;
             FileOutputStream fileOutputStream = new FileOutputStream(filepath)) {
            byte[] data = new byte[1024];
            int len;
            while ((len = in.read(data)) != -1) {
                fileOutputStream.write(data, 0, len);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
2 打包
爲了方便以後使用可以將程序打成可執行jar包
使用方法如下:
end
程序獲取:
https://github.com/NikolaZhang/PickPic
先來500張日常。啊~ 爽啊~