抓取起點網站內容
/**
 * Downloads the page at the given address with an HTTP GET request and returns
 * the response body as text.
 *
 * <p>The request carries browser-like {@code User-Agent}, {@code Accept} and
 * {@code Accept-Language} headers. The response bytes are decoded as UTF-8.
 *
 * @param ul the address to fetch
 * @return the response body, or an empty string if the address is malformed or
 *         any I/O error (including a timeout) occurs; in that case the stack
 *         trace is printed to standard error
 */
static String gethtml(String ul) {
    try {
        URL u = new URL(ul);
        HttpURLConnection uc = (HttpURLConnection) u.openConnection();
        // Fail with an IOException instead of blocking forever on an unresponsive server.
        uc.setConnectTimeout(10000);
        uc.setReadTimeout(30000);
        // Set the request headers.
        uc.setRequestMethod("GET");
        uc.setRequestProperty("User-Agent",
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3895.5 Safari/537.36");
        uc.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
        // No Accept-Encoding header is sent: the body is read as-is below and is
        // not decompressed, so a gzip/deflate response would come back as garbage.
        uc.setRequestProperty("Accept",
                "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
        uc.setRequestProperty("Connection", "keep-alive");
        uc.connect();
        // try-with-resources closes both streams on every path and never touches a
        // stream that was not opened (the old finally block dereferenced null here).
        try (InputStream is = uc.getInputStream();
             // Decode as UTF-8 directly rather than decoding with the platform
             // default charset and re-encoding afterwards.
             InputStreamReader isr = new InputStreamReader(is, "UTF-8")) {
            StringBuilder sb = new StringBuilder();
            char[] buff = new char[1024];
            int len;
            while ((len = isr.read(buff)) != -1) {
                sb.append(buff, 0, len);
            }
            return sb.toString();
        }
    } catch (IOException e) {
        // MalformedURLException is an IOException, so this single handler covers
        // both of the cases that were previously caught separately.
        e.printStackTrace();
    }
    return "";
}
正則匹配書名及鏈接
// Download the ranking page that will be scanned for book titles and links.
String pageHtml = gethtml("https://www.qidian.com/rank/yuepiao");
// Group 1 captures the href of an <a> that directly follows <h4>;
// group 2 captures the text between that <a ...> and its closing </a>.
Matcher anchors = Pattern.compile("<h4><a href=\"(.+?)\".*?>(.*?)</a>").matcher(pageHtml);
// Print one "https:<href>::<text>" line per match.
while (anchors.find()) {
    String href = anchors.group(1);
    String text = anchors.group(2);
    System.out.println("https:" + href + "::" + text);
}
結果
![結果]()