Java 抓取起點小說,並用正則表達式匹配所需的數據

抓取起點網站內容

/**
 * Downloads the document at the given URL with an HTTP GET request and
 * returns its body decoded as UTF-8.
 *
 * <p>Browser-like request headers (User-Agent, Accept, Accept-Language) are
 * sent with the request.
 *
 * @param ul the absolute URL to fetch; it is cast to an HTTP connection, so
 *           an http/https URL is expected
 * @return the response body as text, or an empty string if the URL is
 *         malformed or any I/O error occurs (the stack trace is printed)
 */
static String gethtml(String ul) {
		try {
			URL u = new URL(ul);
			HttpURLConnection uc = (HttpURLConnection) u.openConnection();
			// Set the request headers.
			uc.setRequestMethod("GET");
			uc.setRequestProperty("User-Agent",
					"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3895.5 Safari/537.36");
			uc.setRequestProperty("Accept-Language", "zh-CN,zh;q=0.9");
			// Accept-Encoding (gzip, deflate) is deliberately not sent: the body
			// below is read as plain text without any decompression step.
			uc.setRequestProperty("Accept",
					"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3");
			uc.setRequestProperty("Connection", "keep-alive");
			uc.connect();
			// try-with-resources closes both streams even when reading fails, and
			// never touches a stream that was not opened (the previous finally
			// block dereferenced null when the connection itself failed).
			try (InputStream is = uc.getInputStream();
					// Decode as UTF-8 here instead of decoding with the platform
					// charset and re-encoding afterwards, which corrupts non-ASCII
					// text whenever the platform charset is not UTF-8.
					InputStreamReader isr = new InputStreamReader(is, "UTF-8")) {
				StringBuilder sb = new StringBuilder();
				char[] buff = new char[1024];
				int len;
				while ((len = isr.read(buff)) != -1) {
					sb.append(buff, 0, len);
				}
				return sb.toString();
			}
		} catch (IOException e) {
			// Also covers MalformedURLException and UnsupportedEncodingException,
			// which are both IOException subclasses.
			e.printStackTrace();
		}
		return "";
	}

正則匹配書名及鏈接

		// Fetch the Qidian ranking page and print one "link::title" line per match.
		String rankingHtml = gethtml("https://www.qidian.com/rank/yuepiao");
		// Group 1 captures the href of an anchor that directly follows <h4>;
		// group 2 captures the anchor's inner text.
		Matcher titleLinks = Pattern.compile("<h4><a href=\"(.+?)\".*?>(.*?)</a>").matcher(rankingHtml);
		while (titleLinks.find()) {
			String href = titleLinks.group(1);
			String title = titleLinks.group(2);
			// The captured href is printed with an "https:" prefix in front of it.
			System.out.println("https:" + href + "::" + title);
		}

結果

結果

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章