1.第一次做的時候:
// First approach: when the URL matches regxhp, parse the Xinhua JS payload with FastJson
// directly and fill the article's title, source, publish time and text from it.
if (url.matches(regxhp)) {
    // Collect every "yyyy-MM-dd HH:mm:ss" timestamp found in the text of the <div> elements.
    // NOTE(review): `time` is not used for the publish time in this snippet; it is kept only
    // because it is declared outside this block and may be read later — confirm.
    time = listToString(getElementAgainstXpath(s, "//div"));
    List<String> timestamps = new ArrayList<String>();
    Matcher timestampMatcher =
            Pattern.compile("\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}").matcher(time);
    while (timestampMatcher.find()) {
        timestamps.add(timestampMatcher.group());
    }
    time = listToString(timestamps);

    // Default publish time: "now" truncated to whole seconds. This replaces the former
    // SimpleDateFormat format/parse round trip, which produced the same truncation but
    // needed a ParseException handler and could shift the value by an hour when the
    // wall-clock time is ambiguous at a daylight-saving transition.
    article.setPublishTime(new Date(System.currentTimeMillis() / 1000L * 1000L));

    // The payload is a JS assignment ("var XinhuammNews = {...}"); dropping the prefix
    // leaves the JSON object. The prefix is a literal, so replace() is used, not a regex.
    cret = s.replace("var XinhuammNews =", "");
    JSONObject jsonObject = JSON.parseObject(cret);
    if (jsonObject != null) {
        article.setTitle(jsonObject.getString("topic"));
        article.setSource(jsonObject.getString("docSource"));
        article.setPublishTime(jsonObject.getDate("releasedate"));

        // A missing "content" is treated as empty text instead of the literal "null".
        String content = jsonObject.getString("content");
        if (content == null) {
            content = "";
        }

        // A missing "videourl" is treated as "no video" instead of throwing a
        // NullPointerException; embedded double quotes are stripped from the URL.
        String video = jsonObject.getString("videourl");
        video = (video == null) ? "" : video.replace("\"", "");
        // Compare by content: the previous `video != ""` compared references, so an
        // empty URL still produced an empty <video src=></video> tag.
        if (!video.isEmpty()) {
            video = "<video src=" + video + "></video>";
        }

        // The video tag (if any) goes in front of the article body before formatting.
        cret = HtmlUtils.getTagFormattedText(video + content, url);
        article.setText(cret);
    }
}
2.第二次遇到的時候:
// Second approach: for the matching channel, rewrite the article URL to the Xinhua news
// API endpoint, fetch it, strip the "var XinhuammNews =" prefix from the response and
// return what remains. An empty string is returned when the request fails.
if ("首頁-新華髮布".equals(article.getChannelName())) {
    final String apiUrl = String.format(
            "http://xhpfmapi.zhongguowangshi.com/v600/news/%s.js", article.getArticleUrl());
    article.setArticleUrl(apiUrl);

    String body;
    try {
        HttpClientResponse resp = request(apiUrl, "GET", null, null, context);
        body = resp.getHtml().replace("var XinhuammNews =", "");
    } catch (Exception e) {
        // Best effort: log the failure and fall back to an empty body.
        LOG.error("detail request occurs error. {} {}", apiUrl, e);
        body = "";
    }
    return body;
}
主要區別是:第一次做的時候使用 FastJson 在程式碼中直接解析返回的 JSON,並逐一設定文章的標題、來源、發佈時間和正文;
第二次做的時候只是簡單處理一下新華分享頁返回的內容(去掉開頭的 `var XinhuammNews =`),使其成為標準的 JSON,然後送去模板進行處理。