抓取網頁中的email地址
index.html 爲事先保存到本地的網頁文件,需要自行選擇要抓取的網頁並保存
/**
 * Extracts e-mail addresses from a locally saved web page and prints each
 * match on its own line to standard output.
 *
 * <p>Usage: {@code java EmailSpider [path-to-html-file]}. When no path is
 * given, the built-in default page location is read.
 */
public class EmailSpider {

    /** Page that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PAGE = "E:\\gongfang\\JavaDemo\\resources\\index.html";

    /**
     * E-mail matcher: one or more word characters, dots or hyphens, an
     * {@code @}, the same character set again, then a dot and a run of word
     * characters. Compiled once because it is applied to every input line.
     */
    private static final Pattern EMAIL = Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Reads the page line by line and prints every e-mail address found.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan.
     *             With no arguments the default page is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;

        // try-with-resources guarantees the reader (and the underlying file
        // handle) is closed even if reading fails part-way through.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Check whether this line contains any e-mail addresses.
                parse(line);
            }
        } catch (FileNotFoundException e) {
            System.err.println("Input file not found: " + path);
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Failed to read: " + path);
            e.printStackTrace();
        }
    }

    /**
     * Prints every e-mail address that occurs in the given line.
     *
     * @param lin one line of text from the page being scanned
     */
    private static void parse(String lin) {
        Matcher m = EMAIL.matcher(lin);
        while (m.find()) {
            // Change this line if the addresses should be stored (e.g. in a
            // database) instead of printed.
            System.out.println(m.group());
        }
    }
}