本文是對博客「下雨天沒帶傘–JAVA實現網頁爬蟲及將數據寫入數據庫」(http://blog.csdn.net/sinat_38224744/article/details/70652767)中代碼的理解和補充,並使用 Navicat for MySQL 軟件實現對數據的可視化操作。代碼分成兩部分:Web.java 和 Jdbc.java。
一、Web.java
/**
* 原博客:http://blog.csdn.net/sinat_38224744/article/details/70652767
* (1)設置URL、URLConnection、BufferedReader
* (2)設置正則表達式,通過獲取的數據流進行解析
* (3)將符合匹配要求的數據存放到list數組中和數據庫中
* (4)List<String> list = new ArrayList<String>();
* -->http://blog.csdn.net/u010340178/article/details/53507964
* (5) 對 " \\w+@\\w+(\\.\\w+)+ "的理解:
* -->https://zhidao.baidu.com/question/875897286591066732.html
*/
package web_one;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Minimal web scraper: downloads one page, finds every e-mail address in it
 * and hands each address to {@code Jdbc.insert} for storage, echoing it to
 * standard output as well.
 */
public class Web {

    /** Page that is downloaded and scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

    /**
     * E-mail pattern: one or more word characters, '@', one or more word
     * characters, then one or more ".word" groups. Compiled once and reused
     * for every line.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Program entry point; runs the scraper once.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        web();
    }

    /**
     * Reads the page line by line, stores every e-mail address found through
     * {@code Jdbc.insert} and prints it.
     *
     * @throws IOException if the page cannot be opened or read
     */
    private static void web() throws IOException {
        URLConnection connection = new URL(PAGE_URL).openConnection();
        // try-with-resources closes the reader (and the underlying network
        // stream) even when reading fails part-way through.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String email : extractEmails(line)) {
                    // Jdbc.insert is called once per address with a
                    // single-element list, in the order the addresses appear.
                    List<String> single = new ArrayList<String>(1);
                    single.add(email);
                    Jdbc.insert(single);
                    System.out.println(email);
                }
            }
        }
    }

    /**
     * Collects every e-mail address contained in one line of text.
     *
     * @param line the text to scan
     * @return the matched addresses in order of appearance; empty if none
     */
    private static List<String> extractEmails(String line) {
        List<String> emails = new ArrayList<String>();
        Matcher matcher = EMAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            emails.add(matcher.group());
        }
        return emails;
    }
}
二、Jdbc.java
/**
* (1)mysql數據庫的連接
* (2)數據庫的操作:http://blog.csdn.net/sinat_38224744/article/details/70652612
* (3)PreparedStatement.executeUpdate() 的返回值 --> (a)對於SQL數據操作語言(DML)語句,返回受影響的行數;或
* --> (b)對於不返回任何內容的SQL語句,返回0
* (4)insert into QQ (email) values(?) --> 往表QQ的email列中插入一行數據
*/
package web_one;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
/**
 * JDBC helper that stores e-mail addresses in the {@code email} column of
 * table {@code QQ}.
 */
public class Jdbc {

    /** JDBC driver class that is loaded before connecting. */
    private static final String DRIVER = "com.mysql.jdbc.Driver";

    // Adjust the settings below to your own environment.
    /** JDBC URL; the database is "webone". */
    private static final String URL = "jdbc:mysql://localhost:3306/webone";
    private static final String USERNAME = "mysql的用戶名";
    private static final String PASSWORD = "mysql的密碼";

    /** Inserts one row into table QQ, filling the email column. */
    private static final String INSERT_SQL = "insert into QQ (email) values(?)";

    /**
     * Opens a new database connection.
     *
     * @return an open connection, never {@code null}
     * @throws SQLException if the driver class cannot be loaded or the
     *         connection cannot be established
     */
    private static Connection getConn() throws SQLException {
        try {
            Class.forName(DRIVER); // load the driver class
        } catch (ClassNotFoundException e) {
            // Report as SQLException so callers handle a single failure type
            // instead of receiving a null connection.
            throw new SQLException("JDBC driver not found: " + DRIVER, e);
        }
        return DriverManager.getConnection(URL, USERNAME, PASSWORD);
    }

    /**
     * Inserts every string of {@code list} as one row of table QQ. All rows
     * are written in a single transaction: either all are committed or, on
     * failure, none.
     *
     * @param list the e-mail addresses to store; {@code null} or empty means
     *        there is nothing to do
     * @return the total number of rows inserted, or 0 if the list was
     *         null/empty or the insert failed (the error is printed)
     */
    static int insert( List<String> list ) {
        if (list == null || list.isEmpty()) {
            return 0; // nothing to store; do not open a connection
        }
        int inserted = 0;
        // try-with-resources closes the connection on every path.
        try (Connection conn = getConn()) {
            // commit() is only valid when auto-commit is off, so switch to an
            // explicit transaction before writing.
            conn.setAutoCommit(false);
            try (PreparedStatement pstmt = conn.prepareStatement(INSERT_SQL)) {
                // The statement stays open for the whole loop and is reused
                // for every element.
                for (String email : list) {
                    pstmt.setString(1, email);
                    inserted += pstmt.executeUpdate();
                }
                conn.commit();
            } catch (SQLException e) {
                try {
                    conn.rollback();
                } catch (SQLException rollbackFailure) {
                    // Keep the original failure as the primary error.
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
        } catch (SQLException e) {
            e.printStackTrace();
            return 0; // nothing was committed
        }
        return inserted;
    }
}
三、 Navicat for MySQL
運行截圖:
參考資料:
(1)http://www.pan66.com/show/2373054.html
(2)http://blog.csdn.net/jgirl_333/article/details/54925219
(3)http://jingyan.baidu.com/article/9faa7231b031b8473c28cb34.html
(4)http://jingyan.baidu.com/article/a3a3f81188824c8da2eb8ae2.html
(5)http://blog.csdn.net/sinat_38224744/article/details/70652612