Java爬蟲歷險記 -- (2)爬取數據並存放到mysql

本文是對博客 下雨天沒帶傘–JAVA實現網頁爬蟲及將數據寫入數據庫 –http://blog.csdn.net/sinat_38224744/article/details/70652767 中代碼的理解和補充,並使用 Navicat for MySQL 軟件實現對數據的可視化操作。代碼分成兩部分:Web.java + Jdbc.java

一、Web.java

/**
 * 原博客:http://blog.csdn.net/sinat_38224744/article/details/70652767
 * (1)設置URL、URLConnection、BufferedReader
 * (2)設置正則表達式,通過獲取的數據流進行解析
 * (3)將符合匹配要求的數據存放到List集合中,並寫入數據庫
 * (4)List<String> list = new ArrayList<String>();
 * -->http://blog.csdn.net/u010340178/article/details/53507964
 * (5) 對 " \\w+@\\w+(\\.\\w+)+ "的理解:
 * -->https://zhidao.baidu.com/question/875897286591066732.html
 */

package web_one;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Minimal crawler: downloads one web page, extracts every e-mail address
 * found in it, stores each address through {@code Jdbc.insert} and echoes it
 * to standard output.
 */
public class Web {

    /** Page that is scanned for e-mail addresses. */
    private static final String PAGE_URL = "http://bbs.tianya.cn/post-enterprise-758850-1.shtml";

    /**
     * E-mail matcher: one or more word characters, an {@code @}, one or more
     * word characters, followed by at least one {@code .word} group.
     * Compiled once because the same pattern is applied to every line.
     */
    private static final Pattern EMAIL_PATTERN = Pattern.compile("\\w+@\\w+(\\.\\w+)+");

    /**
     * Program entry point; runs the crawl once.
     *
     * @param args ignored
     * @throws IOException if the page cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        web();
    }

    /**
     * Reads {@link #PAGE_URL} line by line and, for every e-mail address
     * found, inserts it via {@code Jdbc.insert} and then prints it.
     *
     * @throws IOException if the page cannot be opened or read
     */
    private static void web() throws IOException {
        URLConnection connection = new URL(PAGE_URL).openConnection();

        // try-with-resources closes the reader (and the underlying network
        // stream) even when reading fails part-way through the page.
        // NOTE(review): the response is decoded with the platform default
        // charset, as before; confirm whether the page declares a specific one.
        try (BufferedReader reader =
                new BufferedReader(new InputStreamReader(connection.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String email : extractEmails(line)) {
                    // Jdbc.insert takes a list; hand it one address at a time
                    // so every address is stored and printed individually.
                    List<String> single = new ArrayList<String>(1);
                    single.add(email);
                    Jdbc.insert(single);
                    System.out.println(email);
                }
            }
        }
    }

    /**
     * Collects every substring of {@code line} that matches
     * {@link #EMAIL_PATTERN}, in order of appearance.
     *
     * @param line one line of page text
     * @return the matched addresses; empty when the line contains none
     */
    private static List<String> extractEmails(String line) {
        List<String> emails = new ArrayList<String>();
        Matcher matcher = EMAIL_PATTERN.matcher(line);
        while (matcher.find()) {
            emails.add(matcher.group());
        }
        return emails;
    }
}

二、Jdbc.java

/**
 * (1)mysql數據庫的連接
 * (2)數據庫的操作:http://blog.csdn.net/sinat_38224744/article/details/70652612
 * (3)PreparedStatement.executeUpdate() 的返回值 --> (a)對於SQL數據操作語言(DML)語句,返回受影響的行數;
 *                                              --> (b)對於不返回任何內容的SQL語句,返回0
 * (4)insert into QQ (email) values(?)  -->  往表QQ的email列中插入一行記錄
 */

package web_one;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

/**
 * Small JDBC helper that stores e-mail addresses in the {@code email} column
 * of the MySQL table {@code QQ}.
 */
public class Jdbc {

    /** Class name of the MySQL JDBC driver loaded by {@link #getConn()}. */
    private static final String DRIVER = "com.mysql.jdbc.Driver";

    // Adjust the three settings below to match your own environment.
    /** JDBC URL; the target database is "webone". */
    private static final String URL = "jdbc:mysql://localhost:3306/webone";
    /** Placeholder: replace with the MySQL user name. */
    private static final String USERNAME = "mysql的用戶名";
    /** Placeholder: replace with the MySQL password. */
    private static final String PASSWORD = "mysql的密碼";

    /** Inserts one row into table QQ, binding the single parameter to column email. */
    private static final String INSERT_SQL = "insert into QQ (email) values(?)";

    /**
     * Loads the JDBC driver and opens a new connection to the database.
     *
     * @return an open connection; the caller is responsible for closing it
     * @throws SQLException if the driver class is missing or the connection
     *         cannot be established
     */
    private static Connection getConn() throws SQLException {
        try {
            Class.forName(DRIVER); // load the driver class so it registers with DriverManager
        } catch (ClassNotFoundException e) {
            // Surface the failure as SQLException so callers never see a null connection.
            throw new SQLException("JDBC driver not found on classpath: " + DRIVER, e);
        }
        return DriverManager.getConnection(URL, USERNAME, PASSWORD);
    }

    /**
     * Inserts every string in {@code list} as one row of table {@code QQ}.
     *
     * <p>All rows are written in a single transaction: either every element is
     * stored or, on failure, the transaction is rolled back. Errors are printed
     * to standard error and reported through the return value, not thrown.
     *
     * @param list e-mail addresses to store; may be {@code null} or empty
     * @return the number of rows inserted, or {@code 0} if {@code list} is
     *         {@code null}/empty or the insert failed
     */
    static int insert(List<String> list) {
        if (list == null || list.isEmpty()) {
            return 0; // nothing to store, so do not open a connection at all
        }

        int inserted = 0;
        // try-with-resources closes the statement and the connection on every path.
        try (Connection conn = getConn()) {
            // commit() is only valid outside auto-commit mode, so switch it off
            // and commit once after all rows have been written.
            conn.setAutoCommit(false);
            try (PreparedStatement pstmt = conn.prepareStatement(INSERT_SQL)) {
                for (String email : list) {
                    pstmt.setString(1, email);
                    inserted += pstmt.executeUpdate();
                }
                conn.commit();
            } catch (SQLException e) {
                try {
                    conn.rollback();
                } catch (SQLException rollbackFailure) {
                    e.addSuppressed(rollbackFailure);
                }
                throw e;
            }
        } catch (SQLException e) {
            e.printStackTrace();
            return 0; // nothing was committed
        }

        return inserted;
    }
}

三、 Navicat for MySQL

運行截圖:

mysql

參考資料:
(1)http://www.pan66.com/show/2373054.html
(2)http://blog.csdn.net/jgirl_333/article/details/54925219
(3)http://jingyan.baidu.com/article/9faa7231b031b8473c28cb34.html
(4)http://jingyan.baidu.com/article/a3a3f81188824c8da2eb8ae2.html
(5)http://blog.csdn.net/sinat_38224744/article/details/70652612

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章