最近花時間學習了使用Java獲取網站數據的方法,並親自動手實踐了一下:共獲取3000+條數據,去除重複後剩餘2000+條,並使用JFreeChart根據電影評分做出了幾張簡單的統計圖。
電影評分統計圖: JFreeChart生成圖片
使用jsoup獲取豆瓣電影網站的電影數據信息。此網站動態加載數據,如果直接查看網頁源代碼是看不到數據的;可以通過頁面js所請求的JSON接口,獲取相應的數據:
部分代碼如下:
movieServlet.java
主要的功能爲:獲取網站的電影數據
首先獲取每一個電影分類的鏈接:
HashMap<String, String> urlandnames = new HashMap<String, String>(); MovieService movieService = new MovieService(); // 排行榜頁面 String url = "http://movie.douban.com/chart"; // 獲取分類的所有相對鏈接和分類名稱 try { Document kinds = Jsoup.connect(url) .userAgent("Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36") .timeout(10000) .get(); Elements elements = kinds.select("#content .types a"); for(Element element : elements){ String kindurl = element.attr("href"); // 鏈接地址 String name = element.text(); // 類別 urlandnames.put(kindurl,name); } } catch (IOException e) { e.printStackTrace(); System.out.println("獲取urlandname出現錯誤!!"); } //獲取所有的key Set<String> keySet = urlandnames.keySet(); //迭代key值 Iterator<String> iterator = keySet.iterator(); List<Movie> allMovies = new ArrayList<Movie>(); while(iterator.hasNext()){ // 獲取到key值,即url String next = iterator.next(); // 根據某一個類別的鏈接,獲取行對應的電影數據 List<Movie> listMovie = getMovieInfo(next); allMovies.addAll(listMovie); }
根據對應的鏈接獲取相應的數據,保存至數據庫:
/** * 獲取種類電影信息,保存到數據庫 * @param url 某一個種類的鏈接地址 */ private List<Movie> getMovieInfo(String url){ String[] tempurl = url.split("&"); String finalurl = "http://movie.douban.com/j/chart/top_list_count?"+tempurl[1]+"&"+tempurl[2]; // finalurl ---------http://movie.douban.com/j/chart/top_list_count?type=18&interval_id=100:90 String document = null; try { //獲取該類別影片的數量total、可在線觀看數量playable_count document = Jsoup.connect(finalurl).timeout(10000).ignoreContentType(true).execute().body(); // document------{"playable_count":18,"total":32,"unwatched_count":32}可在線觀看18部,共32部,未觀看32部 } catch (IOException e) { e.printStackTrace(); } //json解析器 JsonParser parser = new JsonParser(); //獲取json對象 JsonObject jsonObject = (JsonObject) parser.parse(document); //將json數據轉爲int型數據 int movienum = jsonObject.get("total").getAsInt(); System.out.println(movienum);//該類型的數量 String nameurl = "http://movie.douban.com/j/chart/top_list?"+tempurl[1]+"&"+tempurl[2]+"&action=&start=0&limit="+movienum; // nameurl-------------http://movie.douban.com/j/chart/top_list?type=18&interval_id=100:90&action=&start=0&limit=32 FileWriter fw = null; String doc = null; try { //獲取該類別的所有影片的信息 doc = Jsoup.connect(nameurl).timeout(10000).ignoreContentType(true).execute().body(); } catch (Exception e) { e.printStackTrace(); } //將json的一個對象數組解析成JsonElement對象 JsonElement element = null; try { //通過JsonParser對象可以把json格式的字符串解析成一個JsonElement對象 element = parser.parse(doc); } catch (NullPointerException e) { e.printStackTrace(); } JsonArray jsonArray = null; if(element.isJsonArray()){ //JsonElement對象如果是一個數組的話轉化成jsonArray jsonArray = element.getAsJsonArray(); } //遍歷json的對象數組 Iterator it = jsonArray.iterator(); List<Movie> listMovie = new ArrayList<Movie>(); while (it.hasNext()) { JsonObject e = (JsonObject)it.next(); //電影名稱 String name = e.get("title").getAsString(); //豆瓣評分 float score = e.get("score").getAsFloat(); //發佈時間 String release_date = e.get("release_date").getAsString(); //類型 JsonArray jsonArray2 = 
e.get("types").getAsJsonArray(); String types = jsonArray2.toString(); //鏈接地址 String movieUrl = e.get("url").getAsString(); //是否可以在線播放 String is_playable = e.get("is_playable").getAsString(); String substring = movieUrl.substring(0, movieUrl.lastIndexOf("/")); String keyID = substring.substring(substring.lastIndexOf("/"), substring.length()); if(cache.get(keyID) != null){ String value = (String) cache.get(keyID).getObjectValue(); if(!name.equals(value)){ net.sf.ehcache.Element element2 = new net.sf.ehcache.Element(keyID,name); cache.put(element2); }else { // System.out.println("重複的 movie Info"); continue; } }else { net.sf.ehcache.Element element2 = new net.sf.ehcache.Element(keyID,name); cache.put(element2); } Movie movie = new Movie(); movie.setName(name); movie.setTypes(types); movie.setRelease_date(release_date); movie.setScore(score); movie.setMovieUrl(movieUrl); movie.setIs_playable(is_playable); //在控制檯輸出 // System.out.println(movie.toString()); // System.out.println("正在獲取數據ing..."); listMovie.add(movie); } return listMovie; }
ScoreServlet.java 主要是生成圖表
生成柱狀圖:
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String method = request.getParameter("method"); System.out.println(method+"===================method"); MovieService movieService = new MovieService(); Map<String, Integer> map = movieService.Count(); Integer one = map.get("one"); Integer two = map.get("two"); Integer three = map.get("three"); Integer four = map.get("four"); Integer five = map.get("five"); if(method.equals("barChart")){ double [][]data = new double[][]{{one},{two},{three},{four},{five}}; String []rowKeys = {">=9",">=8.5",">=8",">=7.5","<7.5"}; String []columnKeys = {"評分"}; CategoryDataset dataset = DatasetUtilities.createCategoryDataset(rowKeys, columnKeys, data); JFreeChart chart = ChartFactory.createBarChart3D( "電影評分柱狀圖", // 圖表標題 "電影", // 目錄軸的顯示標籤 "數量", // 數值軸的顯示標籤 dataset, // 數據集 PlotOrientation.VERTICAL, // 圖表方向:水平、垂直 true, // 是否顯示圖例(對於簡單的柱狀圖必須是 false) false, // 是否創建工具提示 (tooltip) false // 是否生成 URL 鏈接 ); CategoryPlot plot = chart.getCategoryPlot(); // 設置網格背景顏色 plot.setBackgroundPaint(Color.white); // 設置網格豎線顏色 plot.setDomainGridlinePaint(Color.pink); // 設置網格橫線顏色 plot.setRangeGridlinePaint(Color.pink); // 顯示每個柱的數值,並修改該數值的字體屬性 BarRenderer3D renderer=new BarRenderer3D(); renderer.setBaseItemLabelGenerator(new StandardCategoryItemLabelGenerator()); renderer.setBaseItemLabelsVisible(true); renderer.setBasePositiveItemLabelPosition(new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_LEFT)); renderer.setItemLabelAnchorOffset(10D); // 設置平行柱的之間距離 renderer.setItemMargin(0.4); plot.setRenderer(renderer); FileOutputStream fos_jpg = null; try { //將圖片保存至Tomcat服務器WebRoot下的img目錄中 fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"barChart.jpg"); ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null); } catch (Exception e) { System.out.println("error"); } finally { try { fos_jpg.close(); } catch (Exception e) { 
System.out.println("error2"); } } request.setAttribute("barChart", "barChart.jpg"); }
生成餅狀圖:
// Score-bucket counts, keyed "one" .. "five" by MovieService.Count().
MovieService movieService = new MovieService();
Map<String, Integer> map = movieService.Count();
Integer one = map.get("one");
Integer two = map.get("two");
Integer three = map.get("three");
Integer four = map.get("four");
Integer five = map.get("five");

// Constant-first equals: a request without a "method" parameter must not throw.
if ("pieChart".equals(method)) {
    DefaultPieDataset data = new DefaultPieDataset();
    data.setValue(">=9",one);
    data.setValue(">=8.5",two);
    data.setValue(">=8",three);
    data.setValue(">=7.5",four);
    data.setValue("<7.5",five);

    JFreeChart chart = ChartFactory.createPieChart3D(
            "評分餅狀圖",   // chart title
            data,
            true,           // show legend
            false,          // tooltips
            false           // URLs
    );

    // The plot is configured through a single PiePlot3D reference.
    PiePlot3D pieplot3d = (PiePlot3D) chart.getPlot();
    pieplot3d.setLabelFont(new Font("宋體", 0, 12));
    pieplot3d.setNoDataMessage("無數據");
    pieplot3d.setCircular(true);
    pieplot3d.setLabelGap(0.02D);
    // Section label: "<key> <percentage>", percentage with two decimals.
    pieplot3d.setLabelGenerator(new StandardPieSectionLabelGenerator("{0} {2}",NumberFormat.getNumberInstance(),new DecimalFormat("0.00%")));
    // Start angle and clockwise drawing direction.
    pieplot3d.setStartAngle(120D);
    pieplot3d.setDirection(Rotation.CLOCKWISE);
    // Foreground alpha: 0 = fully transparent, 1 = opaque.
    pieplot3d.setForegroundAlpha(0.7F);

    FileOutputStream fos_jpg = null;
    try {
        // Write the image into the web application's root directory.
        fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"pieChart.jpg");
        ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null);
    } catch (Exception e) {
        // Best effort as before, but keep the cause visible.
        System.out.println("error");
        e.printStackTrace();
    } finally {
        // The stream is null when opening the file failed; only close what was opened.
        if (fos_jpg != null) {
            try {
                fos_jpg.close();
            } catch (IOException e) {
                System.out.println("error2");
                e.printStackTrace();
            }
        }
    }
    request.setAttribute("pieChart", "pieChart.jpg");
}
生成折線圖:
// Constant-first equals: a request without a "method" parameter must not throw.
if ("lineChart".equals(method)) {
    XYSeriesCollection collection = new XYSeriesCollection();
    XYSeries series = new XYSeries("折線");

    // Counts keyed by score text ("9.9", "9.8", ...), as built by movieService.lineChart().
    Map<String, Integer> map2 = movieService.lineChart();
    // Walk downwards from 9.9, one tenth per map entry: 99 -> "9.9", 98 -> "9.8", ...
    int number = 99;
    for (int i = 0; i < map2.size(); i++, number--) {
        String s = number+"";
        String score = s.charAt(0)+"."+s.charAt(1);
        series.add(Double.parseDouble(score),map2.get(score));
    }
    collection.addSeries(series);

    JFreeChart chart = ChartFactory.createXYLineChart(
            "評分折線圖",
            "評分",
            "數量",
            collection,
            PlotOrientation.VERTICAL,
            true,
            true,
            false);
    XYPlot plot = (XYPlot) chart.getPlot();

    // Draw a shape at every data point.
    XYLineAndShapeRenderer xylinerenderer = (XYLineAndShapeRenderer)plot.getRenderer();
    xylinerenderer.setBaseShapesVisible(true);

    // Print each point's value next to it.
    XYItemRenderer xyitem = plot.getRenderer();
    xyitem.setBaseItemLabelsVisible(true);
    xyitem.setBasePositiveItemLabelPosition(new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_CENTER));
    xyitem.setBaseItemLabelGenerator(new StandardXYItemLabelGenerator());
    xyitem.setBaseItemLabelFont(new Font("Dialog", 1, 10));
    plot.setRenderer(xyitem);

    FileOutputStream fos_jpg = null;
    try {
        // Write the image into the web application's root directory.
        fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"lineChart.jpg");
        ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null);
    } catch (Exception e) {
        // Best effort as before, but keep the cause visible.
        System.out.println("error");
        e.printStackTrace();
    } finally {
        // The stream is null when opening the file failed; only close what was opened.
        if (fos_jpg != null) {
            try {
                fos_jpg.close();
            } catch (IOException e) {
                System.out.println("error2");
                e.printStackTrace();
            }
        }
    }
    request.setAttribute("lineChart", "lineChart.jpg");
}
MovieDao.java
把數據插入到數據庫
public class MovieDao {

    /**
     * Inserts every movie of the list into the {@code movie} table.
     *
     * <p>One prepared statement is created and reused for all rows, and both the
     * statement and the connection are closed afterwards, also when the list is
     * empty or an insert fails.
     *
     * @param listMovie movies to insert
     * @throws RuntimeException wrapping the {@link SQLException} when an insert
     *         or closing a JDBC resource fails
     */
    public void save(List<Movie> listMovie){
        String sql = " INSERT INTO movie(NAME,TYPES,release_date,score,movieUrl,is_playable) VALUE( ?,?,?,?,?,? ) ";
        Connection connection = JdbcUtils.getConnection();
        PreparedStatement statement = null;
        try {
            // Prepared once; every row only rebinds the six parameters.
            statement = connection.prepareStatement(sql);
            int i = 1;
            for (Movie movie : listMovie) {
                System.out.println("正在插入第"+(i++)+"條數據到數據庫ing...");
                statement.setString(1, movie.getName());
                statement.setString(2, movie.getTypes());
                statement.setString(3, movie.getRelease_date());
                statement.setFloat(4, movie.getScore());
                statement.setString(5, movie.getMovieUrl());
                statement.setString(6, movie.getIs_playable());
                statement.execute();
            }
            System.out.println("保存數據完成");
        } catch (SQLException e) {
            System.out.println("保存數據出現錯誤 MovieDao error");
            e.printStackTrace();
            throw new RuntimeException(e);
        } finally {
            // Close statement before connection; attempt both even if the first fails.
            SQLException closeError = null;
            if (statement != null) {
                try {
                    statement.close();
                } catch (SQLException e) {
                    closeError = e;
                }
            }
            if (connection != null) {
                try {
                    connection.close();
                } catch (SQLException e) {
                    if (closeError == null) {
                        closeError = e;
                    }
                }
            }
            if (closeError != null) {
                closeError.printStackTrace();
                throw new RuntimeException(closeError);
            }
        }
    }
查詢所有數據
/**
 * Loads every row of the {@code movie} table.
 *
 * <p>The result set, statement and connection are closed in that order, and only
 * those that were actually opened, so a failure while connecting or preparing
 * the query is reported as such instead of as a {@link NullPointerException}
 * from the cleanup code.
 *
 * @return all stored movies; empty when the table has no rows
 * @throws RuntimeException wrapping the {@link SQLException} when the query or
 *         closing a JDBC resource fails
 */
public List<Movie> findAll(){
    Connection connection = null;
    PreparedStatement statement = null;
    ResultSet resultSet = null;
    try {
        connection = JdbcUtils.getConnection();
        String sql = " select * from movie ";
        statement = connection.prepareStatement(sql);
        resultSet = statement.executeQuery();

        List<Movie> list = new ArrayList<Movie>();
        while (resultSet.next()) {
            Movie movie = new Movie();
            movie.setId(resultSet.getInt("id"));
            movie.setName(resultSet.getString("name"));
            movie.setTypes(resultSet.getString("types"));
            movie.setRelease_date(resultSet.getString("release_date"));
            movie.setScore(resultSet.getFloat("score"));
            movie.setMovieUrl(resultSet.getString("movieUrl"));
            movie.setIs_playable(resultSet.getString("is_playable"));
            list.add(movie);
        }
        return list;
    } catch (SQLException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } finally {
        // Innermost resource first; attempt every close and report the first failure.
        SQLException closeError = null;
        if (resultSet != null) {
            try {
                resultSet.close();
            } catch (SQLException e) {
                closeError = e;
            }
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException e) {
                if (closeError == null) {
                    closeError = e;
                }
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                if (closeError == null) {
                    closeError = e;
                }
            }
        }
        if (closeError != null) {
            closeError.printStackTrace();
            throw new RuntimeException(closeError);
        }
    }
}
獲取不同分數等級的電影數量
/**
 * Counts the movies in each of five score buckets.
 *
 * <p>Result keys: {@code "one"} (score &gt;= 9), {@code "two"} (8.5 to &lt; 9),
 * {@code "three"} (8 to &lt; 8.5), {@code "four"} (7.5 to &lt; 8) and
 * {@code "five"} (&lt; 7.5). A bucket whose query fails is logged and left out of
 * the map, so callers may see {@code null} for that key.
 *
 * <p>Every statement and result set, and finally the connection, is closed
 * before returning.
 *
 * @return bucket key to number of movies
 */
public Map<String,Integer> Count(){
    // keys[i] receives the count produced by queries[i].
    String[] keys = {"one", "two", "three", "four", "five"};
    String[] queries = {
        "SELECT COUNT(1) FROM movie WHERE score>=9 ",
        "SELECT COUNT(1) FROM movie WHERE score>=8.5 && score<9 ",
        "SELECT COUNT(1) FROM movie WHERE score>=8 && score<8.5 ",
        "SELECT COUNT(1) FROM movie WHERE score>=7.5 && score<8 ",
        "SELECT COUNT(1) FROM movie WHERE score<7.5 "
    };

    Map<String,Integer> mapCount = new HashMap<String, Integer>();
    Connection conn = JdbcUtils.getConnection();
    try {
        for (int i = 0; i < keys.length; i++) {
            PreparedStatement stmt = null;
            ResultSet resultSet = null;
            try {
                stmt = conn.prepareStatement(queries[i]);
                resultSet = stmt.executeQuery();
                while (resultSet.next()) {
                    mapCount.put(keys[i], resultSet.getInt(1));
                }
            } catch (SQLException e) {
                // Best effort: a failed bucket is skipped, the remaining ones still run.
                e.printStackTrace();
            } finally {
                if (resultSet != null) {
                    try {
                        resultSet.close();
                    } catch (SQLException e) {
                        e.printStackTrace();
                    }
                }
                if (stmt != null) {
                    try {
                        stmt.close();
                    } catch (SQLException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
    } finally {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
    return mapCount;
}
獲取每個電影評分的電影數量
/** * 統計每個分數對應的數量 * @return */ public Map<String,Integer> lineChart(){ Connection conn = null; PreparedStatement stmt = null; ResultSet resultSet = null; Map<String,Integer> mapCount = new HashMap<String, Integer>(); conn = JdbcUtils.getConnection(); String sql = null; int number = 99; for( ; number>=70; number-=1){ //獲取9.9 9.1 7.4 ..... String s = number+""; String score = s.charAt(0)+"."+s.charAt(1); sql = "SELECT COUNT(1) FROM movie WHERE score=" + score ; try { stmt = conn.prepareStatement(sql); resultSet = stmt.executeQuery(); while (resultSet.next()) { mapCount.put(score, resultSet.getInt(1)); } } catch (SQLException e) { e.printStackTrace(); } } return mapCount; } }
兩分鐘抓取數據2000+並保存至數據庫中,感覺還是挺慢的,有待優化代碼
代碼源碼: GitHub:https://github.com/YanKuan-IT/DouBanMoviesInfo_DB.git
注:如有什麼做得不對的地方,請指教。