java抓取豆瓣電影數據,分析電影評分,生成統計圖表 ---servlet

    最近花時間學習了一下使用Java獲取網站數據的方法,並親自動手實踐了一下:共獲取3000+條數據,去除重複數據後剩餘2000+條,再使用JFreeChart根據電影評分做出幾張簡單的統計圖。

電影評分統計圖:     JFreeChart生成圖片

    使用jsoup獲取該網站的電影數據信息。此網站動態加載數據,直接查看網頁源代碼是看不到數據的;可以通過分析頁面的js文件找到返回JSON數據的接口,再請求該接口獲取相應的數據:

部分代碼如下:

movieServlet.java

    主要的功能爲:獲取網站的電影數據

    首先獲取每一個電影分類的鏈接:

        HashMap<String, String> urlandnames = new HashMap<String, String>();
		MovieService movieService = new MovieService();
		// Ranking (chart) page that links to every movie category.
		String url = "http://movie.douban.com/chart";
		// Record each category's relative link (map key) together with its display name (map value).
		try {
			String browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36";
			Document kinds = Jsoup.connect(url).userAgent(browserUserAgent).timeout(10000).get();
			for (Element link : kinds.select("#content .types a")) {
				// href is the category link, the anchor text is the category name
				urlandnames.put(link.attr("href"), link.text());
			}
		} catch (IOException e) {
			e.printStackTrace();
			System.out.println("獲取urlandname出現錯誤!!");
		}
		// Walk the collected category links and gather the movies of every category into one list.
		List<Movie> allMovies = new ArrayList<Movie>();
		for (String kindLink : urlandnames.keySet()) {
			allMovies.addAll(getMovieInfo(kindLink));
		}

 根據對應的鏈接獲取相應的數據,保存至數據庫:

	/**
	 * Downloads the movies of one category from the Douban chart JSON endpoints.
	 *
	 * <p>Two requests are made: {@code top_list_count} to learn how many movies the
	 * category holds, then {@code top_list} with that number as {@code limit} so the
	 * whole category arrives in one response. A movie whose id is already in
	 * {@code cache} under the same title is treated as a duplicate and skipped.
	 * Nothing is written to the database here; the collected list is returned.
	 *
	 * <p>The method is best-effort: when the link has an unexpected shape, a request
	 * fails, or a response is not the expected JSON object/array, the problem is
	 * reported on the console and the movies collected so far (possibly none) are
	 * returned instead of failing with a {@code NullPointerException}.
	 *
	 * @param url link of one category; its 2nd and 3rd {@code &}-separated parts
	 *            (e.g. {@code type=18} and {@code interval_id=100:90}) are reused as
	 *            query parameters
	 * @return the movies of that category that were not seen before; never {@code null}
	 */
	private List<Movie> getMovieInfo(String url){
		List<Movie> listMovie = new ArrayList<Movie>();
		if (url == null) {
			return listMovie;
		}
		String[] tempurl = url.split("&");
		if (tempurl.length < 3) {
			System.out.println("Unexpected category url, skipped: " + url);
			return listMovie;
		}
		// e.g. type=18&interval_id=100:90
		String query = tempurl[1] + "&" + tempurl[2];
		JsonParser parser = new JsonParser();

		// Step 1: ask how many movies the category holds.
		// Sample response: {"playable_count":18,"total":32,"unwatched_count":32}
		String finalurl = "http://movie.douban.com/j/chart/top_list_count?" + query;
		String document;
		try {
			document = Jsoup.connect(finalurl).timeout(10000).ignoreContentType(true).execute().body();
		} catch (IOException e) {
			e.printStackTrace();
			return listMovie;
		}
		if (document == null) {
			return listMovie;
		}
		JsonElement countJson = parser.parse(document);
		if (!countJson.isJsonObject() || !countJson.getAsJsonObject().has("total")) {
			System.out.println("Unexpected count response from " + finalurl);
			return listMovie;
		}
		int movienum = countJson.getAsJsonObject().get("total").getAsInt();
		System.out.println(movienum);// number of movies in this category

		// Step 2: fetch all movies of the category in a single request.
		// e.g. http://movie.douban.com/j/chart/top_list?type=18&interval_id=100:90&action=&start=0&limit=32
		String nameurl = "http://movie.douban.com/j/chart/top_list?" + query + "&action=&start=0&limit=" + movienum;
		String doc;
		try {
			doc = Jsoup.connect(nameurl).timeout(10000).ignoreContentType(true).execute().body();
		} catch (IOException e) {
			e.printStackTrace();
			return listMovie;
		}
		if (doc == null) {
			return listMovie;
		}
		JsonElement element = parser.parse(doc);
		if (!element.isJsonArray()) {
			System.out.println("Unexpected movie list response from " + nameurl);
			return listMovie;
		}

		// Step 3: convert every JSON object of the array into a Movie.
		for (JsonElement item : element.getAsJsonArray()) {
			if (!item.isJsonObject()) {
				continue;
			}
			JsonObject e = item.getAsJsonObject();
			// movie title
			String name = e.get("title").getAsString();
			// Douban score
			float score = e.get("score").getAsFloat();
			// release date
			String release_date = e.get("release_date").getAsString();
			// genres, kept as the raw JSON array text
			String types = e.get("types").getAsJsonArray().toString();
			// link of the movie page
			String movieUrl = e.get("url").getAsString();
			// whether the movie can be played online
			String is_playable = e.get("is_playable").getAsString();

			// The id is the last path segment of the movie link (the link ends with "/").
			// The leading "/" is kept on purpose so existing cache keys stay valid.
			String substring = movieUrl.substring(0, movieUrl.lastIndexOf("/"));
			String keyID = substring.substring(substring.lastIndexOf("/"));

			// Same id with the same title was already collected: skip the duplicate.
			net.sf.ehcache.Element cached = cache.get(keyID);
			if (cached != null && name.equals(cached.getObjectValue())) {
				continue;
			}
			// New id, or a known id whose title changed: remember it and keep the movie.
			cache.put(new net.sf.ehcache.Element(keyID, name));

			Movie movie = new Movie();
			movie.setName(name);
			movie.setTypes(types);
			movie.setRelease_date(release_date);
			movie.setScore(score);
			movie.setMovieUrl(movieUrl);
			movie.setIs_playable(is_playable);

			listMovie.add(movie);
		}
		return listMovie;
	}

ScoreServlet.java    主要是生成圖表

    生成柱狀圖:

	/*
	 * Start of the GET handler: loads the per-bucket movie counts and, when the
	 * "method" request parameter is "barChart", renders a 3D bar chart into
	 * barChart.jpg under the servlet context's real path for "/" and publishes the
	 * file name as the "barChart" request attribute.
	 */
	protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
		// Selects which chart to render.
		String method = request.getParameter("method");
		System.out.println(method+"===================method");
		MovieService movieService = new MovieService();
		
		// Movie counts per score bucket, keyed "one".."five"
		// (charted below as >=9, >=8.5, >=8, >=7.5 and <7.5).
		Map<String, Integer> map = movieService.Count();
		Integer one = map.get("one");
		Integer two = map.get("two");
		Integer three = map.get("three");
		Integer four = map.get("four");
		Integer five = map.get("five");
		
		// Constant-first comparison: a request without the "method" parameter must not
		// fail with a NullPointerException.
		if("barChart".equals(method)){
			// One single-value row per bucket. A bucket that is missing from the map is
			// drawn as 0 instead of failing while unboxing null.
			Integer[] counts = {one, two, three, four, five};
			double[][] data = new double[counts.length][1];
			for (int i = 0; i < counts.length; i++) {
				data[i][0] = counts[i] == null ? 0 : counts[i];
			}
			String []rowKeys = {">=9",">=8.5",">=8",">=7.5","<7.5"}; 
			String []columnKeys = {"評分"};
			
			CategoryDataset dataset = DatasetUtilities.createCategoryDataset(rowKeys, columnKeys, data);
			
	        JFreeChart chart = ChartFactory.createBarChart3D(
	        		"電影評分柱狀圖", // chart title
	                "電影", // label of the category axis
	                "數量", // label of the value axis
	                 dataset, // data set
	                 PlotOrientation.VERTICAL, // chart orientation: horizontal or vertical
	                 true,  // show the legend (the original note says simple bar charts should pass false)
	                 false, // create tooltips
	                 false  // generate URL links
	                 ); 
	        
	        CategoryPlot plot = chart.getCategoryPlot();
	        // plot background colour
	 		plot.setBackgroundPaint(Color.white);
	 		// colour of the vertical grid lines
	 		plot.setDomainGridlinePaint(Color.pink);
	 		// colour of the horizontal grid lines
	 		plot.setRangeGridlinePaint(Color.pink);
	 		
	 		// Show the value of every bar as an item label and position that label.
	 		BarRenderer3D renderer=new BarRenderer3D();
	 		renderer.setBaseItemLabelGenerator(new StandardCategoryItemLabelGenerator());
	 		renderer.setBaseItemLabelsVisible(true);
	 		
	 		renderer.setBasePositiveItemLabelPosition(new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_LEFT));
	 		renderer.setItemLabelAnchorOffset(10D);  
	 		
	 		// gap between bars of the same category
	 		renderer.setItemMargin(0.4);
	 		plot.setRenderer(renderer);
	        
	        FileOutputStream fos_jpg = null; 
	        try { 
	        	// Write the image directly under the web application's root directory.
	            fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"barChart.jpg");
	            ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null); 
	        } catch (IOException e) {
	        	// Keep the console marker but also report what actually went wrong.
	        	System.out.println("error");
	        	e.printStackTrace();
			} finally { 
				// The stream is still null when opening the file failed.
				if (fos_jpg != null) {
		            try { 
		                fos_jpg.close(); 
		            } catch (IOException e) {
		            	System.out.println("error2");
		            	e.printStackTrace();
		            } 
				}
	        }
	        request.setAttribute("barChart", "barChart.jpg");
			
		}

生成餅狀圖:

        MovieService movieService = new MovieService();
		
		// Movie counts per score bucket, keyed "one".."five"
		// (charted below as >=9, >=8.5, >=8, >=7.5 and <7.5).
		Map<String, Integer> map = movieService.Count();
		Integer one = map.get("one");
		Integer two = map.get("two");
		Integer three = map.get("three");
		Integer four = map.get("four");
		Integer five = map.get("five");
		
		// Constant-first comparison: a missing "method" parameter must not throw a
		// NullPointerException.
        if ("pieChart".equals(method)) {
			
			DefaultPieDataset data = new DefaultPieDataset();
			data.setValue(">=9",one); 
			data.setValue(">=8.5",two); 
			data.setValue(">=8",three); 
			data.setValue(">=7.5",four); 
			data.setValue("<7.5",five); 
	        
	        JFreeChart chart = ChartFactory.createPieChart3D(
	        		"評分餅狀圖",  		// chart title
			        data, 
			        true, 			// show the legend
			        false, 			// create tooltips
	                false  			// generate URL links
			        ); 
	        
			// createPieChart3D builds a PiePlot3D; one cast gives access to both the
			// general pie settings and the 3D-specific ones.
	        PiePlot3D pieplot3d = (PiePlot3D)chart.getPlot(); 
	        pieplot3d.setLabelFont(new Font("宋體", Font.PLAIN, 12));
	        pieplot3d.setNoDataMessage("無數據");
	        pieplot3d.setCircular(true);
	        pieplot3d.setLabelGap(0.02D);
	        // Section label: "{0}" is the section key, "{2}" the share as a percentage.
	        pieplot3d.setLabelGenerator(new StandardPieSectionLabelGenerator("{0} {2}",NumberFormat.getNumberInstance(),new DecimalFormat("0.00%")));
	        
			// start angle
			pieplot3d.setStartAngle(120D);  
			// draw the sections clockwise
			pieplot3d.setDirection(Rotation.CLOCKWISE);  
			// transparency: 0.5F is half transparent, 1 opaque, 0 fully transparent
			pieplot3d.setForegroundAlpha(0.7F); 
	        
	        FileOutputStream fos_jpg = null; 
	        try { 
	        	// Write the image directly under the web application's root directory.
	            fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"pieChart.jpg");
	            ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null); 
	        } catch (IOException e) {
	        	// Keep the console marker but also report what actually went wrong.
	        	System.out.println("error");
	        	e.printStackTrace();
			} finally { 
				// The stream is still null when opening the file failed.
				if (fos_jpg != null) {
		            try { 
		                fos_jpg.close(); 
		            } catch (IOException e) {
		            	System.out.println("error2");
		            	e.printStackTrace();
		            } 
				}
	        }
	        request.setAttribute("pieChart", "pieChart.jpg");
			
		}

生成折線圖:

       // Constant-first comparison: a missing "method" parameter must not throw a
       // NullPointerException.
       if ("lineChart".equals(method)) {
			XYSeriesCollection collection = new XYSeriesCollection();
			XYSeries series = new XYSeries("折線");
			
			// Map of score text (e.g. "9.9") to the number of movies with that score.
			// Plot exactly the entries that are present instead of regenerating the keys
			// from a hard-coded start value. The series sorts its items by x value by
			// default, so the map's iteration order does not matter.
			Map<String, Integer> map2 = movieService.lineChart();
			for (Map.Entry<String, Integer> point : map2.entrySet()) {
				series.add(Double.parseDouble(point.getKey()), point.getValue());
			}
			collection.addSeries(series);
			
			JFreeChart chart = ChartFactory.createXYLineChart(
				        "評分折線圖",	// chart title
				        "評分",			// x axis label
				        "數量",			// y axis label
				        collection,
				        PlotOrientation.VERTICAL,
				        true, 			// show the legend
				        true, 			// create tooltips
				        false);			// generate URL links
			
			XYPlot plot = (XYPlot) chart.getPlot(); 
			// The plot's renderer is configured in place: draw a shape at every data
			// point and print each point's value as an item label.
			XYLineAndShapeRenderer xylinerenderer = (XYLineAndShapeRenderer)plot.getRenderer();
			xylinerenderer.setBaseShapesVisible(true); 
			xylinerenderer.setBaseItemLabelsVisible(true);
			xylinerenderer.setBasePositiveItemLabelPosition(new ItemLabelPosition(ItemLabelAnchor.OUTSIDE12, TextAnchor.BASELINE_CENTER)); 
			xylinerenderer.setBaseItemLabelGenerator(new StandardXYItemLabelGenerator());
			xylinerenderer.setBaseItemLabelFont(new Font("Dialog", Font.BOLD, 10)); 
			
			FileOutputStream fos_jpg = null; 
	        try { 
	        	// Write the image directly under the web application's root directory.
	            fos_jpg = new FileOutputStream(request.getSession().getServletContext().getRealPath("/")+"lineChart.jpg");
	            ChartUtilities.writeChartAsJPEG(fos_jpg,1,chart,700,500,null); 
	        } catch (IOException e) {
	        	// Keep the console marker but also report what actually went wrong.
	        	System.out.println("error");
	        	e.printStackTrace();
			} finally { 
				// The stream is still null when opening the file failed.
				if (fos_jpg != null) {
		            try { 
		                fos_jpg.close(); 
		            } catch (IOException e) {
		            	System.out.println("error2");
		            	e.printStackTrace();
		            } 
				}
	        }
	        request.setAttribute("lineChart", "lineChart.jpg");
		}

MovieDao.java

把數據插入到數據庫

public class MovieDao {
	
	/**
	 * Inserts all given movies into the {@code movie} table, one row per movie.
	 *
	 * <p>The insert statement is prepared once and re-executed with new parameter
	 * values for every movie. A {@code null} or empty list is a no-op and opens no
	 * connection.
	 *
	 * @param listMovie movies to insert; may be {@code null} or empty
	 * @throws RuntimeException wrapping the {@link SQLException} when an insert or
	 *         the closing of the statement/connection fails
	 */
	public void save(List<Movie> listMovie){
		if (listMovie == null || listMovie.isEmpty()) {
			return;
		}
		String sql = " INSERT INTO movie(NAME,TYPES,release_date,score,movieUrl,is_playable) VALUE( ?,?,?,?,?,? ) ";
		
		Connection connection = JdbcUtils.getConnection();
		PreparedStatement statement = null;

		try {
			// Prepared once for the whole list; previously a new statement was created
			// per row and only the last one was ever closed.
			statement = connection.prepareStatement(sql);
			
			int i = 1;
			for(Movie movie : listMovie){
				System.out.println("正在插入第"+(i++)+"條數據到數據庫ing...");
				
				statement.setString(1, movie.getName());
				statement.setString(2, movie.getTypes());
				statement.setString(3, movie.getRelease_date());
				statement.setFloat(4, movie.getScore());
				statement.setString(5, movie.getMovieUrl());
				statement.setString(6, movie.getIs_playable());
				
				statement.execute();
			}
			System.out.println("保存數據完成");
		} catch (SQLException e) {
			System.out.println("保存數據出現錯誤 MovieDao error");
			e.printStackTrace();
			throw new RuntimeException(e);
		} finally {
			// Close the statement before its connection. The nested finally releases the
			// connection even when closing the statement fails, and the null checks cover
			// a failure before the statement was created.
			try {
				if (statement != null) {
					statement.close();
				}
			} catch (SQLException e) {
				e.printStackTrace();
				throw new RuntimeException(e);
			} finally {
				try {
					if (connection != null) {
						connection.close();
					}
				} catch (SQLException e) {
					e.printStackTrace();
					throw new RuntimeException(e);
				}
			}
		}
	}

 查詢所有數據

   /**
	 * Loads every row of the {@code movie} table.
	 *
	 * <p>Each row is mapped to a {@code Movie} from the columns {@code id},
	 * {@code name}, {@code types}, {@code release_date}, {@code score},
	 * {@code movieUrl} and {@code is_playable}.
	 *
	 * @return all stored movies; empty when the table has no rows
	 * @throws RuntimeException wrapping the {@link SQLException} when the query or
	 *         the closing of a JDBC resource fails
	 */
	public List<Movie> findAll(){
		
		Connection connection = null;
		PreparedStatement statement = null;
		ResultSet resultSet = null;
		
		try {
			connection = JdbcUtils.getConnection();
			
			String sql = " select * from movie ";
			
			statement = connection.prepareStatement(sql);
			
			resultSet = statement.executeQuery();

			List<Movie> list = new ArrayList<Movie>();
			while (resultSet.next()) {
				Movie movie = new Movie();
				
				movie.setId(resultSet.getInt("id"));
				movie.setName(resultSet.getString("name"));
				movie.setTypes(resultSet.getString("types"));
				movie.setRelease_date(resultSet.getString("release_date"));
				movie.setScore(resultSet.getFloat("score"));
				movie.setMovieUrl(resultSet.getString("movieUrl"));
				movie.setIs_playable(resultSet.getString("is_playable"));
				
				list.add(movie);
			}
			
			return list;
		} catch (SQLException e) {
			e.printStackTrace();
			throw new RuntimeException(e);
		} finally {
			// Release in reverse order of creation (result set, statement, connection).
			// Every close is attempted even if an earlier one fails, and null checks
			// cover a failure before the resource was created. The first close failure
			// is rethrown afterwards.
			SQLException closeFailure = null;
			try {
				if (resultSet != null) {
					resultSet.close();
				}
			} catch (SQLException e) {
				closeFailure = e;
			}
			try {
				if (statement != null) {
					statement.close();
				}
			} catch (SQLException e) {
				if (closeFailure == null) {
					closeFailure = e;
				}
			}
			try {
				if (connection != null) {
					connection.close();
				}
			} catch (SQLException e) {
				if (closeFailure == null) {
					closeFailure = e;
				}
			}
			if (closeFailure != null) {
				closeFailure.printStackTrace();
				throw new RuntimeException(closeFailure);
			}
		}
	}

獲取不同分數等級的電影數量

   /**
	 * Counts the movies in each of five score buckets.
	 *
	 * <p>The result uses the keys {@code "one"} (score &gt;= 9), {@code "two"}
	 * (8.5 &lt;= score &lt; 9), {@code "three"} (8 &lt;= score &lt; 8.5),
	 * {@code "four"} (7.5 &lt;= score &lt; 8) and {@code "five"} (score &lt; 7.5).
	 * A bucket whose query fails is reported on the console and left out of the
	 * map; the remaining buckets are still counted.
	 *
	 * @return map from bucket key to movie count
	 */
	public Map<String,Integer> Count(){
		
		// Bucket keys and their queries, index-aligned.
		String[] keys = {"one", "two", "three", "four", "five"};
		String[] queries = {
				"SELECT COUNT(1) FROM movie WHERE score>=9 ",
				// Standard AND instead of the MySQL-specific "&&" synonym.
				"SELECT COUNT(1) FROM movie WHERE score>=8.5 AND score<9 ",
				"SELECT COUNT(1) FROM movie WHERE score>=8 AND score<8.5 ",
				"SELECT COUNT(1) FROM movie WHERE score>=7.5 AND score<8 ",
				"SELECT COUNT(1) FROM movie WHERE score<7.5 "
		};
		
		Map<String,Integer> mapCount = new HashMap<String, Integer>();
		
		Connection conn = JdbcUtils.getConnection();
		try {
			for (int i = 0; i < keys.length; i++) {
				PreparedStatement stmt = null;
				ResultSet resultSet = null;
				try {
					stmt = conn.prepareStatement(queries[i]);
					resultSet = stmt.executeQuery();
					// COUNT yields a single row.
					if (resultSet.next()) {
						mapCount.put(keys[i], resultSet.getInt(1));
					}
				} catch (SQLException e) {
					e.printStackTrace();
				} finally {
					// Release this bucket's result set and statement before the next query.
					try {
						if (resultSet != null) {
							resultSet.close();
						}
					} catch (SQLException e) {
						e.printStackTrace();
					}
					try {
						if (stmt != null) {
							stmt.close();
						}
					} catch (SQLException e) {
						e.printStackTrace();
					}
				}
			}
		} finally {
			// The connection was previously never closed.
			try {
				if (conn != null) {
					conn.close();
				}
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		return mapCount;
	}

獲取每個電影評分的電影數量

   /**
	 * Counts the movies for every score from 9.9 down to 7.0 in steps of 0.1.
	 *
	 * <p>The map keys are the score texts {@code "9.9"}, {@code "9.8"}, ...,
	 * {@code "7.0"}. A score whose query fails is reported on the console and left
	 * out of the map; the remaining scores are still counted.
	 *
	 * @return map from score text to the number of movies with exactly that score
	 */
	public Map<String,Integer> lineChart(){
		
		Map<String,Integer> mapCount = new HashMap<String, Integer>();
		
		Connection conn = JdbcUtils.getConnection();
		PreparedStatement stmt = null;
		try {
			// Prepared once and re-executed with a new score for each step, instead of
			// building and preparing a new SQL string 30 times.
			stmt = conn.prepareStatement("SELECT COUNT(1) FROM movie WHERE score=?");
			for (int number = 99; number >= 70; number--) {
				// 99 -> "9.9", 91 -> "9.1", 74 -> "7.4", ...
				String score = (number / 10) + "." + (number % 10);
				ResultSet resultSet = null;
				try {
					// Bound as an exact decimal so the comparison value is the same
					// number the literal in the SQL text used to be.
					stmt.setBigDecimal(1, new java.math.BigDecimal(score));
					resultSet = stmt.executeQuery();
					// COUNT yields a single row.
					if (resultSet.next()) {
						mapCount.put(score, resultSet.getInt(1));
					}
				} catch (SQLException e) {
					e.printStackTrace();
				} finally {
					try {
						if (resultSet != null) {
							resultSet.close();
						}
					} catch (SQLException e) {
						e.printStackTrace();
					}
				}
			}
		} catch (SQLException e) {
			// Preparing the statement failed: nothing can be counted.
			e.printStackTrace();
		} finally {
			// Statement and connection were previously never closed.
			try {
				if (stmt != null) {
					stmt.close();
				}
			} catch (SQLException e) {
				e.printStackTrace();
			}
			try {
				if (conn != null) {
					conn.close();
				}
			} catch (SQLException e) {
				e.printStackTrace();
			}
		}
		return mapCount;
	}
}

     兩分鐘抓取數據2000+並保存至數據庫中,感覺還是挺慢的,有待優化代碼

代碼源碼: GitHub:https://github.com/YanKuan-IT/DouBanMoviesInfo_DB.git

注:如有什麼做得不對的地方,請指教

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章