java 爬蟲分析xml

package com.example.xml;
import java.io.BufferedReader;
import java.io.File;  
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;  
import java.util.regex.Matcher;
import java.util.regex.Pattern;
      
    /**
     * Extracts the text of every {@code <key>} and {@code <string>} element from
     * an XML file and rewrites them, paired by position, as lines of the form
     * {@code <string name="KEY">VALUE</string>}.
     *
     * <p>Extraction is regex based (no XML parser is involved): the whole input
     * is collapsed onto a single line and scanned for the two element kinds.
     */
    public class jx
    {
        /** Input file used when no path is passed on the command line. */
        private static final String DEFAULT_INPUT = "C:/Users/Roronoa/Desktop/Fr/a.xml";

        /** Output file used when no path is passed on the command line. */
        private static final String DEFAULT_OUTPUT = "C:/Users/Roronoa/Desktop/Fr/c.xml";

        /** Charset used for both reading and writing, so text round-trips intact. */
        private static final String ENCODING = "UTF-8";

        // Compiled once. ".*?" (rather than ".+?") lets an empty element such as
        // <string></string> match as "" instead of swallowing the next element,
        // which would shift every later key/value pair out of alignment.
        private static final Pattern KEY_PATTERN = Pattern.compile("<key>(.*?)</key>");
        private static final Pattern STRING_PATTERN = Pattern.compile("<string>(.*?)</string>");

        /**
         * Reads the input file, pairs the i-th key with the i-th string and writes
         * one {@code <string name="...">...</string>} line per pair.
         *
         * @param args optional: {@code args[0]} is the input path and
         *             {@code args[1]} the output path; any that are missing fall
         *             back to the built-in default paths
         * @throws IOException if the output file cannot be created or written
         */
        public static void main(String[] args) throws IOException {
            String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
            String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

            StringBuffer buf = new StringBuffer(2 * 1024 * 1024);
            readTxtFile(inputPath, buf);

            String lv = buf.toString();
            List<String> keys = getKey(lv);
            List<String> values = getString(lv);

            // Only complete pairs can be written; indexing past the shorter list
            // would throw IndexOutOfBoundsException.
            int pairCount = Math.min(keys.size(), values.size());
            if (keys.size() != values.size()) {
                System.err.println("Warning: found " + keys.size() + " <key> and "
                        + values.size() + " <string> elements; writing only the first "
                        + pairCount + " pairs");
            }

            // try-with-resources flushes and closes the file even if the loop throws.
            try (PrintWriter pw = new PrintWriter(outputPath, ENCODING)) {
                for (int i = 0; i < pairCount; i++) {
                    System.out.println(keys.get(i));
                    pw.write("<string name=\"" + escapeAttributeQuotes(keys.get(i)) + "\">");
                    pw.write(values.get(i));
                    pw.write("</string>");
                    pw.write("\n");
                }
                // PrintWriter never throws on write; it only records the failure.
                if (pw.checkError()) {
                    throw new IOException("Failed to write " + outputPath);
                }
            }
        }

        /**
         * Appends the content of a UTF-8 text file to {@code buf}.
         *
         * <p>Lines are appended back to back with no separator, so the result is
         * a single line; the element patterns rely on this because {@code .} does
         * not match line terminators.
         *
         * <p>This method is best-effort: when the file is missing, is not a regular
         * file, or cannot be read, it prints {@code "!!!"} to standard output and
         * returns without throwing.
         *
         * @param filePath path of the file to read
         * @param buf      buffer that receives the file content
         */
        public static void readTxtFile(String filePath, StringBuffer buf) {
            if (filePath == null || buf == null) {
                System.out.println("!!!");
                return;
            }
            File file = new File(filePath);
            // isFile() is already false for a path that does not exist.
            if (!file.isFile()) {
                System.out.println("!!!");
                return;
            }
            // Closing the BufferedReader also closes the wrapped streams.
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(file), ENCODING))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    buf.append(line);
                }
            } catch (IOException e) {
                System.out.println("!!!");
                e.printStackTrace();
            }
        }

        /**
         * Returns the text of every {@code <key>...</key>} element in document
         * order, echoing each one to standard output.
         *
         * @param html text to scan; {@code null} yields an empty list
         * @return the extracted texts, never {@code null}
         * @throws IOException never thrown; declared for compatibility with
         *                     existing callers
         */
        public static List<String> getKey(String html) throws IOException {
            return extractAll(KEY_PATTERN, html);
        }

        /**
         * Returns the text of every {@code <string>...</string>} element in
         * document order, echoing each one to standard output.
         *
         * @param html text to scan; {@code null} yields an empty list
         * @return the extracted texts, never {@code null}
         * @throws IOException never thrown; declared for compatibility with
         *                     existing callers
         */
        public static List<String> getString(String html) throws IOException {
            return extractAll(STRING_PATTERN, html);
        }

        /** Collects capture group 1 of every match of {@code pattern} in {@code text}. */
        private static List<String> extractAll(Pattern pattern, String text) {
            List<String> found = new ArrayList<String>();
            if (text == null) {
                return found;
            }
            Matcher m = pattern.matcher(text);
            while (m.find()) {
                found.add(m.group(1));
                System.out.println(m.group(1));
            }
            return found;
        }

        /** Escapes double quotes so the value cannot terminate a quoted XML attribute. */
        private static String escapeAttributeQuotes(String value) {
            return value.replace("\"", "&quot;");
        }

    }

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章