package com.example.xml;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line utility that reads a text file containing {@code <key>...</key>} and
 * {@code <string>...</string>} elements and writes one
 * {@code <string name="KEY">VALUE</string>} line per key/value pair.
 *
 * <p>Keys and values are paired by position: the n-th {@code <key>} match is combined
 * with the n-th {@code <string>} match.
 */
public class jx
{
    /** Input file used when no input path is given on the command line. */
    private static final String DEFAULT_INPUT = "C:/Users/Roronoa/Desktop/Fr/a.xml";

    /** Output file used when no output path is given on the command line. */
    private static final String DEFAULT_OUTPUT = "C:/Users/Roronoa/Desktop/Fr/c.xml";

    /** Charset name used for both reading the input and writing the output. */
    private static final String ENCODING = "UTF-8";

    /** Matches the text between {@code <key>} and {@code </key>} (reluctant, single line). */
    private static final Pattern KEY_PATTERN = Pattern.compile("<key>(.+?)</key>");

    /** Matches the text between {@code <string>} and {@code </string>} (reluctant, single line). */
    private static final Pattern STRING_PATTERN = Pattern.compile("<string>(.+?)</string>");

    /**
     * Reads the input file, extracts keys and values, and writes the paired entries.
     *
     * @param args optional: {@code args[0]} is the input path and {@code args[1]} the output
     *             path; a missing argument falls back to the built-in default path
     * @throws IOException if the output file cannot be created or written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT;
        String outputPath = (args != null && args.length > 1) ? args[1] : DEFAULT_OUTPUT;

        // Read and parse the input BEFORE touching the output, so that a failure while
        // parsing does not leave behind a truncated output file.
        StringBuffer buf = new StringBuffer(2 * 1024 * 1024);
        readTxtFile(inputPath, buf);
        String content = buf.toString();
        List<String> keys = getKey(content);
        List<String> values = getString(content);

        // Pair by position; never index past the shorter of the two lists.
        int pairs = Math.min(keys.size(), values.size());
        if (keys.size() != values.size()) {
            System.err.println("Warning: found " + keys.size() + " <key> entries but "
                    + values.size() + " <string> entries; writing the first "
                    + pairs + " pairs only.");
        }

        // Write with the same charset the input was decoded with.
        PrintWriter pw = new PrintWriter(outputPath, ENCODING);
        try {
            for (int i = 0; i < pairs; i++) {
                System.out.println(keys.get(i));
                pw.write("<string name=\"" + keys.get(i) + "\">");
                pw.write(values.get(i));
                pw.write("</string>");
                pw.write("\n");
            }
            // PrintWriter swallows I/O errors; surface them explicitly.
            if (pw.checkError()) {
                throw new IOException("Failed to write output file: " + outputPath);
            }
        } finally {
            pw.close();
        }
    }

    /**
     * Appends the content of a UTF-8 text file to {@code buf}, line by line and WITHOUT
     * the line terminators (consecutive lines are concatenated directly).
     *
     * <p>If the path is not an existing regular file, or reading fails, {@code "!!!"} is
     * printed to standard output (plus a stack trace on failure) and the method returns
     * normally; no exception is propagated to the caller.
     *
     * @param filePath path of the file to read
     * @param buf      buffer that receives the file content
     */
    public static void readTxtFile(String filePath, StringBuffer buf) {
        try {
            File file = new File(filePath);
            // isFile() is only true for an existing regular file.
            if (!file.isFile()) {
                System.out.println("!!!");
                return;
            }
            FileInputStream in = new FileInputStream(file);
            try {
                // Decode explicitly as UTF-8 instead of relying on the platform charset.
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING));
                String line;
                while ((line = reader.readLine()) != null) {
                    buf.append(line);
                }
            } finally {
                // Always release the file handle, even when reading fails midway.
                in.close();
            }
        } catch (Exception e) {
            System.out.println("!!!");
            e.printStackTrace();
        }
    }

    /**
     * Extracts the text of every {@code <key>...</key>} element, in document order.
     * Each match is also echoed to standard output.
     *
     * @param html text to scan; {@code null} yields an empty list
     * @return a new mutable list with the captured key texts
     * @throws IOException never thrown by this implementation; kept for source compatibility
     */
    public static List<String> getKey(String html) throws IOException {
        return collectMatches(KEY_PATTERN, html);
    }

    /**
     * Extracts the text of every {@code <string>...</string>} element, in document order.
     * Each match is also echoed to standard output.
     *
     * @param html text to scan; {@code null} yields an empty list
     * @return a new mutable list with the captured string texts
     * @throws IOException never thrown by this implementation; kept for source compatibility
     */
    public static List<String> getString(String html) throws IOException {
        return collectMatches(STRING_PATTERN, html);
    }

    /** Collects capture group 1 of every match of {@code pattern} in {@code text}, echoing each to stdout. */
    private static List<String> collectMatches(Pattern pattern, String text) {
        List<String> found = new ArrayList<String>();
        if (text == null) {
            return found;
        }
        Matcher m = pattern.matcher(text);
        while (m.find()) {
            String captured = m.group(1);
            found.add(captured);
            System.out.println(captured);
        }
        return found;
    }
}
// java 爬蟲分析xml
// 發表評論
// 所有評論
// 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.