安卓爬蟲--Jsoup的使用

jsoup的使用

使用Jsoup來爬取學校公告,配合Recyclerview顯示出來(這個測試最好在白天,晚上官網有時候不能訪問)

github源碼地址:點擊查看

效果如圖:


首先添加依賴

// The layouts and Java sources in this tutorial reference androidx.* classes,
// so the AndroidX artifacts are required instead of the legacy
// com.android.support libraries.
implementation 'org.jsoup:jsoup:1.11.3'
implementation 'androidx.appcompat:appcompat:1.0.2'
implementation 'androidx.recyclerview:recyclerview:1.0.0'

打開我們要爬的目標網址,我這裏選擇了我們學校圖書館的公告
http://www.lib.wust.edu.cn/bullet/bullet.aspx

點擊右鍵查看網頁源代碼:

找到一個id爲content的table
可以看到通知列表在其中的 第一個tr -> 第二個td -> 第四個div中
這個一定要看清楚,別數錯了,一會爬取數據時要用 

activity_main:

<?xml version="1.0" encoding="utf-8"?>
<!-- Main screen: a toolbar stacked vertically above the announcement list. -->
<LinearLayout
    xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:orientation="vertical"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Title bar with a fixed title and white title text. -->
    <androidx.appcompat.widget.Toolbar
        android:id="@+id/toolbar"
        android:background="@color/colorPrimary"
        android:layout_width="match_parent"
        android:layout_height="?attr/actionBarSize"
        app:title="校園公告"
        app:titleTextColor="#fff" />

    <!-- List that MainActivity looks up by id and fills with an adapter. -->
    <androidx.recyclerview.widget.RecyclerView
        android:id="@+id/student_list"
        android:layout_width="match_parent"
        android:layout_height="match_parent" />

</LinearLayout>

News:

package com.example.jsouptest;

/**
 * Plain data holder for a single announcement: a title string and a date string.
 * Both values start out as {@code null} until set.
 */
public class News {
    private String title;
    private String date;

    /**
     * @return the announcement title, or {@code null} if none has been set
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @param title the announcement title to store
     */
    public void setTitle(String title) {
        this.title = title;
    }

    /**
     * @return the announcement date text, or {@code null} if none has been set
     */
    public String getDate() {
        return this.date;
    }

    /**
     * @param date the announcement date text to store
     */
    public void setDate(String date) {
        this.date = date;
    }
}

newsitem:

<?xml version="1.0" encoding="utf-8"?>
<!-- One row of the announcement list: icon + "公告N" heading, title, date, divider. -->
<LinearLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    android:layout_width="match_parent"
    android:layout_height="wrap_content"
    android:elevation="8dp"
    android:orientation="vertical">

    <RelativeLayout
        android:layout_width="match_parent"
        android:layout_height="wrap_content">
        <!-- Purely decorative icon, so it is hidden from accessibility services. -->
        <ImageView
            android:id="@+id/ad"
            android:layout_alignParentLeft="true"
            android:layout_marginTop="5dp"
            android:layout_marginLeft="10dp"
            android:background="@drawable/ad"
            android:contentDescription="@null"
            android:layout_width="25dp"
            android:layout_height="25dp"/>
        <!-- Heading; the text here is a placeholder that the adapter overwrites. -->
        <TextView
            android:text="公告1"
            android:id="@+id/head"
            android:layout_toRightOf="@id/ad"
            android:textSize="18sp"
            android:layout_marginTop="5dp"
            android:layout_marginLeft="10dp"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"/>

        <!-- Announcement title. Text size is in sp (not dp) so it follows the
             user's font-scale setting like the other text views in this row. -->
        <androidx.appcompat.widget.AppCompatTextView
            android:id="@+id/title_text"
            android:layout_below="@id/head"
            android:layout_width="match_parent"
            android:layout_marginTop="5dp"
            android:layout_centerVertical="true"
            android:layout_height="wrap_content"
            android:lines="2"
            android:textColor="#000"
            android:layout_marginLeft="10dp"
            android:text="關於一次大型會議的記錄關於一次大型會議的記錄..."
            android:textSize="20sp" />

        <!-- Announcement date, aligned to the end of the row. -->
        <androidx.appcompat.widget.AppCompatTextView
            android:layout_below="@id/title_text"
            android:gravity="end"
            android:id="@+id/date_text"
            android:layout_width="match_parent"
            android:layout_height="wrap_content"
            android:layout_marginEnd="15dp"
            android:layout_marginRight="15dp"
            android:layout_marginBottom="5dp"
            android:textColor="#000"
            android:text="2017-08-15"
            android:textSize="16sp" />
    </RelativeLayout>

    <!-- Thin grey divider between rows. -->
    <View
        android:layout_width="match_parent"
        android:layout_height="0.5dp"
        android:layout_marginLeft="5dp"
        android:layout_marginTop="5dp"
        android:layout_marginRight="5dp"
        android:layout_marginBottom="5dp"
        android:background="#ACACA9" />

</LinearLayout>

NewsAdapter:

 

package com.example.jsouptest;

import android.content.Context;
import android.view.LayoutInflater;
import android.view.View;
import android.view.ViewGroup;
import android.widget.TextView;

import java.util.ArrayList;
import java.util.List;

import androidx.annotation.NonNull;
import androidx.recyclerview.widget.RecyclerView;

/**
 * RecyclerView adapter that shows a list of {@link News} items, one
 * {@code R.layout.newsitem} row per item.
 */
public class NewsAdapter extends RecyclerView.Adapter<NewsAdapter.ViewHolder> {
    List<News> newslist = new ArrayList<>();
    LayoutInflater inflater;
    Context context;

    /**
     * Creates an adapter over the given items.
     *
     * @param context  context used to obtain the {@link LayoutInflater} for row views
     * @param newslist items to display; {@code null} is treated as an empty list
     */
    public NewsAdapter(Context context, List<News> newslist) {
        // Keep the empty default list when the caller passes null so that
        // getItemCount()/onBindViewHolder never dereference a null list.
        if (newslist != null) {
            this.newslist = newslist;
        }
        this.context = context;
        inflater = LayoutInflater.from(context);
    }

    /**
     * Inflates one row view and wraps it in a {@link ViewHolder}.
     */
    @Override
    public NewsAdapter.ViewHolder onCreateViewHolder(@NonNull ViewGroup parent, int viewType) {
        // Inflate against the parent (attachToRoot = false) so the row root's
        // layout_width/layout_height are resolved; a null root discards them.
        View view = inflater.inflate(R.layout.newsitem, parent, false);
        return new ViewHolder(view);
    }

    /**
     * Fills a row with the title and date of the item at {@code position} and
     * a 1-based "公告N" heading.
     */
    @Override
    public void onBindViewHolder(@NonNull NewsAdapter.ViewHolder holder, int position) {
        News news = newslist.get(position);
        int pos = position + 1;
        holder.title.setText(news.getTitle());
        holder.date.setText(news.getDate());
        holder.head.setText("公告" + pos);
    }

    /**
     * @return number of items currently in the backing list
     */
    @Override
    public int getItemCount() {
        return newslist.size();
    }

    /**
     * Caches the three text views of a row. Declared static because it does
     * not use any state of the enclosing adapter.
     */
    public static class ViewHolder extends RecyclerView.ViewHolder {
        TextView head;
        TextView title;
        TextView date;

        public ViewHolder(@NonNull View itemView) {
            super(itemView);
            head = itemView.findViewById(R.id.head);
            date = itemView.findViewById(R.id.date_text);
            title = itemView.findViewById(R.id.title_text);
        }
    }
}

 

NewsJsoup(在這個中去爬取數據)(按照我們上面分析的位置去爬取,具體用法就按下面去做)

package com.example.jsouptest;

import android.util.Log;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Downloads the library bulletin page and extracts its announcements as
 * {@link News} objects.
 */
public class NewsJsoup {
    private String api = "http://www.lib.wust.edu.cn/bullet/bullet.aspx";
    private static final String TAG = "NewsJsoup";
    List<News> newslist = new ArrayList<>();

    /**
     * Fetches the bulletin page (5 second timeout) and parses the announcement
     * list located at: element with id "content" -> first {@code tr} ->
     * second {@code td} -> fourth {@code div}.
     *
     * <p>Performs blocking network I/O. Each call returns a freshly built
     * list, so repeated calls do not accumulate duplicates.
     *
     * @return the parsed announcements, in page order
     * @throws IOException if the request fails or the page does not have the
     *                     expected structure
     */
    public List<News> GetNews() throws IOException {
        Connection connection = Jsoup.connect(api);
        connection.timeout(5000);
        Document document = connection.get();

        Element content = document.getElementById("content");
        if (content == null) {
            throw new IOException("Element with id \"content\" not found at " + api);
        }
        Element firstRow = nthMatch(content, "tr", 0);
        Element secondCell = nthMatch(firstRow, "td", 1);
        Element listContainer = nthMatch(secondCell, "div", 3);

        List<News> parsed = new ArrayList<>();
        for (Element entry : listContainer.children()) {
            Elements divs = entry.select("div");
            // Title is read from index 1 and date from index 2; skip entries
            // that do not have both instead of throwing an unchecked exception.
            if (divs.size() < 3) {
                Log.w(TAG, "Skipping list entry with only " + divs.size() + " div(s)");
                continue;
            }
            News news = new News();
            news.setTitle(divs.get(1).text().replace("»", ""));
            news.setDate(divs.get(2).text());
            parsed.add(news);
        }
        newslist = parsed;
        return newslist;
    }

    /**
     * Returns the {@code index}-th element matching {@code cssQuery} under
     * {@code root}, reporting a missing match as an {@link IOException}
     * rather than an {@link IndexOutOfBoundsException}.
     */
    private static Element nthMatch(Element root, String cssQuery, int index) throws IOException {
        Elements matches = root.select(cssQuery);
        if (index >= matches.size()) {
            throw new IOException("Unexpected page structure: expected at least " + (index + 1)
                    + " <" + cssQuery + "> element(s), found " + matches.size());
        }
        return matches.get(index);
    }

}

 

MainActivity:(注意網絡請求要放在子線程中,ui顯示又要返回主線程中)

package com.example.jsouptest;

import androidx.appcompat.app.AppCompatActivity;
import androidx.appcompat.widget.Toolbar;
import androidx.recyclerview.widget.LinearLayoutManager;
import androidx.recyclerview.widget.RecyclerView;

import android.content.Context;
import android.os.Bundle;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class MainActivity extends AppCompatActivity {
    private RecyclerView recyclerView;
    Toolbar toolbar;
    private List<News> news = new ArrayList<>();
    Context context = MainActivity.this;


    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        recyclerView = findViewById(R.id.student_list);
        toolbar = findViewById(R.id.toolbar);
        InitData();
    }

    private void InitData() {
        new Thread(new Runnable() {
            @Override
            public void run() {
                try {
                    NewsJsoup newsJsoup = new NewsJsoup();
                    news = newsJsoup.GetNews();
                    runOnUiThread(new Runnable() {
                        @Override
                        public void run() {
                            InitRecyclerView();
                        }
                    });
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }).start();

    }

    private void InitRecyclerView(){
        LinearLayoutManager linearLayoutManager = new LinearLayoutManager(this);
        recyclerView.setLayoutManager(linearLayoutManager);
        NewsAdapter newsAdapter = new NewsAdapter(context,news);
        recyclerView.setAdapter(newsAdapter);
    }

}

別忘了對於網絡的權限設置(安卓9開始默認禁止明文HTTP請求,而目標網址是http,所以除了網絡權限還需要下面的額外配置)

在res/xml目錄下新建一個名爲network_security_config.xml的XML資源文件

<?xml version="1.0" encoding="utf-8"?>
<!-- Permit cleartext (http://) traffic only for the library site that is
     scraped; all other hosts keep the platform's default policy. -->
<network-security-config>
    <domain-config cleartextTrafficPermitted="true">
        <domain includeSubdomains="true">lib.wust.edu.cn</domain>
    </domain-config>
</network-security-config>

然後在AndroidManifest.xml中添加網絡權限:

    <uses-permission android:name="android.permission.INTERNET" />

在<application>標籤中添加屬性:

android:networkSecurityConfig="@xml/network_security_config"

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章