PHP 抓取國家統計局全國省、市、區、鎮四級行政區數據(文中附有數據庫表下載地址)

數據庫下載地址
https://download.csdn.net/download/u012607165/10534630

一級行政區: 省份
<?php
        // Level 1 (provinces): download the 2017 index page and extract a flat
        // list of alternating values [code, name, code, name, ...].
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html');
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        $data = curl_exec($curl);
        curl_close($curl);

        $out = [];

        // curl_exec() returns false on transport failure; leave $out empty then.
        if ($data !== false) {
            // The page is converted from GBK to UTF-8 here, so every mb_* call
            // below must be told UTF-8 (not GBK) or offsets are computed on
            // mis-decoded characters.
            $data = mb_convert_encoding($data, 'UTF-8', 'GBK');

            // Trim the head: search for the first province row, skipping the
            // first 2000 characters. The length guard avoids an out-of-range
            // search offset on unexpectedly short responses.
            $start = mb_strlen($data, 'UTF-8') > 2000
                ? mb_strpos($data, 'provincetr', 2000, 'UTF-8')
                : false;

            if ($start !== false) {
                $data = mb_substr($data, $start, null, 'UTF-8');

                // Trim the tail: cut at the table's closing tag, searched from
                // character 200 onwards.
                $end = mb_strlen($data, 'UTF-8') > 200
                    ? mb_strpos($data, '</TABLE>', 200, 'UTF-8')
                    : false;

                if ($end !== false) {
                    $data = mb_substr($data, 0, $end, 'UTF-8');

                    // Two-digit runs are the province codes; runs of non-ASCII
                    // bytes are the (UTF-8 encoded) province names.
                    preg_match_all('/\d{2}|[\x7f-\xff]+/', $data, $matches);
                    $out = $matches[0];
                }
            }
        }

        var_dump($out);

二級行政區:城市
<?php
    public function add()
    {
        $curl = curl_init();
        curl_setopt($curl, CURLOPT_URL, 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/index.html');
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
        $data = curl_exec($curl);
        curl_close($curl);
        $data = mb_convert_encoding($data, 'UTF-8', 'GBK');

        // 裁頭
        $offset = mb_strpos($data, 'provincetr',2000,'GBK');
        $data = mb_substr($data, $offset,NULL,'GBK');

        // 裁尾
        $offset = mb_strpos($data, '</TABLE>', 200,'GBK');
        $data = mb_substr($data, 0, $offset,'GBK');
        preg_match_all('/\d{2}|[\x7f-\xff]+/', $data, $out);
        $out = $out[0];  // 省份


        $b = 0;
        $time = time();
        for ($i = 0; $i < count($out); $i++) {

            $curl = curl_init();
            curl_setopt($curl, CURLOPT_URL, 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' . $out[$i++] . '.html');
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
            $data = curl_exec($curl);
            curl_close($curl);
            $data = mb_convert_encoding($data, 'UTF-8', 'GBK');

            // 裁頭
            $offset = mb_strpos($data, 'citytr',2000,'GBK');
            $data = mb_substr($data, $offset,NULL,'GBK');

            // 裁尾
            $offset = mb_strpos($data, '</TABLE>', 200,'GBK');
            $data = mb_substr($data, 0, $offset,'GBK');

            preg_match_all('/\d{12}|[\x7f-\xff]+/', $data, $city);
            $city = $city[0];
            // 某個省份的城市


            var_dump($city);
            echo ++$b;

            $list = [];
            for ($j=0; $j < count($city) ; $j++) {
                $list[] = [
                    'code'  => $city[$j],
                    'name'  => $city[++$j],
                    'create_time' => $time,
                    'update_time' => $time,
                ];
            }
            Db::table('city')->insertAll($list);  // 城市
            /*
            $data = [
                'code' => '1sfds',
                'name' => 'test',
                'create_time' => time(),
                'update_time' => time(),
            ];
            $list = [
                $data, $data, $data
            ];
            */
            // return Db::table('province')->insertAll($list);
        }

        // return Db::table('province')->insertAll($list);
    } 

三級行政區:區縣

<?php
        // Level 3 (counties): for every stored city code, download the city
        // page, extract its (code, name) pairs and bulk-insert them into the
        // `county` table.
        $code_list = Db::table('city')->column('code');
        $time = time();
        foreach ($code_list as $code) {
            // The first four digits of the stored code name the city page;
            // the first two of those name the province directory.
            $code = substr($code, 0, 4);
            $curl = curl_init();
            curl_setopt($curl, CURLOPT_URL, 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' . substr($code, 0, 2) . '/' . $code . '.html');
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
            $data = curl_exec($curl);
            curl_close($curl);

            // Transport failure: nothing to parse for this city.
            if ($data === false) {
                continue;
            }

            // The page is converted from GBK to UTF-8, so the mb_* calls below
            // must use UTF-8 as well.
            $data = mb_convert_encoding($data, 'UTF-8', 'GBK');

            // Trim the head. The length guard replaces the former @-suppression:
            // it keeps the search offset inside the string on short responses.
            if (mb_strlen($data, 'UTF-8') <= 2000) {
                continue;
            }
            $start = mb_strpos($data, 'countytr', 2000, 'UTF-8');
            if ($start === false) {
                // This page has no county rows.
                continue;
            }
            $data = mb_substr($data, $start, null, 'UTF-8');

            // Trim the tail at the table's closing tag.
            if (mb_strlen($data, 'UTF-8') <= 200) {
                continue;
            }
            $end = mb_strpos($data, '</TABLE>', 200, 'UTF-8');
            if ($end === false) {
                continue;
            }
            $data = mb_substr($data, 0, $end, 'UTF-8');

            // Twelve-digit runs are the codes; runs of non-ASCII bytes are the
            // (UTF-8 encoded) names. The result alternates code, name, ...
            preg_match_all('/\d{12}|[\x7f-\xff]+/', $data, $matches);

            $list = [];
            foreach (array_chunk($matches[0], 2) as $pair) {
                // A trailing code without a name would be an incomplete row.
                if (count($pair) < 2) {
                    continue;
                }
                $list[] = [
                    'code'  => $pair[0],
                    'name'  => $pair[1],
                    'create_time' => $time,
                    'update_time' => $time,
                ];
            }

            // Nothing was parsed for this city: skip the bulk insert.
            if ($list) {
                Db::table('county')->insertAll($list);
            }
        }


四級行政區:鄉村或街道

<?php
        // Level 4 (towns / sub-districts): for every stored county code,
        // download the county page, extract its (code, name) pairs and
        // bulk-insert them into the `town` table.
        $code_list = Db::table('county')->column('code');
        $time = time();
        foreach ($code_list as $code) {
            // The first six digits of the stored code name the county page;
            // digits 1-2 and 3-4 name the two directories above it.
            $code = substr($code, 0, 6);
            $curl = curl_init();
            curl_setopt($curl, CURLOPT_URL, 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2017/' . substr($code, 0, 2) . '/' . substr($code, 2, 2) . '/' . $code . '.html');
            curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
            $data = curl_exec($curl);
            curl_close($curl);

            // Transport failure: nothing to parse for this county.
            if ($data === false) {
                continue;
            }

            // The page is converted from GBK to UTF-8, so the mb_* calls below
            // must use UTF-8 as well.
            $data = mb_convert_encoding($data, 'UTF-8', 'GBK');

            // Trim the head. The length guard replaces the former @-suppression:
            // it keeps the search offset inside the string on short responses.
            if (mb_strlen($data, 'UTF-8') <= 2000) {
                continue;
            }
            $start = mb_strpos($data, 'towntr', 2000, 'UTF-8');
            if ($start === false) {
                // This page has no town rows.
                continue;
            }
            $data = mb_substr($data, $start, null, 'UTF-8');

            // Trim the tail at the table's closing tag.
            if (mb_strlen($data, 'UTF-8') <= 200) {
                continue;
            }
            $end = mb_strpos($data, '</TABLE>', 200, 'UTF-8');
            if ($end === false) {
                continue;
            }
            $data = mb_substr($data, 0, $end, 'UTF-8');

            // Twelve-digit runs are the codes; runs of non-ASCII bytes are the
            // (UTF-8 encoded) names. The result alternates code, name, ...
            preg_match_all('/\d{12}|[\x7f-\xff]+/', $data, $matches);

            $list = [];
            foreach (array_chunk($matches[0], 2) as $pair) {
                // A trailing code without a name would be an incomplete row.
                if (count($pair) < 2) {
                    continue;
                }
                $list[] = [
                    'code'  => $pair[0],
                    'name'  => $pair[1],
                    'create_time' => $time,
                    'update_time' => $time,
                ];
            }

            // Nothing was parsed for this county: skip the bulk insert.
            if ($list) {
                Db::table('town')->insertAll($list);
            }
        }


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章