php智能識別收貨地址信息

功能需求:用戶輸入混合的收貨地址,能智能識別出地址,手機,姓名

準備:需要兩張表,一張地區表和一張姓氏表(地區表應該不難取得;姓氏表是我搜索中國姓氏後自製的,文末會附上兩張表的結構)

  • 思路:主要思路分兩種,一種是用戶正常輸入全地址,則順序按地區等級匹配地址;另一種用戶非正常輸入(省市區有缺少的),則全面模糊搜索表,再根據結果對比原地址。

  • 提醒:手機可以根據自己需求修改正則;
    名字只匹配中文,可以根據自己的需求修改姓氏表以及正則
    地址匹配暫未發現問題

代碼:

<?php
/**
 * Extracts a structured shipping address (province / city / district / detail),
 * a phone number and a recipient name from one free-form line of text.
 *
 * Two strategies are used for the region part:
 *   1. getAddressArrar(): walk the region tree top-down (province -> city -> district);
 *   2. addressVague():    fuzzy-search the `region` table and rebuild the chain bottom-up.
 *
 * External dependencies (not defined in this file): Db::getInstance('DbTrade') and
 * $GLOBALS['db'], both used as query wrappers exposing getAll() (and getOne() on
 * $GLOBALS['db']).
 * NOTE(review): two different DB accessors are used for the same `region` table —
 * confirm whether they can be unified.
 */
class DistinguishAddress {

    /** @var array|null Region tree memoized for the lifetime of this instance. */
    private $regionTree = null;

    /**
     * Entry point: parse a mixed address string and return the best match.
     *
     * @param string $address Raw user input containing address, phone and name.
     * @return array Keys: province, city, district (each array with region_id/region_name),
     *               info (full address text), level (0-3), optionally mobile, name,
     *               name_num, plus the *_list keys added by getCityLevelList().
     */
    public function getAddressResult($address){
        // Strategy 1: exact top-down walk.
        $result = $this->getAddressArrar($address);

        // Strategy 2 only replaces the result when it resolves all three levels.
        if($result['level'] != 3){
            $result_sub = $this->addressVague($address);
            // addressVague() returns false when it finds nothing.
            if(is_array($result_sub) && $result_sub['level'] == 3){
                $result = $result_sub;
            }
        }

        // Contact number: a mobile number wins over a landline.
        $mobile = $this->matchPhone($address);
        if($mobile !== ''){
            $result['mobile'] = $mobile;
        }

        // Recipient name (probabilistic, driven by the surname table).
        $result = array_merge($result, $this->matchName($address));

        // Strip phone and name from the detail text, then prepend the region names.
        $info = isset($result['info']) ? (string)$result['info'] : '';
        if($mobile !== ''){
            $info = str_replace($mobile, '', $info);
        }
        if(!empty($result['name'])){
            $info = str_replace($result['name'], '', $info);
        }
        $result['info'] = $result['province']['region_name']
            . $result['city']['region_name']
            . $result['district']['region_name']
            . $info;

        return $this->getCityLevelList($result);
    }

    /**
     * Find the first phone number in the text.
     *
     * @param string $address
     * @return string The matched number, or '' when none is present.
     */
    private function matchPhone($address){
        if(preg_match('/1\d{10}/', $address, $mobiles)){ // mobile
            return $mobiles[0];
        }
        if(preg_match('/(\d{3,4}-)?\d{7,8}/', $address, $mobiles)){ // landline
            return $mobiles[0];
        }
        return '';
    }

    /**
     * Guess the recipient name among the 2-4 character CJK runs of the text.
     *
     * @param string $address
     * @return array May contain 'name_num' (number of surname rows that matched)
     *               and 'name' (the chosen run); empty when there is no candidate.
     */
    private function matchName($address){
        $found = array();
        if(!preg_match_all('/[\x{4e00}-\x{9fa5}]{2,}/iu', $address, $names)){
            return $found;
        }

        $candidates = array();
        $conditions = array();
        foreach($names[0] as $name){
            $len = mb_strlen($name, 'utf-8');
            // Only runs longer than 1 and shorter than 5 characters can be a name.
            if($len > 1 && $len < 5){
                $candidates[] = $name;
                // Safe to interpolate: the regex above guarantees a single CJK character.
                $sub_name = mb_substr($name, 0, 1, 'utf-8');
                $conditions[$sub_name] = "name like '{$sub_name}%'";
            }
        }
        if(!$conditions){
            return $found;
        }

        $names_sql = "select name from surname where " . implode(' or ', $conditions) . " order by sort desc";
        $list = Db::getInstance('DbTrade')->getAll($names_sql);

        // How many surname rows are possible for this input.
        $found['name_num'] = is_array($list) ? count($list) : 0;
        if($found['name_num'] > 0){
            // Highest-priority surname; the last candidate starting with it wins.
            $name_first = $list[0]['name'];
            $first_len = mb_strlen($name_first, 'utf-8');
            // Only length-filtered candidates are considered, so a long address
            // fragment can no longer be returned as the name.
            foreach($candidates as $name){
                if(mb_substr($name, 0, $first_len, 'utf-8') == $name_first){
                    $found['name'] = $name;
                }
            }
        }
        return $found;
    }

    /**
     * Attach the sibling lists (all provinces, cities of the province, districts
     * of the city) for every level that was resolved.
     *
     * @param array $result Result produced by getAddressInfo()/getAddressResult().
     * @return array $result with province_list / city_list / district_list appended.
     */
    public function getCityLevelList($result){
        $regions = $this->getRegionTreeList();

        // Province list - only when a province was resolved.
        $province_id = $this->regionField($result, 'province', 'region_id');
        if($province_id){
            foreach($this->withoutChilds($regions) as $region){
                $result['province_list'][] = $region;
            }
        }

        // City list - only when a city was resolved.
        $city_id = $this->regionField($result, 'city', 'region_id');
        $cities = $this->childsOf($regions, $province_id);
        if($city_id){
            foreach($this->withoutChilds($cities) as $region){
                $result['city_list'][] = $region;
            }
        }

        // District list - only when a district was resolved.
        $district_id = $this->regionField($result, 'district', 'region_id');
        if($district_id){
            foreach($this->withoutChilds($this->childsOf($cities, $city_id)) as $region){
                $result['district_list'][] = $region;
            }
        }

        return $result;
    }

    /**
     * Read $result[$level][$field] without raising on missing keys.
     */
    private function regionField($result, $level, $field){
        return (is_array($result) && isset($result[$level][$field])) ? $result[$level][$field] : null;
    }

    /**
     * Return the 'childs' branch of node $id, or an empty array when it is absent.
     */
    private function childsOf($nodes, $id){
        if($id && is_array($nodes) && isset($nodes[$id]['childs']) && is_array($nodes[$id]['childs'])){
            return $nodes[$id]['childs'];
        }
        return array();
    }

    /**
     * Copy a list of tree nodes, dropping each node's 'childs' branch.
     */
    private function withoutChilds($nodes){
        $flat = array();
        if(is_array($nodes)){
            foreach($nodes as $node){
                unset($node['childs']);
                $flat[] = $node;
            }
        }
        return $flat;
    }

    /**
     * Load the nested region tree, from the regions.json cache when possible,
     * otherwise from the `region` table (the cache file is then rewritten).
     *
     * @return array Tree keyed by region_id; each node carries a 'childs' sub-tree.
     */
    public function getRegionTreeList(){
        if(is_array($this->regionTree)){
            return $this->regionTree;
        }

        // Path is relative to the current working directory.
        $file_name = 'regions.json';
        $regions = null;
        if(is_file($file_name)){
            $raw = file_get_contents($file_name);
            if($raw !== false){
                $regions = json_decode($raw, true);
            }
        }

        // A missing, unreadable, corrupt or empty cache falls back to the database.
        if(!is_array($regions) || !$regions){
            $region_sql = "select region_id,region_name,parent_id from region";
            $rows = Db::getInstance('DbTrade')->getAll($region_sql);
            $regions = $this->arrayKey(is_array($rows) ? $rows : array());

            // Never cache an empty tree or a failed encode, or the bad cache would stick.
            if($regions){
                $json = json_encode($regions);
                if($json !== false){
                    file_put_contents($file_name, $json, LOCK_EX);
                }
            }
        }

        if($regions){
            $this->regionTree = $regions;
        }
        return $regions;
    }

    /**
     * Strategy 1: resolve province, then city, then district by walking the tree.
     *
     * @param string $address
     * @return array See getAddressInfo().
     */
    public function getAddressArrar($address){
        $regions = $this->getRegionTreeList();
        $province = $city = $district = array();

        // First level: province.
        $province = $this->checkAddress($address, $regions);
        if($province){
            // Second level: city.
            $city = $this->checkAddress($address, $province['list']);
            if($city){
                // Third level: district; ambiguous prefixes are resolved here.
                $district = $this->checkAddress($address, $city['list'], true);
            }
        }

        return $this->getAddressInfo($address, $province, $city, $district);
    }

    /**
     * Strategy 2: fuzzy lookup for inputs that omit some region levels.
     *
     * Tokens shaped like "XX市", "XX區"... are searched in the `region` table, and
     * each hit is expanded into its province/city/district chain.
     *
     * @param string $address
     * @return array|false Result of getAddressInfo() for the best chain, or false.
     */
    public function addressVague($address){
        // Simplified variants are accepted next to the traditional suffixes, so the
        // lookup works whichever script the input and the region table use.
        if(!preg_match_all('/\S{2}[自市區区鎮镇縣县鄉乡島岛州]/iu', $address, $arr)){
            return false;
        }

        $conditions = array();
        foreach($arr[0] as $value){
            // \S also matches quotes and LIKE wildcards, so only pure-CJK tokens may
            // reach the SQL text (prevents SQL injection through the address).
            if(preg_match('/^[\x{4e00}-\x{9fa5}]+\z/u', $value)){
                $conditions[$value] = "region_name like '%{$value}'";
            }
        }
        if(!$conditions){
            return false;
        }

        $region_sql = "select region_id,region_name,parent_id,region_type from region where " . implode(' or ', $conditions);
        $citys = $GLOBALS['db']->getAll($region_sql);
        if(!is_array($citys)){
            return false;
        }

        $best = array();
        $best_score = 0;
        foreach($citys as $city){
            $region_id = (int)$city['region_id'];
            $parent_id = (int)$city['parent_id'];
            $city_ids = array();

            if($city['region_type'] == 2){
                $city_ids = array($parent_id, $region_id);

                // Try to reach the third level through the city's children.
                $region_sql = "select region_id,region_name,parent_id,region_type,left(region_name,2) as ab_name from region where parent_id='{$region_id}'";
                $areas = $GLOBALS['db']->getAll($region_sql);
                if(is_array($areas)){
                    foreach($areas as $row){
                        $ab_name = (string)$row['ab_name'];
                        // "!== false": a hit at offset 0 is still a hit.
                        if($ab_name !== '' && mb_strpos($address, $ab_name, 0, 'utf-8') !== false){
                            $city_ids[] = (int)$row['region_id'];
                        }
                    }
                }
            } else if($city['region_type'] == 3){
                $region_sql = "select parent_id from region where region_id='{$parent_id}'";
                $province_id = (int)$GLOBALS['db']->getOne($region_sql);
                $city_ids = array($parent_id, $region_id, $province_id);
            }

            // Other region types yield no ids; an empty "in()" is a SQL syntax error.
            $city_ids = array_values(array_unique(array_filter($city_ids)));
            if(!$city_ids){
                continue;
            }

            // All rows of the chain, lowest region_id first.
            $region_sql = "select region_id,region_name,parent_id,region_type,left(region_name,2) as ab_name from region where region_id in(" . join(',', $city_ids) . ") order by region_id asc";
            $city_list = $GLOBALS['db']->getAll($region_sql);
            if(!is_array($city_list) || !$city_list){
                continue;
            }

            // The caller only accepts a full 3-level chain, so rank an exact chain
            // first, then an ambiguous one (several districts), then partial ones.
            $rows = count($city_list);
            $score = ($rows == 3) ? 4 : min($rows, 3);
            if($score > $best_score){
                $best_score = $score;
                $best = array_values($city_list);
            }
        }

        if(!$best){
            return false;
        }

        $province = isset($best[0]) ? $best[0] : array();
        $city = isset($best[1]) ? $best[1] : array();
        $area = isset($best[2]) ? $best[2] : array();
        return $this->getAddressInfo($address, $province, $city, $area);
    }

    /**
     * Find the region of one level whose name prefix occurs in the address.
     *
     * NOTE(review): when several regions match without $force, the first one is
     * returned but 'list' holds the children of all matches, so a region found at
     * the next level may belong to a different parent than the one returned here.
     *
     * @param string $address
     * @param array  $city_list Tree nodes of one level (region_id, region_name, childs).
     * @param bool   $force     When several regions match, lengthen the compared
     *                          prefix until only one is left.
     * @param int    $str_len   Number of leading characters compared.
     * @return array array('region_id', 'region_name', 'list' => children), or an
     *               empty array when nothing matches.
     */
    public function checkAddress($address, $city_list, $force=false, $str_len=2){
        $list = array();
        $result = array();
        if(!is_array($city_list)){
            return $result;
        }

        foreach($city_list as $city){
            $region_name = isset($city['region_name']) ? (string)$city['region_name'] : '';

            // The name must be at least as long (in characters) as the compared prefix.
            if($region_name === '' || mb_strlen($region_name, 'utf-8') < $str_len){
                continue;
            }

            $city_name = mb_substr($region_name, 0, $str_len, 'utf-8');
            if(strpos($address, $city_name) === false){
                continue;
            }

            // Keep the children of every matching region for the next level.
            if(isset($city['childs']) && is_array($city['childs'])){
                $list = $list + $city['childs'];
            }
            $result[] = array(
                'region_id'   => isset($city['region_id']) ? $city['region_id'] : null,
                'region_name' => $region_name,
                'list'        => $list,
            );
        }

        $num = count($result);
        if($num == 0){
            return array();
        }

        // Several candidates: lengthen the compared prefix.
        if($num > 1 && $force){
            $len1 = mb_strlen($result[0]['region_name'], 'utf-8');
            $len2 = mb_strlen($result[1]['region_name'], 'utf-8');

            // Both names were already compared in full: prefer the lower region_id.
            if($len1 == $len2 && $len1 == $str_len){
                $index = $result[0]['region_id'] > $result[1]['region_id'] ? 1 : 0;
                return $result[$index];
            }
            return $this->checkAddress($address, $city_list, $force, $str_len + 1);
        }

        $result[0]['list'] = $list;
        return $result[0];
    }

    /**
     * Build the result structure and extract the detail part of the address.
     *
     * @param string $address
     * @param array  $province Row with region_id/region_name, or empty.
     * @param array  $city     Row with region_id/region_name, or empty.
     * @param array  $district Row with region_id/region_name, or empty.
     * @return array Keys: province, city, district, info, level (0 = nothing
     *               resolved ... 3 = district resolved).
     */
    public function getAddressInfo($address, $province, $city, $district){
        $province_name = $this->rowField($province, 'region_name');
        $city_name = $this->rowField($city, 'region_name');
        $district_name = $this->rowField($district, 'region_name');

        // Deepest resolved region name: the detail text starts right after it.
        $find_str = '';
        if($province_name){
            $find_str = $province_name;
            if($city_name){
                $find_str = $city_name;
                if($district_name){
                    $find_str = $district_name;
                }
            }
        }

        // Try the full name first, then shorter prefixes (the user may have typed
        // "南山" for "南山區"); prefixes are never shorter than two characters.
        $find_str_len = mb_strlen($find_str, 'utf-8');
        for($i = 0; $i < $find_str_len - 1; $i++){
            $substr = mb_substr($find_str, 0, $find_str_len - $i, 'utf-8');
            $end_index = mb_strpos($address, $substr, 0, 'utf-8');
            if($end_index !== false){
                $start = $end_index + mb_strlen($substr, 'utf-8');
                $address = mb_substr($address, $start, mb_strlen($address, 'utf-8'), 'utf-8');
                // Stop at the first hit, or a shorter prefix could cut into the detail.
                break;
            }
        }

        // Drop whitespace, half-/full-width commas and colons, and whatever still
        // precedes a remaining occurrence of the region name.
        $pattern = '[\s,,::]+';
        if($find_str !== ''){
            $pattern .= '|\S*' . preg_quote($find_str, '/');
        }
        $info = preg_replace('/' . $pattern . '/u', '', $address);
        if($info === null){
            // preg_replace() fails on invalid UTF-8; keep the text unmodified.
            $info = $address;
        }

        $level = 0;
        if($district_name){
            $level = 3;
        } else if($city_name){
            $level = 2;
        } else if($province_name){
            $level = 1;
        }

        return array(
            'province' => array('region_id' => $this->rowField($province, 'region_id'), 'region_name' => $province_name),
            'city'     => array('region_id' => $this->rowField($city, 'region_id'), 'region_name' => $city_name),
            'district' => array('region_id' => $this->rowField($district, 'region_id'), 'region_name' => $district_name),
            'info'     => $info,
            'level'    => $level,
        );
    }

    /**
     * Read $row[$key], returning null when $row is not an array or lacks the key.
     */
    private function rowField($row, $key){
        return (is_array($row) && isset($row[$key])) ? $row[$key] : null;
    }

    /**
     * Turn flat region rows into a nested tree.
     *
     * @param array $data      Rows with region_id, parent_id (and any other columns).
     * @param int   $region_id Id whose children form the top level of the tree.
     * @return array Nodes keyed by region_id, each with a 'childs' sub-tree.
     */
    public function arrayKey($data, $region_id=1){
        // Group once by parent so that building the tree is linear in the row count.
        $by_parent = array();
        if(is_array($data)){
            foreach($data as $row){
                // A row that is its own parent would recurse forever.
                if($row['region_id'] == $row['parent_id']){
                    continue;
                }
                $by_parent[$row['parent_id']][] = $row;
            }
        }
        return $this->buildBranch($by_parent, $region_id);
    }

    /**
     * Recursively assemble the children of $parent_id from the grouped rows.
     */
    private function buildBranch($by_parent, $parent_id){
        $branch = array();
        if(!isset($by_parent[$parent_id])){
            return $branch;
        }
        foreach($by_parent[$parent_id] as $row){
            $row['childs'] = $this->buildBranch($by_parent, $row['region_id']);
            $branch[$row['region_id']] = $row;
        }
        return $branch;
    }
}
?>

姓氏surname表(id,姓,優先匹配順序)

-- Surname lookup table read by the name-detection query in the PHP class above
-- (`select name from surname where name like ... order by sort desc`).
-- WARNING: the DROP below discards any existing surname data.
DROP TABLE IF EXISTS `surname`;
CREATE TABLE `surname`  (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  -- Surname text; the PHP code matches it with a prefix LIKE on the first character.
  `name` char(10) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  -- Matching priority; rows are read in descending `sort` order and the first row wins.
  `sort` int(11) NULL DEFAULT NULL,
  PRIMARY KEY (`id`) USING BTREE,
  INDEX `name`(`name`) USING BTREE,
  INDEX `sort`(`sort`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 481 CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '姓氏表' ROW_FORMAT = Compact;

地址region表(地區id,上級地區id,地區名稱,地區等級,agency_id)

-- Region table read by the PHP class above. Rows form a tree through `parent_id`;
-- the PHP code builds the tree from the children of region_id 1 and treats
-- region_type 2 / 3 as the second / third address level.
CREATE TABLE `region`  (
  `region_id` smallint(5) UNSIGNED NOT NULL AUTO_INCREMENT,
  -- region_id of the parent row.
  `parent_id` smallint(5) UNSIGNED NOT NULL DEFAULT 0,
  `region_name` varchar(120) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL DEFAULT '',
  -- Depth of the row in the region hierarchy.
  `region_type` tinyint(1) NOT NULL DEFAULT 2,
  -- Not referenced by the PHP code shown in this article.
  `agency_id` smallint(5) UNSIGNED NOT NULL DEFAULT 0,
  PRIMARY KEY (`region_id`) USING BTREE,
  INDEX `parent_id`(`parent_id`) USING BTREE,
  -- No trailing comma after the last definition: MySQL rejects it as a syntax error.
  INDEX `region_type`(`region_type`) USING BTREE
) ENGINE = InnoDB AUTO_INCREMENT = 3956 AVG_ROW_LENGTH = 44 CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Compact;
  • 如若有疑問,互相討論學習!
  • 歡迎聯繫博文QQ946876689
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章