为便捷使用高德地图的Web服务 API的驾车路线规划,博主写了这样一个ReadAmapApi类,用于达成以下效果:

  • 实现良好预处理,尽可能多地挖掘返回Json中的有效信息;
  • 不产生令人眼前一黑的、浪费额度的bug;
  • 稳定、易用、易复用。

基类ReadAmapApi:

Python
import re
import pandas as pd
import requests
from tqdm import tqdm
import time


class ReadAmapApi(object):
    """Regex-based reader for a driving-route planning web API.

    The instance holds five compiled patterns (paths, path summary fields,
    roads, road fields, traffic segments), the capture-group order used by
    those patterns, the request URL and the fixed query parameters.
    """

    # Names of the path columns appended to every result row.
    PATH_COLUMNS = ('chosen_path', 'alter_path_1', 'alter_path_2')
    # Traffic-status buckets reported under the 'tmcs' key; the first entry
    # doubles as the fallback bucket for statuses that are not listed here.
    TMC_STATUSES = ('未知', '畅通', '缓行', '拥堵', '严重拥堵')
    # Seconds before a request is abandoned, so a stalled connection cannot
    # hang a long batch indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, patterns, order, url, parameters):
        """Compile the patterns and store the request configuration.

        Args:
            patterns: Five regex strings, in order: path, path summary
                fields, road, road fields, traffic segment.
            order: Four capture-group indices, in order: action and tmcs
                within the road-fields match, status and distance within the
                traffic-segment match.
            url: Endpoint the requests are sent to.
            parameters: Query parameters sent with every request.
        """
        self.ptn_paths = re.compile(patterns[0])
        self.ptn_path_items = re.compile(patterns[1])
        self.ptn_roads = re.compile(patterns[2])
        self.ptn_road_items = re.compile(patterns[3])
        self.ptn_tmcs = re.compile(patterns[4])
        self.order = order  # group order differs between API versions
        self.url = url
        self.parameters = parameters

    def read_result(self, text):
        """Parse a raw response body into per-path summaries.

        Args:
            text: Response body as a string.

        Returns:
            A dict keyed 0, 1, 2, ... (one entry per path whose summary
            fields matched). Each value has the keys 'distance', 'duration',
            'tolls', 'traffic_lights' (ints), 'actions' and 'districts'
            (lists, one item per matched road) and 'tmcs' (status -> summed
            segment distance).

        Raises:
            ValueError: If a captured numeric field is not an integer string.
        """
        action_odr, tmcs_odr, tmc_status_odr, tmc_distance_odr = self.order
        fallback_status = self.TMC_STATUSES[0]
        paths_dict = {}
        for path in self.ptn_paths.findall(text):
            path_items = self.ptn_path_items.findall(path)
            if not path_items:
                continue
            summary = path_items[0]
            actions = []
            districts = []
            tmc_totals = dict.fromkeys(self.TMC_STATUSES, 0)
            for road in self.ptn_roads.findall(path):
                road_items = self.ptn_road_items.findall(road)
                if not road_items:
                    continue
                fields = road_items[0]
                actions.append(fields[action_odr])
                for tmc in self.ptn_tmcs.findall(fields[tmcs_odr]):
                    status = tmc[tmc_status_odr]
                    # An unexpected status must not abort the whole batch
                    # with a KeyError; count it in the fallback bucket.
                    if status not in tmc_totals:
                        status = fallback_status
                    tmc_totals[status] += int(tmc[tmc_distance_odr])
                districts.append(fields[2])
            # Keys stay consecutive because skipped paths never reach here.
            paths_dict[len(paths_dict)] = {
                'distance': int(summary[0]),
                'duration': int(summary[1]),
                'tolls': int(summary[2]),
                'traffic_lights': int(summary[3]),
                'actions': actions,
                'tmcs': tmc_totals,
                'districts': districts,
            }
        return paths_dict

    def get_single_result(self, o_coord, d_coord):
        """Query one origin/destination pair and parse the response.

        Args:
            o_coord: Origin as a 'lon,lat' string.
            d_coord: Destination as a 'lon,lat' string.

        Returns:
            The dict produced by read_result() for the response body.
        """
        parameters = self.parameters | {
            'origin': o_coord,
            'destination': d_coord,
        }
        response = requests.get(
            self.url, params=parameters, timeout=self.REQUEST_TIMEOUT
        )
        return self.read_result(response.text)

    def get_results(
            self,
            df,
            o_lon_col='o_lon',
            o_lat_col='o_lat',
            d_lon_col='d_lon',
            d_lat_col='d_lat',
            iters_limit=5000,  # guards against exceeding the daily quota
            query_delay=0.35,  # guards against exceeding the rate limit
            save_path=''
    ):
        """Query every row of ``df`` and return the rows with path data added.

        Rows are read by position, so ``df`` may carry any index (for
        example a slice of a larger frame used to resume an earlier run).

        Args:
            df: Input frame holding the four coordinate columns.
            o_lon_col, o_lat_col: Origin longitude / latitude column names.
            d_lon_col, d_lat_col: Destination longitude / latitude column
                names.
            iters_limit: Stop after this many queries.
            query_delay: Seconds to sleep after each query.
            save_path: If non-empty, the result is also written to this
                Excel path.

        Returns:
            A DataFrame with the columns of ``df`` plus 'chosen_path',
            'alter_path_1' and 'alter_path_2'; missing paths are ``{}``. If
            an exception interrupts the loop, the rows gathered so far are
            returned (and saved when ``save_path`` is set).
        """
        n_paths = len(self.PATH_COLUMNS)
        columns = df.columns.tolist() + list(self.PATH_COLUMNS)
        rows = []
        interrupted = False
        iters_cnt = 0
        try:
            o_lon, o_lat = df[o_lon_col], df[o_lat_col]
            d_lon, d_lat = df[d_lon_col], df[d_lat_col]
            for pos in tqdm(range(len(df))):
                o_coord = f'{round(o_lon.iloc[pos], 6)},{round(o_lat.iloc[pos], 6)}'
                d_coord = f'{round(d_lon.iloc[pos], 6)},{round(d_lat.iloc[pos], 6)}'
                paths_dict = self.get_single_result(o_coord, d_coord)
                # Keep at most n_paths entries so the row always matches the
                # result columns, then pad with empty dicts.
                found = [paths_dict[i] for i in range(min(len(paths_dict), n_paths))]
                padding = [{} for _ in range(n_paths - len(found))]
                rows.append(df.iloc[pos].tolist() + found + padding)
                time.sleep(query_delay)
                iters_cnt += 1
                if iters_cnt == iters_limit:
                    break
        except Exception as e:  # e.g. network loss: keep what was gathered
            print(f'An error occurred: {e}')
            interrupted = True

        result_df = pd.DataFrame(rows, columns=columns)
        if save_path:
            result_df.to_excel(save_path, index=False)
            if interrupted:
                print(f'Partial results saved to {save_path}')
        return result_df
        

两个子类ReadAmapApiV3、ReadAmapApiV5,分别对应路径规划、路径规划2.0的API:

Python
class ReadAmapApiV3(ReadAmapApi):  # route planning (v3)
    """ReadAmapApi preconfigured for the v3 driving-direction endpoint."""

    def __init__(self, key):
        """Set up the v3 patterns, group order, URL and query parameters.

        Args:
            key: API key sent as the 'key' query parameter.
        """
        super().__init__(
            patterns=(
                r'(\{"dist.*?}]}]}]}]})',
                r'"distance":"(.*?)","duration":"(.*?)","strategy.*?tolls":"(.*?)","toll_distance.*?restriction.*?traffic_lights":"(.*?)"',
                r'(\{"road_name.*?}]}]}]})',
                r'"action":"(.*?)".*?tmcs":(.*?}]).*?districts.*?name":"(.*?)"',
                r'lcode.*?distance":"(.*?)","status":"(.*?)"',
            ),
            # action is group 0 and tmcs group 1 of the road pattern; status
            # is group 1 and distance group 0 of the traffic-segment pattern.
            order=(0, 1, 1, 0),
            # Bare endpoint: the query string is built from `parameters`, so
            # the documentation placeholder "?parameters" must not be sent.
            url='https://restapi.amap.com/v3/direction/driving',
            parameters={
                'key': key,
                'strategy': 10,  # default strategy
                'ferry': 1,  # do not use ferries
                'roadaggregation': 'true',  # return road aggregation info
                'extensions': 'all',  # return the full set of fields
            },
        )


class ReadAmapApiV5(ReadAmapApi):  # route planning 2.0 (v5)
    """ReadAmapApi preconfigured for the v5 driving-direction endpoint."""

    def __init__(self, key):
        """Set up the v5 patterns, group order, URL and query parameters.

        Args:
            key: API key sent as the 'key' query parameter.
        """
        super().__init__(
            patterns=(
                r'(\{"dist.*?}]}]}]})',
                r'"distance":"(.*?)".*?duration":"(.*?)","tolls":"(.*?)".*?traffic_lights":"(.*?)"',
                r'(\{"instruction.*?}]}]})',
                r'"tmcs":(.*?}]).*?action":"(.*?)".*?districts.*?name":"(.*?)"',
                r'"tmc_status":"(.*?)","tmc_distance":"(.*?)"',
            ),
            # action is group 1 and tmcs group 0 of the road pattern; status
            # is group 0 and distance group 1 of the traffic-segment pattern.
            order=(1, 0, 0, 1),
            # Bare endpoint: the query string is built from `parameters`, so
            # the documentation placeholder "?parameters" must not be sent.
            url='https://restapi.amap.com/v5/direction/driving',
            parameters={
                'key': key,
                'strategy': 32,  # default strategy
                'ferry': 1,  # do not use ferries
                'show_fields': 'cost,tmcs,navi,cities',  # request the needed fields
            },
        )

使用方法示例:

Python
from utils import ReadAmapApiV3
import pandas as pd


def query_operate():
    """Query routes for every trip in ``example.xlsx`` and print the first.

    The input sheet must contain an index column plus four coordinate
    columns (longitude and latitude for both origin and destination); here
    its columns are assumed to include 'id', 'o_lon', 'o_lat', 'd_lon',
    'd_lat'.
    """
    trips = pd.read_excel('example.xlsx')
    client = ReadAmapApiV3(key='examplekey')  # use your own API key

    # 1. The coordinate column names equal the get_results() defaults, so
    #    they are not passed explicitly.
    # 2. get_results() returns a DataFrame.
    # 3. save_path defaults to empty; when set (Excel paths only), the same
    #    DataFrame is also written to that file, e.g.
    #    get_results(trips, save_path='result.xlsx').
    routes = client.get_results(trips)

    # Besides the original columns, each row carries up to three path
    # records of identical format:
    #   'chosen_path': the top recommended path
    #   'alter_path_1', 'alter_path_2': the 2nd and 3rd recommended paths
    first_row = routes.loc[0]
    print(first_row)


if __name__ == '__main__':
    query_operate()

路径数据为字典形式(如果是从文件读取,可通过ast.literal_eval来解析),一份示例数据如下:

Python
{
  'distance': 28744, 
  'duration': 1849, 
  'tolls': 4, 
  'traffic_lights': 5, 
  'actions': ['靠左', '减速行驶', '靠左', '直行', '靠左', '直行', '向右前方行驶', '左转'], 
  'tmcs': {'未知': 19, '畅通': 21249, '缓行': 178, '拥堵': 0, '严重拥堵': 0}, 
  'districts': ['浦东新区', '浦东新区', '浦东新区', '浦东新区', '浦东新区', '杨浦区', '静安区', '宝山区']
}

从中可提取出行距离、出行时耗、收费、通过红绿灯数、方向变换、按长度的拥堵分布、途经行政区等信息,且这样的数据一个请求至多有3份,信息十分丰富,方便用于其他研究。

值得注意的是,由于高德路网是动态变化的,因而同一个路线规划请求在不同时间点的结果大概率不一样,实际使用时应考虑控制这一影响。