为便捷使用高德地图的Web服务 API的驾车路线规划,博主写了这样一个ReadAmapApi类,用于达成以下效果:
- 实现良好预处理,尽可能多地挖掘返回Json中的有效信息;
- 不产生令人眼前一黑的、浪费额度的bug;
- 稳定、易用、易复用。
基类ReadAmapApi:
Python
import re
import pandas as pd
import requests
from tqdm import tqdm
import time
class ReadAmapApi(object):
    """Query an Amap driving-direction endpoint and condense each route.

    The response text is not decoded as JSON; five regular expressions
    supplied by the caller pull out the route-level and road-level fields.
    """

    # Congestion labels that are always present in a route's 'tmcs' totals.
    _TMC_STATUSES = ('未知', '畅通', '缓行', '拥堵', '严重拥堵')
    # Columns appended by get_results(); one route summary per column.
    _PATH_COLUMNS = ('chosen_path', 'alter_path_1', 'alter_path_2')

    def __init__(self, patterns, order, url, parameters):
        """Compile the extraction patterns and store the request settings.

        Args:
            patterns: Five regex strings, in this order: route blocks,
                route-level fields (distance, duration, tolls,
                traffic_lights), road blocks, road-level fields, and
                traffic-condition (tmc) entries.
            order: Four group indices ``(action, tmcs, tmc_status,
                tmc_distance)``; the first two index the road-level match,
                the last two index the tmc match.
            url: Endpoint URL passed to ``requests.get``.
            parameters: Query parameters sent with every request.
        """
        self.ptn_paths = re.compile(patterns[0])
        self.ptn_path_items = re.compile(patterns[1])
        self.ptn_roads = re.compile(patterns[2])
        self.ptn_road_items = re.compile(patterns[3])
        self.ptn_tmcs = re.compile(patterns[4])
        self.order = order  # v3 and v5 order some groups differently
        self.url = url
        self.parameters = parameters

    def read_result(self, text):
        """Parse a raw response into ``{route_number: summary_dict}``.

        Route blocks without a route-level match are skipped and do not
        consume a route number, so keys are always 0, 1, 2, ...

        Args:
            text: Response body as a string.

        Returns:
            A dict mapping consecutive integers to summaries with the keys
            'distance', 'duration', 'tolls', 'traffic_lights', 'actions',
            'tmcs' and 'districts'.
        """
        action_odr, tmcs_odr, tmc_status_odr, tmc_distance_odr = self.order
        paths_dict = {}
        for path in self.ptn_paths.findall(text):
            path_items = self.ptn_path_items.findall(path)
            if not path_items:
                continue
            route_fields = path_items[0]
            tmc_totals = {status: 0 for status in self._TMC_STATUSES}
            summary = {
                'distance': int(route_fields[0]),
                'duration': int(route_fields[1]),
                'tolls': int(route_fields[2]),
                'traffic_lights': int(route_fields[3]),
                'actions': [],
                'tmcs': tmc_totals,
                'districts': [],
            }
            for road in self.ptn_roads.findall(path):
                road_items = self.ptn_road_items.findall(road)
                if not road_items:
                    continue
                road_fields = road_items[0]
                summary['actions'].append(road_fields[action_odr])
                for tmc in self.ptn_tmcs.findall(road_fields[tmcs_odr]):
                    status = tmc[tmc_status_odr]
                    # A label outside the pre-seeded five gets its own
                    # bucket instead of raising KeyError mid-batch.
                    tmc_totals[status] = (
                        tmc_totals.get(status, 0) + int(tmc[tmc_distance_odr])
                    )
                # The district name is group 2 in every pattern set.
                summary['districts'].append(road_fields[2])
            paths_dict[len(paths_dict)] = summary
        return paths_dict

    def get_single_result(self, o_coord, d_coord, timeout=30):
        """Request one origin/destination pair and parse the response.

        Args:
            o_coord: Origin as a ``'lon,lat'`` string.
            d_coord: Destination as a ``'lon,lat'`` string.
            timeout: Seconds passed to ``requests.get`` so that a stalled
                connection raises instead of blocking forever.

        Returns:
            The dict produced by ``read_result`` for the response text.
        """
        parameters = self.parameters | {
            'origin': o_coord,
            'destination': d_coord,
        }
        response = requests.get(self.url, parameters, timeout=timeout)
        return self.read_result(response.text)

    @staticmethod
    def _format_coord(lon, lat):
        """Return ``'lon,lat'`` with both values rounded to 6 decimals."""
        return f'{round(lon, 6)},{round(lat, 6)}'

    def get_results(
        self,
        df,
        o_lon_col='o_lon',
        o_lat_col='o_lat',
        d_lon_col='d_lon',
        d_lat_col='d_lat',
        iters_limit=5000,  # guards against exceeding the request quota
        query_delay=0.35,  # guards against exceeding the concurrency limit
        save_path='',
    ):
        """Query every row of ``df`` and append up to three route summaries.

        Rows are addressed by position, so ``df`` may carry any index.

        Args:
            df: DataFrame holding the four coordinate columns.
            o_lon_col: Name of the origin-longitude column.
            o_lat_col: Name of the origin-latitude column.
            d_lon_col: Name of the destination-longitude column.
            d_lat_col: Name of the destination-latitude column.
            iters_limit: Processing stops once this many rows were queried.
            query_delay: Seconds to sleep after each request.
            save_path: Excel path to write the result to; empty skips saving.

        Returns:
            A DataFrame with the columns of ``df`` plus 'chosen_path',
            'alter_path_1' and 'alter_path_2'. If an exception interrupts
            the loop, the rows gathered so far are returned.
        """
        n_paths = len(self._PATH_COLUMNS)
        result_df = pd.DataFrame(
            columns=df.columns.tolist() + list(self._PATH_COLUMNS)
        )
        interrupted = False
        try:
            for pos in tqdm(range(len(df))):
                o_coord = self._format_coord(
                    df[o_lon_col].iloc[pos], df[o_lat_col].iloc[pos]
                )
                d_coord = self._format_coord(
                    df[d_lon_col].iloc[pos], df[d_lat_col].iloc[pos]
                )
                paths_dict = self.get_single_result(o_coord, d_coord)
                # Cap at n_paths routes and pad with fresh empty dicts so
                # the row always matches the result columns.
                path_list = [
                    paths_dict[i] for i in range(min(len(paths_dict), n_paths))
                ]
                path_list += [{} for _ in range(n_paths - len(path_list))]
                result_df.loc[len(result_df)] = df.iloc[pos].tolist() + path_list
                time.sleep(query_delay)
                if pos + 1 == iters_limit:
                    break
        except Exception as e:  # e.g. a dropped network connection
            print(f'An error occurred: {e}')
            interrupted = True
        if save_path:
            result_df.to_excel(save_path, index=False)
            if interrupted:
                print(f'Partial results saved to {save_path}')
        return result_df
两个子类 `ReadAmapApiV3` 和 `ReadAmapApiV5`,分别对应路径规划、路径规划2.0的API:
Python
class ReadAmapApiV3(ReadAmapApi):  # route planning
    """Preset of ReadAmapApi for the v3 driving-direction endpoint."""

    def __init__(self, key):
        """Set the v3 patterns, group order, URL and query parameters.

        Args:
            key: API key sent as the ``key`` query parameter.
        """
        super().__init__(
            patterns=(
                r'(\{"dist.*?}]}]}]}]})',
                r'"distance":"(.*?)","duration":"(.*?)","strategy.*?tolls":"(.*?)","toll_distance.*?restriction.*?traffic_lights":"(.*?)"',
                r'(\{"road_name.*?}]}]}]})',
                r'"action":"(.*?)".*?tmcs":(.*?}]).*?districts.*?name":"(.*?)"',
                r'lcode.*?distance":"(.*?)","status":"(.*?)"',
            ),
            order=(0, 1, 1, 0),
            # The query string is built from `parameters`; the literal
            # "?parameters" placeholder must not be part of the URL.
            url='https://restapi.amap.com/v3/direction/driving',
            parameters={
                'key': key,
                'strategy': 10,  # default strategy
                'ferry': 1,  # do not use ferries
                'roadaggregation': 'true',  # return road-aggregation info
                'extensions': 'all',  # return all information
            },
        )
class ReadAmapApiV5(ReadAmapApi):  # route planning 2.0
    """Preset of ReadAmapApi for the v5 driving-direction endpoint."""

    def __init__(self, key):
        """Set the v5 patterns, group order, URL and query parameters.

        Args:
            key: API key sent as the ``key`` query parameter.
        """
        super().__init__(
            patterns=(
                r'(\{"dist.*?}]}]}]})',
                r'"distance":"(.*?)".*?duration":"(.*?)","tolls":"(.*?)".*?traffic_lights":"(.*?)"',
                r'(\{"instruction.*?}]}]})',
                r'"tmcs":(.*?}]).*?action":"(.*?)".*?districts.*?name":"(.*?)"',
                r'"tmc_status":"(.*?)","tmc_distance":"(.*?)"',
            ),
            order=(1, 0, 0, 1),
            # The query string is built from `parameters`; the literal
            # "?parameters" placeholder must not be part of the URL.
            url='https://restapi.amap.com/v5/direction/driving',
            parameters={
                'key': key,
                'strategy': 32,  # default strategy
                'ferry': 1,  # do not use ferries
                'show_fields': 'cost,tmcs,navi,cities',  # request only the needed fields
            },
        )
使用方法示例:
Python
from utils import ReadAmapApiV3
import pandas as pd
def query_operate():
    """Run an example batch query and print the first result row.

    Requirements on the input columns: an index column plus the four
    coordinate columns (longitude and latitude for origin and destination).
    Here df.columns is assumed to contain 'id', 'o_lon', 'o_lat', 'd_lon',
    'd_lat'.
    """
    trips = pd.read_excel('example.xlsx')
    api = ReadAmapApiV3(key='examplekey')  # your own API key

    # 1. The coordinate column names match the defaults of get_results(),
    #    so they are not passed explicitly.
    # 2. get_results() returns a DataFrame.
    # 3. save_path defaults to empty; when set, that DataFrame is also
    #    written to Excel (only an Excel path is accepted), e.g.:
    #    api.get_results(trips, save_path='result.xlsx')
    routes = api.get_results(trips)

    # Besides the original columns, each returned row carries up to three
    # route records of identical format:
    #   'chosen_path': the route Amap ranks first
    #   'alter_path_1', 'alter_path_2': the routes ranked second and third
    print(routes.loc[0])


if __name__ == '__main__':
    query_operate()
路径数据为字典形式(如果是从文件读取,可通过ast.literal_eval来解析),一份示例数据如下:
Python
{
'distance': 28744,
'duration': 1849,
'tolls': 4,
'traffic_lights': 5,
'actions': ['靠左', '减速行驶', '靠左', '直行', '靠左', '直行', '向右前方行驶', '左转'],
'tmcs': {'未知': 19, '畅通': 21249, '缓行': 178, '拥堵': 0, '严重拥堵': 0},
'districts': ['浦东新区', '浦东新区', '浦东新区', '浦东新区', '浦东新区', '杨浦区', '静安区', '宝山区']
}
从中可提取出行距离、出行时耗、收费、通过红绿灯数、方向变换、按长度的拥堵分布、途经行政区等信息,且这样的数据一个请求至多有3份,信息十分丰富,方便用于其他研究。
值得注意的是,由于高德路网是动态变化的,因而同一个路线规划请求在不同时间点的结果大概率不一样,实际使用时应考虑控制这一影响。