基于运营工作要求,需要对百度地图和高德地图基于【关键词】的搜索结果进行爬取与整理。
需求说明
Python爬虫,自动化实现【高德地图】和【百度地图】的【关键词搜索结果】爬取。
需求实现
- 实现方式:官方【API】
- 初始流程
代码实现
百度地图
#! /usr/bin/env python
# -*- coding:utf-8 -*-
# Usage:基于【百度地图】官方API接口,自动爬取【关键词】搜索结果
import json
import openpyxl
import requests
def get_result_pages():
    """Query the Baidu Place API once to learn how many result pages exist.

    Relies on module-level globals: ``search_key_word``, ``city_name``,
    ``ak`` and ``api_url``.

    Returns:
        int: number of 20-item pages (at least 1). Falls back to 1 on a
        non-200 HTTP response; terminates the script when the daily API
        quota is exhausted (the payload then carries no ``total`` key).
    """
    params = {
        "query": search_key_word,
        "region": city_name,
        "output": "json",
        "page_size": 20,
        "ak": ak,
    }
    response = requests.get(url=api_url, params=params)
    if response.status_code != 200:
        return 1
    try:
        total = int(response.json()['total'])
    except KeyError:
        # Mirror the AMap variant: quota exceeded -> no 'total' in payload.
        exit('当日API使用次数已达上限')
    # Ceiling division: the original `total // 20 + 1` over-counted by one
    # page whenever total was an exact multiple of 20.
    return max(1, -(-total // 20))
def get_search_result(page):
    """Fetch one 20-item Baidu result page and extend the global shop list.

    Relies on module-level globals: ``search_key_word``, ``city_name``,
    ``ak``, ``api_url`` and ``entire_shop_information_list``.

    Args:
        page: 0-based page number passed as the API's ``page_num``.

    Silently does nothing on an HTTP error, a missing ``results`` key
    (daily quota exceeded), or an empty page.
    """
    params = {
        "query": search_key_word,
        "region": city_name,
        "output": "json",
        "page_size": 20,
        "page_num": page,
        "ak": ak,
    }
    response = requests.get(url=api_url, params=params)
    if response.status_code == 200:
        # .get() guards against a missing 'results' key (e.g. quota
        # exceeded), which raised KeyError in the original.
        entire_shop_information_list.extend(response.json().get('results') or [])
def write_local_file():
    """Dump the collected Baidu results into an Excel workbook.

    Saves 【<city>】<keyword>.xlsx in the current working directory — the
    original hard-coded one user's desktop path, which breaks on any other
    machine (the AMap variant's changelog names this exact fix).

    Relies on module-level globals: ``city_name``, ``search_key_word`` and
    ``entire_shop_information_list``.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.create_sheet(city_name, 0)
    sheet.append(["省份名称", "城市名称", "区域名称", "店铺名称", "联系方式", "店铺地址", ])
    for row_num, shop in enumerate(entire_shop_information_list, start=2):
        sheet.cell(row=row_num, column=1).value = shop['province']
        sheet.cell(row=row_num, column=2).value = shop['city']
        sheet.cell(row=row_num, column=3).value = shop['area']
        sheet.cell(row=row_num, column=4).value = shop['name']
        # Some POIs carry no phone number; .get() replaces the bare
        # `except:` of the original with an explicit default.
        sheet.cell(row=row_num, column=5).value = shop.get('telephone', 'None')
        sheet.cell(row=row_num, column=6).value = shop['address']
    workbook.save(r'.\【{0}】{1}.xlsx'.format(city_name, search_key_word))
if __name__ == '__main__':
    # SECURITY NOTE(review): the API key is committed in plain text; move
    # it to an environment variable or config file before sharing.
    ak = r'NNsD7E9scB982HCOslP0AszqIWwAAXzb'
    city_name, search_key_word = '济南', '电脑维修'
    entire_shop_information_list = []
    api_url = r'https://api.map.baidu.com/place/v2/search'
    page_number = get_result_pages()
    # Baidu's page_num is 0-based, so n pages are 0 .. n-1; the original
    # `range(0, page_number + 1)` requested one page past the end.
    for page in range(page_number):
        get_search_result(page)
    write_local_file()
高德地图
#! /usr/bin/env python
# -*- coding:utf-8 -*-
# Usage:基于【高德地图】官方API接口,自动爬取【关键词】搜索结果
# 功能优化:
# 1. 城市名称采用列表形式,支持多城市遍历获取
# 2. Excel文件改为在当前目录下生成,规避多电脑保存路径不同的问题
# 3. 获取页码函数针对【API次数超限】做兼容,规避超限后无法获取返回内容导致的KeyError错误
import json
import openpyxl
import requests
def get_result_pages(city):
    """Ask the AMap place-text API how many 20-item pages exist for *city*.

    Relies on module-level globals: ``search_key_word`` and ``amap_web_key``.

    Args:
        city: city name (Chinese) forwarded as the API's ``city`` parameter.

    Returns:
        int: number of pages (at least 1). Falls back to 1 on a non-200
        response; terminates the script when the daily quota is exhausted
        (the payload then lacks a ``count`` key).
    """
    # Let requests build/encode the query string instead of hand-formatting
    # the URL — the original did not percent-encode the Chinese keyword.
    params = {
        'keywords': search_key_word,
        'city': city,
        'offset': 20,
        'key': amap_web_key,
        'extensions': 'all',
    }
    response = requests.get(r'https://restapi.amap.com/v3/place/text', params=params)
    if response.status_code != 200:
        return 1
    try:
        count = int(response.json()['count'])
    except KeyError:
        exit('当日API使用次数已达上限')
    # Ceiling division: the original `count // 20 + 1` over-counted by one
    # page whenever count was an exact multiple of 20.
    return max(1, -(-count // 20))
def get_search_result(page, city):
    """Fetch one AMap result page and extend the global shop list.

    Relies on module-level globals: ``search_key_word``, ``amap_web_key``
    and ``entire_shop_information_list``.

    Args:
        page: page number (string or int) forwarded as the ``page`` param.
        city: city name forwarded as the ``city`` param.

    Silently does nothing on an HTTP error, a missing ``pois`` key
    (daily quota exceeded), or an empty page.
    """
    params = {
        'keywords': search_key_word,
        'city': city,
        'offset': 20,
        'page': page,
        'key': amap_web_key,
        'extensions': 'all',
    }
    response = requests.get(r'https://restapi.amap.com/v3/place/text', params=params)
    if response.status_code == 200:
        # .get() guards against a missing 'pois' key (quota exceeded),
        # extending the changelog's KeyError fix to this function too.
        entire_shop_information_list.extend(response.json().get('pois') or [])
def write_local_file():
    """Dump the collected AMap results into 【AMAP】<keyword>.xlsx (cwd).

    Relies on module-level globals: ``search_key_word`` and
    ``entire_shop_information_list``.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.create_sheet(search_key_word, 0)
    sheet.append(['城市编码', "区域编码", "省份名称", "城市名称", "区域名称", "店铺名称", "联系方式", "店铺地址", ])
    for row_num, shop in enumerate(entire_shop_information_list, start=2):
        sheet.cell(row=row_num, column=1).value = shop['citycode']
        sheet.cell(row=row_num, column=2).value = shop['adcode']
        sheet.cell(row=row_num, column=3).value = shop['pname']
        sheet.cell(row=row_num, column=4).value = shop['cityname']
        sheet.cell(row=row_num, column=5).value = shop['adname']
        sheet.cell(row=row_num, column=6).value = shop['name']
        # AMap returns an empty list instead of a string when there is no
        # phone number; fall back to the literal 'None'.
        sheet.cell(row=row_num, column=7).value = shop['tel'] or 'None'
        try:
            sheet.cell(row=row_num, column=8).value = shop['address']
        except ValueError:
            # openpyxl rejects non-scalar cell values; AMap sometimes
            # returns [] for 'address', which raises ValueError here.
            sheet.cell(row=row_num, column=8).value = 'None'
    workbook.save(r'.\【AMAP】{0}.xlsx'.format(search_key_word))
if __name__ == '__main__':
    # SECURITY NOTE(review): the API key is committed in plain text; move
    # it to an environment variable or config file before sharing.
    amap_web_key = r'46ce87666577f0c206dcbdfa5275c405'
    city_name_list = ['邯郸市']
    search_key_word = '电脑维修'
    entire_shop_information_list = []
    for city_name in city_name_list:
        page_number = get_result_pages(city_name)
        # AMap's `page` parameter is 1-based; the original
        # `range(page_number + 1)` started at 0 (served as a duplicate of
        # page 1) and requested one page too many.
        for page in range(1, page_number + 1):
            get_search_result(str(page), city_name)
    write_local_file()
爬取结果
爬取结果保存为Excel文件:百度地图版本保存到【桌面】,高德地图版本(依据其功能优化说明)保存到脚本当前目录。例如高德地图爬取【长春】电脑维修关键词的结果
本文为原创文章,撰写发布者:GSolaris,转载请注明出处:https://www.blissfulcandy.com/index.php/2023/07/19/pythonmapcrawler/