Python爬取地图关键词搜索结果

基于运营工作要求,需要对百度地图和高德地图,基于【关键词】的搜索结果进行爬取,整理

基于运营工作要求,需要对百度地图和高德地图,基于【关键词】的搜索结果进行爬取,整理

需求说明

Python爬虫,自动化实现【高德地图】和【百度地图】的【关键词搜索结果】爬取。

需求实现

  1. 实现方式:官方【API】
网站 | 开发者注册 | Key管理 | 搜索API说明
高德地图 | https://console.amap.com/dev/index | https://console.amap.com/dev/key/app | https://lbs.amap.com/api/webservice/guide/api/search
百度地图 | https://lbs.baidu.com/apiconsole/auth | https://lbs.baidu.com/apiconsole/key#/home | https://lbs.baidu.com/faq/api?title=webapi/guide/webservice-placeapi/district
  2. 初始流程
Python爬取地图关键词搜索结果
Python爬取地图关键词搜索结果

代码实现

百度地图

📎地点检索 _ 百度地图API SDK.pdf

#! /usr/bin/env python
# -*- coding:utf-8 -*-
# Usage:基于【百度地图】官方API接口,自动爬取【关键词】搜索结果
import json
import openpyxl
import requests


def get_result_pages():
    """Return the number of result pages for the configured keyword search.

    Reads the module-level globals assigned in ``__main__``:
    ``search_key_word``, ``city_name``, ``ak`` and ``api_url``.

    Returns:
        int: page count derived from the API's ``total`` field at 20
        results per page; 1 when the request fails or the response has
        no ``total`` key (e.g. daily quota exceeded).
    """
    params = {
        "query": search_key_word,
        "region": city_name,
        "output": "json",
        "page_size": 20,
        "ak": ak,
    }
    response = requests.get(url=api_url, params=params)
    if response.status_code != 200:
        return 1
    try:
        total = int(response.json()['total'])
    except (KeyError, ValueError):
        # Quota-exceeded / error payloads carry no 'total'; fall back to
        # a single page instead of crashing with KeyError.
        return 1
    # Ceiling division: 40 results -> 2 pages.  The original
    # ``total // 20 + 1`` requested one empty extra page whenever the
    # total was an exact multiple of 20.
    return max(1, (total + 19) // 20)


def get_search_result(page):
    """Fetch one page of Baidu POI results and accumulate them.

    Appends each POI dict of the page to the module-level list
    ``entire_shop_information_list``. Silently returns on HTTP failure,
    matching the original best-effort behaviour.

    Args:
        page: zero-based page index for Baidu's ``page_num`` parameter.
    """
    params = {
        "query": search_key_word,
        "region": city_name,
        "output": "json",
        "page_size": 20,
        "page_num": page,
        "ak": ak,
    }
    response = requests.get(url=api_url, params=params)
    if response.status_code != 200:
        return
    # ``.get()`` guards against error payloads (quota exceeded, bad key)
    # that have no 'results' key — the original subscript raised KeyError.
    for shop_item in response.json().get('results') or []:
        entire_shop_information_list.append(shop_item)


def write_local_file():
    """Write the collected Baidu POI records to an Excel workbook.

    Builds a sheet named after ``city_name`` with one header row and one
    row per entry of ``entire_shop_information_list``, then saves the
    workbook in the current directory (the original hard-coded an
    absolute path under one specific user's desktop, which breaks on any
    other machine — the article's Amap variant applies the same fix).
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.create_sheet(city_name, 0)
    title_list = ["省份名称", "城市名称", "区域名称", "店铺名称", "联系方式", "店铺地址", ]
    sheet.append(title_list)
    # Row 1 is the header; data starts at row 2.
    for row_num, shop in enumerate(entire_shop_information_list, start=2):
        sheet.cell(row=row_num, column=1).value = shop['province']
        sheet.cell(row=row_num, column=2).value = shop['city']
        sheet.cell(row=row_num, column=3).value = shop['area']
        sheet.cell(row=row_num, column=4).value = shop['name']
        # ``dict.get`` replaces the original bare ``except:``, which would
        # also have swallowed unrelated errors (even KeyboardInterrupt).
        sheet.cell(row=row_num, column=5).value = shop.get('telephone', 'None')
        sheet.cell(row=row_num, column=6).value = shop['address']
    workbook.save(r'.\【{0}】{1}.xlsx'.format(city_name, search_key_word))


if __name__ == '__main__':
    # NOTE(review): this API key is committed in plain text — rotate it
    # and load it from an environment variable or config file instead.
    ak = r'NNsD7E9scB982HCOslP0AszqIWwAAXzb'
    city_name, search_key_word, entire_shop_information_list = '济南', '电脑维修', []
    api_url = r'https://api.map.baidu.com/place/v2/search'
    page_number = get_result_pages()
    # Baidu's ``page_num`` is zero-based, so pages 0 .. page_number-1
    # cover every result; the original ``range(0, page_number + 1)``
    # requested one page too many.
    for page in range(page_number):
        get_search_result(page)
    write_local_file()

高德地图

#! /usr/bin/env python
# -*- coding:utf-8 -*-
# Usage:基于【高德地图】官方API接口,自动爬取【关键词】搜索结果
# 功能优化:
# 1. 城市名称采用列表形式,支持多城市遍历获取
# 2. Excel文件改为在当前目录下生成,规避多电脑保存路径不同的问题
# 3. 获取页码函数针对【API次数超限】做兼容,规避超限后无法获取返回内容导致的KeyError错误
import json
import openpyxl
import requests


def get_result_pages(city):
    """Return the number of Amap result pages for the keyword in *city*.

    Reads the module-level globals ``search_key_word`` and
    ``amap_web_key`` assigned in ``__main__``.

    Args:
        city: city name passed to Amap's ``city`` parameter.

    Returns:
        int: page count at 20 results per page, or 1 on HTTP failure.
        Exits the process when the daily API quota is exhausted (the
        error payload carries no ``count`` key).
    """
    # Let requests build and URL-encode the query string instead of
    # hand-formatting the URL (Chinese keywords need percent-encoding).
    params = {
        'keywords': search_key_word,
        'city': city,
        'offset': 20,
        'key': amap_web_key,
        'extensions': 'all',
    }
    response = requests.get(r'https://restapi.amap.com/v3/place/text', params=params)
    if response.status_code != 200:
        return 1
    try:
        total = int(response.json()['count'])
    except KeyError:
        exit('当日API使用次数已达上限')
    # Ceiling division fixes the original ``// 20 + 1`` off-by-one when
    # the count is an exact multiple of 20.
    return max(1, (total + 19) // 20)


def get_search_result(page, city):
    """Fetch one page of Amap POI results and accumulate them.

    Appends each POI dict of the page to the module-level list
    ``entire_shop_information_list``. Silently returns on HTTP failure,
    matching the original best-effort behaviour.

    Args:
        page: page number (string or int) for Amap's ``page`` parameter.
        city: city name passed to Amap's ``city`` parameter.
    """
    params = {
        'keywords': search_key_word,
        'city': city,
        'offset': 20,
        'page': page,
        'key': amap_web_key,
        'extensions': 'all',
    }
    response = requests.get(r'https://restapi.amap.com/v3/place/text', params=params)
    if response.status_code != 200:
        return
    # ``.get()`` guards against error payloads with no 'pois' key
    # (quota exceeded, invalid key) — the original raised KeyError there.
    for poi in response.json().get('pois') or []:
        entire_shop_information_list.append(poi)


def write_local_file():
    """Write the collected Amap POI records to an Excel workbook.

    Builds a sheet named after ``search_key_word`` with one header row
    and one row per entry of ``entire_shop_information_list``, then
    saves the workbook in the current directory.
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.create_sheet(search_key_word, 0)
    title_list = ['城市编码', "区域编码", "省份名称", "城市名称", "区域名称", "店铺名称", "联系方式", "店铺地址", ]
    sheet.append(title_list)
    # Row 1 is the header; data starts at row 2.
    for row_num, shop in enumerate(entire_shop_information_list, start=2):
        sheet.cell(row=row_num, column=1).value = shop['citycode']
        sheet.cell(row=row_num, column=2).value = shop['adcode']
        sheet.cell(row=row_num, column=3).value = shop['pname']
        sheet.cell(row=row_num, column=4).value = shop['cityname']
        sheet.cell(row=row_num, column=5).value = shop['adname']
        sheet.cell(row=row_num, column=6).value = shop['name']
        # Amap returns an empty list ``[]`` for absent tel/address fields,
        # so falsy values fall back to the 'None' placeholder. The
        # original ``except ValueError`` around a dict subscript was dead
        # code — a missing key raises KeyError, not ValueError.
        sheet.cell(row=row_num, column=7).value = shop.get('tel') or 'None'
        sheet.cell(row=row_num, column=8).value = shop.get('address') or 'None'
    workbook.save(r'.\【AMAP】{0}.xlsx'.format(search_key_word))


if __name__ == '__main__':
    # NOTE(review): this API key is committed in plain text — rotate it
    # and load it from an environment variable or config file instead.
    amap_web_key = r'46ce87666577f0c206dcbdfa5275c405'
    city_name_list = ['邯郸市']
    search_key_word = '电脑维修'
    entire_shop_information_list = []
    for city_name in city_name_list:
        page_number = get_result_pages(city_name)
        # Amap pages are 1-based; the original ``range(page_number + 1)``
        # also requested page 0, which the API serves as page 1 and
        # therefore produced duplicate rows in the output.
        for page in range(1, page_number + 1):
            get_search_result(str(page), city_name)
    write_local_file()

爬取结果

保存为Excel文件(百度版保存到【桌面】,高德版保存到当前目录)。例如高德地图爬取【长春】电脑维修关键词的结果

📎【AMAP】【长春】电脑维修.xlsx

本文为原创文章,撰写发布者:GSolaris,转载请注明出处:https://www.blissfulcandy.com/index.php/2023/07/19/pythonmapcrawler/

(0)
GSolarisGSolaris

相关推荐

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

微信联系
关注公众号