需求:爬取浙江省各个县区的边界 geo 数据(GeoJSON),用于后续前端绘制地图。
解决方案:1、找到浙江省各个县区的边界数据,这里从阿里云 DataV(阿里云数据可视化)的地图数据接口获取;2、编写爬虫脚本获取数据;3、将数据存入数据库。
具体实现:
import pymssql
import random
import time
# 地图数据链接
"""
https://geo.datav.aliyun.com/areas_v3/bound/330000.json
"""
import json
import requests
def get_zhejiang_code(shi_all_json):
    """Collect the ``adcode`` of every feature in a boundary document.

    Args:
        shi_all_json: Parsed JSON mapping with a ``"features"`` list; every
            feature must carry a ``"properties"`` mapping.

    Returns:
        A list with one entry per feature, in document order. A feature whose
        properties have no ``"adcode"`` contributes ``None``.

    Raises:
        KeyError: If ``"features"`` or a feature's ``"properties"`` is missing.
    """
    return [
        shi_dict["properties"].get("adcode")
        for shi_dict in shi_all_json["features"]
    ]
def get_datas(version, code_list):
    """Download and parse the boundary feature for each area code.

    For every code this requests
    ``https://geo.datav.aliyun.com/{version}/bound/{code}.json`` and keeps the
    first entry of the response's ``features`` list.

    Args:
        version: Data-set path segment of the URL, e.g. ``"areas_v3"``.
        code_list: Sequence of area codes to fetch.

    Returns:
        A list of dicts with the keys ``"code"``, ``"name"``, ``"feature"``
        and ``"version"``, one per successfully fetched code. A code whose
        download or parsing fails is reported on stdout and skipped.
    """
    insert_data_list = []
    total = len(code_list)
    for index, area_code in enumerate(code_list):
        url = "https://geo.datav.aliyun.com/{version}/bound/{code}.json".format(
            version=version, code=area_code)
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(url=url, timeout=10)
            response.raise_for_status()
            feature = response.json().get("features")[0]
            properties = feature["properties"]
        except (requests.RequestException, ValueError, KeyError, IndexError,
                TypeError, AttributeError) as error:
            # Best effort: report the failing code and carry on with the rest
            # instead of silently dropping it.
            print("爬取 {code} 失败: {error}".format(code=area_code, error=error))
            continue

        code = properties.get("adcode")
        insert_data_list.append({
            "code": code,
            "name": properties.get("name"),
            "feature": feature,
            "version": version,
        })
        # Count what is left in the list actually being crawled, not in a
        # module-level table.
        print("成功爬取 {code},还剩 {num}个".format(code=code, num=total - index - 1))
        # Random 1-3 s pause after each successful request (presumably to
        # avoid hammering the server).
        time.sleep(random.randint(1, 3))
    return insert_data_list
# 插入数据库
def insert_into_model(insert_data_list):
    """Insert the crawled records into the database table.

    Each record becomes one row ``(code, name, feature, version)``, with the
    feature serialised by ``json.dumps``. Values are bound as query parameters
    rather than formatted into the SQL text, so quotes inside a name or the
    JSON cannot break or alter the statement.

    All rows are written in one transaction: it is committed only when every
    insert succeeds; on any error it is rolled back and the error is printed.
    The connection is always closed.

    Args:
        insert_data_list: Iterable of dicts with the keys ``"code"``,
            ``"name"``, ``"feature"`` and ``"version"``. When it yields no
            records, no database connection is opened.
    """
    rows = [
        (
            data.get("code"),
            data.get("name"),
            json.dumps(data.get("feature")),
            data.get("version"),
        )
        for data in insert_data_list
    ]
    if not rows:
        return

    sql = "insert into 表名 values(%s, %s, %s, %s);"
    connect = pymssql.connect(host="XXXXXXX", user="XXXX", password="XXXXX", database="XXXXXXX")
    try:
        cursor = connect.cursor()
        cursor.executemany(sql, rows)
        connect.commit()
    except Exception as e:
        # Do not leave a half-written batch behind.
        connect.rollback()
        print(e)
    finally:
        connect.close()
# Area codes for Zhejiang: the prefecture-level cities first, then their
# districts and counties. The v3 table lists 330113 and 330114 where the v2
# table lists 330103 and 330104; all other entries are the same.
# Kept at module level so they exist whether the file is run or imported.
ZheJiang_provinces_code_v3 = [
    330100, 330200, 330300, 330400, 330500, 330600, 330700, 330800, 330900, 331000, 331100,
    330102, 330105, 330106, 330108, 330109, 330110, 330111, 330112, 330114, 330113, 330122,
    330127, 330182, 330203, 330205, 330206, 330211, 330212, 330213, 330225, 330226, 330281,
    330282, 330302, 330303, 330304, 330305, 330324, 330326, 330327, 330328, 330329, 330381,
    330382, 330383, 330402, 330411, 330421, 330424, 330481, 330482, 330483, 330502, 330503,
    330521, 330522, 330523, 330602, 330603, 330604, 330624, 330681, 330683, 330702, 330703,
    330723, 330726, 330727, 330781, 330782, 330783, 330784, 330802, 330803, 330822, 330824,
    330825, 330881, 330902, 330903, 330921, 330922, 331002, 331003, 331004, 331022, 331023,
    331024, 331081, 331082, 331083, 331102, 331121, 331122, 331123, 331124, 331125, 331126,
    331127, 331181,
]
ZheJiang_provinces_code_v2 = [
    330100, 330200, 330300, 330400, 330500, 330600, 330700, 330800, 330900, 331000, 331100,
    330102, 330103, 330104, 330105, 330106, 330108, 330109, 330110, 330111, 330112, 330122,
    330127, 330182, 330203, 330205, 330206, 330211, 330212, 330213, 330225, 330226, 330281,
    330282, 330302, 330303, 330304, 330305, 330324, 330326, 330327, 330328, 330329, 330381,
    330382, 330383, 330402, 330411, 330421, 330424, 330481, 330482, 330483, 330502, 330503,
    330521, 330522, 330523, 330602, 330603, 330604, 330624, 330681, 330683, 330702, 330703,
    330723, 330726, 330727, 330781, 330782, 330783, 330784, 330802, 330803, 330822, 330824,
    330825, 330881, 330902, 330903, 330921, 330922, 331002, 331003, 331004, 331022, 331023,
    331024, 331081, 331082, 331083, 331102, 331121, 331122, 331123, 331124, 331125, 331126,
    331127, 331181,
]

if __name__ == '__main__':
    # The crawl-and-store pipeline is left disabled, exactly as in the
    # original script; uncomment the lines below to run it.
    # res_insert_data_list = get_datas(version="areas_v2", code_list=ZheJiang_provinces_code_v2)
    # res_insert_data_list = get_datas(version="areas_v3", code_list=ZheJiang_provinces_code_v3)
    # print("开始插入表格--->")
    # insert_into_model(insert_data_list=res_insert_data_list)
    # # for i in res:
    # #     print(i)
    # print("插入完成!")
    pass