4
Python 获取景点代码
source link: https://xushanxiang.com/2019/07/python-captures-tourist-attraction-data.html
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
Python 获取景点代码
本文是一个用 Python 3 编写的获取景点数据的代码案例。
from tqdm import tqdm
import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException
import pandas as pd
import numpy as np
import os
# Province-level regions that can be used as search keywords.
position = [
    "北京", "天津", "上海", "重庆", "河北", "山西",
    "辽宁", "吉林", "福建", "江西", "山东", "河南",
    "湖北", "湖南", "广东", "海南", "四川", "贵州",
    "云南", "陕西", "甘肃", "青海", "台湾", "内蒙古",
    "广西", "西藏", "宁夏", "新疆", "香港", "澳门",
]
# This rebinding replaces the full list above, so only Beijing is crawled.
position = ["北京"]

# Column accumulators filled by get_one_page(); each gets one entry per
# scraped sight, and each is its own independent list object.
name = []
level = []
hot = []
address = []
num = []
def _scrape_sight_rows(url):
    """Load *url* in a fresh headless Chrome and return one tuple per sight.

    Each tuple is ``(name, level, hot, address, num)``. The browser is always
    shut down, even when scraping fails part-way through.
    """
    from selenium.common.exceptions import NoSuchElementException

    def text_or(item, class_name, default):
        # Some fields are not present on every sight card; fall back to
        # *default* only when the element is genuinely missing.
        try:
            return item.find_element_by_class_name(class_name).text
        except NoSuchElementException:
            return default

    # NOTE(review): `chrome_options=` and `find_element(s)_by_class_name` are
    # the API spellings this file was written against; confirm they match the
    # installed Selenium version.
    option_chrome = webdriver.ChromeOptions()
    option_chrome.add_argument('--headless')
    driver = webdriver.Chrome(chrome_options=option_chrome)
    try:
        time.sleep(5)
        driver.get(url)
        return [
            (
                # Sight name.
                item.find_element_by_class_name("name").text,
                # Sight rating level ("" when the card has none).
                text_or(item, "level", ""),
                # Sight popularity; the first three characters of the text
                # are dropped (presumably a fixed label prefix - verify).
                item.find_element_by_class_name("product_star_level").text[3:],
                # Sight address.
                item.find_element_by_class_name("area").text,
                # Sight sales volume (0 when the card has none).
                text_or(item, "hot_num", 0),
            )
            for item in driver.find_elements_by_class_name("sight_item")
        ]
    finally:
        # Quit on every path so a failed attempt does not leak a Chrome process.
        driver.quit()


def get_one_page(key, page, max_retries=3):
    """Scrape one search-result page and append its sights to the global lists.

    On success, one entry per sight is appended to each of the module-level
    lists ``name``, ``level``, ``hot``, ``address`` and ``num``. Nothing is
    appended for a failed attempt, so the lists always stay the same length
    and a retry cannot duplicate rows.

    Args:
        key: Search keyword placed in the ``keyword`` query parameter.
        page: Result page number placed in the ``page`` query parameter.
        max_retries: How many additional attempts to make after the first one
            fails with a WebDriver error.

    Returns:
        None.

    Raises:
        WebDriverException: The error from the last attempt, once every
            attempt has failed (``TimeoutException`` is a subclass).
    """
    from urllib.parse import urlencode

    # Build the query with urlencode so the keyword is escaped correctly and
    # the `region` parameter is spelled properly (it had been mangled into
    # "®ion", which glued it onto the keyword value).
    query = urlencode({
        "keyword": key,
        "region": "",
        "from": "mpl_search_suggest",
        "page": page,
    })
    url = "http://piao.qunar.com/ticket/list.htm?" + query

    attempts = max(1, max_retries + 1)
    last_error = None
    for _ in range(attempts):
        try:
            rows = _scrape_sight_rows(url)
        except (TimeoutException, WebDriverException) as exc:
            # A tuple is required here: `A or B` would only ever catch `A`.
            last_error = exc
            continue

        # Commit only after the whole page was read successfully.
        for sight_name, sight_level, sight_hot, sight_address, sight_num in rows:
            name.append(sight_name)
            level.append(sight_level)
            hot.append(sight_hot)
            address.append(sight_address)
            num.append(sight_num)
        return

    raise last_error
# Crawl result pages 1 through 13 for every keyword in `position`, printing
# progress for each keyword and page.
for key in tqdm(position):
    print("正在爬取{}".format(key))
    for page in range(1, 14):
        print("正在爬取第{}页".format(page))
        get_one_page(key, page)

# Assemble the accumulated columns into a table and write it to sight.csv.
sight = {
    'name': name,
    'level': level,
    'hot': hot,
    'address': address,
    'num': num,
}
sight = pd.DataFrame(
    sight,
    columns=['name', 'level', 'hot', 'address', 'num'],
)
sight.to_csv("sight.csv", encoding="utf_8_sig")
运行这段代码需要先下载 chromedriver.exe。下载地址是:
http://chromedriver.chromium.org/downloads
本案例获取的数据以 .csv 格式保存到本地。
如果觉得我的文章对您有用,请随意赞赏。您的支持将鼓励我继续创作!
发表评论 取消回复
电子邮件地址不会被公开。 必填项已用*标注
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK