import requests
import pandas as pd
from bs4 import BeautifulSoup
num = 0
def get_text(url):
try:
headers = {
"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/85.0.4183.102 Safari/537.36', 'Accept-Language': 'zh-CN '
}
r = requests.get(url, headers=headers)
r.raise_for_status()
r.encoding = r.apparent_encoding
return r.text
except:
return "爬取網站失敗!"
def run(game_info, jump_link, game_evaluation, text):
soup = BeautifulSoup(text, "html.parser")
# 游戲評價
w = soup.find_all(class_="col search_reviewscore responsive_secondrow")
for u in w:
if u.span is not None:
game_evaluation.append(
u.span["data-tooltip-html"].split("<br>")[0] + "," + u.span["data-tooltip-html"].split("<br>")[-1])
else:
game_evaluation.append("暫無評價!")
# 游戲詳情頁面鏈接
link_text = soup.find_all("div", id="search_resultsRows")
for k in link_text:
b = k.find_all('a')
for j in b:
jump_link.append(j['href'])
# 名字和價格
global num
name_text = soup.find_all('div', class_="responsive_search_name_combined")
for z in name_text:
# 每個游戲的價格
name = z.find(class_="title").string.strip()
# 判斷折扣是否為None,提取價格
if z.find(class_="col search_discount responsive_secondrow").string is None:
price = z.find(class_="col search_price discounted responsive_secondrow").text.strip().split("¥")
game_info.append([num + 1, name, price[2].strip(), game_evaluation[num], jump_link[num]])
else:
price = z.find(class_="col search_price responsive_secondrow").string.strip().split("¥")
game_info.append([num + 1, name, price[1], game_evaluation[num], jump_link[num]])
num = num + 1
def save_data(game_info):
save_path = "D:/Steam.csv"
df = pd.DataFrame(game_info, columns=['排行榜', '游戲名字', '目前游戲價格¥', '游戲評價', '游戲頁面鏈接'])
df.to_csv(save_path, index=0)
print("檔案保存成功!")
if __name__ == "__main__":
Game_info = [] # 游戲全部資訊
Turn_link = [] # 翻頁鏈接
Jump_link = [] # 游戲詳情頁面鏈接
Game_evaluation = [] # 游戲好評率和評價
for i in range(1, 11):
Turn_link.append("https://store.steampowered.com/search/?tags=3859%2C1685%2C3843&category1=998&category3=1%2C9%2C38&os=win&filter=topsellers&page=1" + str("&page=" + str(i)))
run(Game_info, Jump_link, Game_evaluation, get_text(Turn_link[i-1]))
save_data(Game_info)
出現以下錯誤
Traceback (most recent call last):
File "C:\Users\DD\Desktop\4489489.py", line 70, in <module>
run(Game_info, Jump_link, Game_evaluation, get_text(Turn_link[i-1]))
File "C:\Users\DD\Desktop\4489489.py", line 52, in run
game_info.append([num + 1, name, price[1], game_evaluation[num], jump_link[num]])
IndexError: list index out of range
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/284151.html
上一篇:求一個簡單的BPSK符號同步的matlab代碼,要有注釋能看得懂
下一篇:if [ -x "$JAVA_HOME/jre/sh/java" ]是說存在$JAVA_HOME/jre/sh/java這個檔案嗎?