1. 需要的类库
import requests
from bs4 import BeautifulSoup
import pandas as pd
2. 请求榜单
def fetch_ranking_data(url="https://m.xxx.com/rankm/", timeout=10):
    """Download the ranking page and return its raw body.

    Args:
        url: Address of the ranking page. The default is the placeholder
            URL used in the article (the real site name is redacted).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The response body as bytes when the server answers with HTTP 200,
        otherwise None (after printing the reason).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as a
        # bad status code: print the reason and signal failure with None.
        print(f"Error fetching data: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error fetching data. Status code: {response.status_code}")
        return None
    return response.content
3. 解析响应
def parse_html(html_content):
    """Extract rank, title and link for every entry on the ranking page.

    Args:
        html_content: HTML markup of the ranking page, or None/empty when
            the download failed.

    Returns:
        A list of dicts with the keys 'Rank', 'Title' and 'URL', one per
        entry that contains all three pieces. Entries missing any of them
        are skipped. An empty list is returned when there is no markup.
    """
    # fetch_ranking_data returns None on failure; nothing to parse then.
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'html.parser')
    # NOTE: a multi-word class_ string matches only tags whose class
    # attribute is exactly this string, in this order.
    rank_items = soup.find_all('div', class_='placeholder one-img-plc')

    data = []
    for rank_item in rank_items:
        rank_tag = rank_item.select_one('.rank-num')
        title_tag = rank_item.select_one('.plc-title')
        link_tag = rank_item.select_one('a')
        # select_one returns None when nothing matches; skip incomplete
        # entries instead of crashing the whole parse on one bad item.
        if rank_tag is None or title_tag is None or link_tag is None:
            continue

        href = link_tag.get('href')
        if href is None:
            # An <a> without an href gives no usable link for this entry.
            continue

        data.append({
            'Rank': rank_tag.text,
            'Title': title_tag.text,
            'URL': href,
        })
    return data
4. 输出文件
def create_excel(data, filename='ranking_data.xlsx'):
    """Write the scraped rows to an Excel workbook.

    Args:
        data: Rows to export; passed directly to pandas.DataFrame (the
            rest of this file produces a list of dicts).
        filename: Path of the workbook to create. Defaults to
            'ranking_data.xlsx' in the current working directory.
    """
    df = pd.DataFrame(data)
    # index=False keeps the DataFrame's row index out of the sheet.
    df.to_excel(filename, index=False)
    print("Excel file created successfully.")
5. 成果展示
文章来源地址:https://www.toymoban.com/news/detail-794961.html
文章来源:https://www.toymoban.com/news/detail-794961.html
到了这里,关于python爬虫实战(7)--获取it某家热榜的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!