爬虫案例—雪球网行情中心板块数据抓取-Toy模板网

这篇具有很好参考价值的文章主要介绍了爬虫案例—雪球网行情中心板块数据抓取。希望对大家有所帮助。如果存在错误或未考虑完全的地方，请大家不吝赐教，您也可以点击"举报违法"按钮提交疑问。

爬虫案例—雪球网行情中心板块数据抓取

雪球网行情中心网址：https://xueqiu.com/hq

目标：市场一览板块、热股榜板块、新股预告板块、关注排行榜板块

import datetime

import requests

headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
   }

# 实例化session对象，进行会话保持
session = requests.Session()
url = 'https://xueqiu.com'
session.get(url, headers=headers)

# 时间戳转成日期的函数
def timestamp_to_date(timestamp_data):
    timestamp = timestamp_data
    # 使用datetime.fromtimestamp()方法将时间戳转换为datetime对象
    datetime_obj = datetime.datetime.fromtimestamp(timestamp)
    # 使用strftime()方法将datetime对象格式化为字符串表示的日期
    formatted_date = datetime_obj.strftime("%Y-%m-%d")
    return formatted_date


# 获取四大板块的指数
def get_four_index():
    url = 'https://stock.xueqiu.com/v5/stock/batch/quote.json?symbol=SH000001,SZ399001,SZ399006,SH000688'
    res = session.get(url, headers=headers)
    items_lst = res.json()['data']['items']
    print('市 场 一 览：')
    print()
    print('板块名称\t指 数\t涨跌幅\t\t\t总市值')
    for item in items_lst:
        data_dic = item['quote']

        print(data_dic['name'], data_dic['current'], str(data_dic['chg']) + '(' + str(data_dic['percent']) + ')\t\t',
              f"{data_dic['market_capital'] / 1000000000000: >.2f}万亿")
    print('- ' * 50)


def get_stock(url_dict):
    for stock_type in url_lst_dict.keys():
        res = session.get(url_dict[stock_type], headers=headers)
        res.encoding = res.apparent_encoding

        stock_data = res.json()
        print(f'热股榜——{stock_type}：\n')
        print('股票代码\t\t', '股票名称\t\t\t\t\t', '股票涨跌幅')
        for stock in stock_data['data']['items']:
            print(f'{stock["code"]:8}\t', f'{stock["name"]:<25}', f'{stock["percent"]:>8}')

        print('- ' * 30)


# 定义获取新股json函数
def get_json(new_stock_url):
    res = session.get(new_stock_url, headers=headers)
    new_stock = res.json()
    return new_stock


# 定义获取港股新股函数
def hk_new_stokc(hk_new_stock_url):
    new_stock = get_json(hk_new_stock_url)
    print('- ' * 40)
    print('港股')

    print('新股代码\t', '新股名称\t', '上市日期\t', '招股价下限\t', '招股价上限')
    new_stock_info = new_stock['data']['items']

    for new_stock_item in new_stock_info:
        # 上市日期时间戳
        list_timestamp = new_stock_item['list_date'] / 1000
        print(new_stock_item['symbol'], '\t', new_stock_item['name'], '\t', timestamp_to_date(list_timestamp), '\t',
              new_stock_item['issprice_min'], '\t', new_stock_item['issprice_max'])


# 定义沪深新股抓取函数
def hs_new_stock(hs_new_stock_url):
    new_stock = get_json(hs_new_stock_url)
    print('新 股 预 告')
    print('- ' * 40)
    print('沪深')
    print('新股发行数量：', new_stock['data']['count'])
    print('新股代码\t', '新股名称\t', '申购代码\t', '预计发行量（万股）\t', '申购上限（万股）\t', '申购日期\t', '中签号公布日')
    new_stock_info = new_stock['data']['items']

    for new_stock_item in new_stock_info:
        # 申购日期时间戳
        distr_timestamp = new_stock_item['onl_distr_date'] / 1000

        # 公布中签日期时间戳
        draw_timestamp = new_stock_item['onl_lotwiner_stpub_date'] / 1000
        print(new_stock_item['symbol'], new_stock_item['name'], '\t', new_stock_item['onl_subcode'], '\t',
              new_stock_item['actissqty'], '\t\t\t', new_stock_item['onl_sub_maxqty'], '\t\t',
              timestamp_to_date(distr_timestamp), '\t', timestamp_to_date(draw_timestamp))


# 定义美股新股抓取函数
def un_new_stock(un_new_stock_url):
    new_stock = get_json(un_new_stock_url)
    print('- ' * 50)
    print('美股')
    print('新股发行数量：', new_stock['data']['count'])
    print('新股代码\t', '新股名称\t', '上市日期\t', '\t股本', '\t招股价下限\t', '招股价上限')
    new_stock_info = new_stock['data']['items']

    for new_stock_item in new_stock_info:
        # 上市日期时间戳
        list_timestamp = new_stock_item['list_date'] / 1000
        if new_stock_item['shares']:
            new_shares = '\t' + str(new_stock_item['shares'] / 10000) + '万'
        else:
            new_shares = '\t\t-\t'

        if new_stock_item['issprice_min']:
            new_min = '\t' + str(new_stock_item['issprice_min']) + '\t'

        if new_stock_item['issprice_max']:
            new_max = '\t' + str(new_stock_item['issprice_max']) + '\t'

        else:
            new_max = '\t-'
            new_min = '\t-\t'

        print(new_stock_item['symbol'], '\t', new_stock_item['name'][:8], '\t', timestamp_to_date(list_timestamp),
              new_shares, new_min, new_max)


# 本周排行榜， 本周新增股票，最热门股票
def get_new_add_stock(new_add_url):
    new_add_stock = get_json(new_add_url)
    print('- ' * 50)
    print('关注排行榜——本周新增')
    new_list = new_add_stock['data']['list']
    print('股票名称\t\t股 价\t\t关 注')
    for add_stock in new_list:
        print(f"{add_stock['name']}\t\t{add_stock['current']}\t\t{int(add_stock['follow7d']):<}")


# 本周排行榜，最热门股票
def get_hot_stock(new_hot_url):
    hot_stock = get_json(new_hot_url)
    print('- ' * 50)
    print('关注排行榜——最热门')
    hot_lst = hot_stock['data']['list']
    print('股票名称\t\t股 价\t\t关 注')
    for hot_stock in hot_lst:
        print(f"{hot_stock['name']}\t\t{hot_stock['current']}\t\t{int(hot_stock['follow']):<}")


if __name__ == '__main__':
    # 四大板块信息
    get_four_index()

    # 热门股票
    url_lst_dict = {'沪深': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=12&type=12',
                    '港股': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=13&type=13',
                    '美股': 'https://stock.xueqiu.com/v5/stock/hot_stock/list.json?page=1&size=9&_type=11&type=11'}
    get_stock(url_lst_dict)

    print()
    # 沪深新股网址
    hs_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/cn/query.json?type=subscribe&order_by=onl_subbeg_date&order=asc&source=new_subscribe&page=1&size=10'
    hs_new_stock(hs_new_stock_url)
    # 港股新股网址
    hk_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/hk/query.json?order=desc&order_by=list_date&type=unlisted&page=1&size=10'
    hk_new_stokc(hk_new_stock_url)

    # 美股新股网址
    un_new_stock_url = 'https://stock.xueqiu.com/v5/stock/preipo/us/list.json?order=desc&order_by=list_date&type=unlisted&page=1&size=10'
    un_new_stock(un_new_stock_url)

    # 关注排行榜，本周新增
    new_add_stock_url = 'https://stock.xueqiu.com/v5/stock/screener/screen.json?page=1&only_count=0&size=10&category=CN&order_by=follow7d&order=desc'
    get_new_add_stock(new_add_stock_url)

    # 最热门股票
    hot_stock_url = 'https://stock.xueqiu.com/v5/stock/screener/screen.json?page=1&only_count=0&size=10&category=CN&order_by=follow&order=desc'
    get_hot_stock(hot_stock_url)