人生苦短 我用Python
Python其他实用资料:点击此处跳转文末名片获取
一、数据概览
1.背景描述
该数据集整理了从1896年雅典奥运会至2016年里约热内卢奥运会120年的奥林匹克运动会的历史数据。
需要注意的是,在1896年-1992年期间,冬季奥运会与夏季奥运会都是在同一年举行的。在这之后,冬季与夏季的奥运会才被错开举办,冬季奥运会从1994年开始4年举办一次,夏季奥运会从1996开始4年举办一次。大家在分析这些数据时,经常会犯得一个错误就是认为夏季与冬季奥运会是一直错开举办的。
-
受疫情影响,2020东京奥运会将延期至2021年举行;
-
虽然延期,但此次奥运会依旧会沿用「2020东京奥运会」这个名称;
-
这也将是奥运会历史上首次延期(1916年、1940年、1944年曾因一战,二战停办);
2.数据说明
-
文件列表 该数据集包含两个文件:
-
athlete_events.csv :参赛运动员基本生物数据和奖牌结果
-
noc_regions.csv : 国家奥委会3个字母的代码与对应国家信息
3.属性描述
文件athlete_events.csv中包含15个字段,具体信息如下:
每一行代表的是一个参加个人比赛运动员
文件noc_regions.csv中包含3个字段,具体信息如下:
4.数据来源
数据集源自于kaggle平台用户分享,
基于证书 CC0: Public Domain 发布,
具体信息内容源自https://www.sports-reference.com/
二、数据集可探索、研究的方向
可以从以下几个方面来探索奥林匹克运动会的演变历程:
-
历年来 男女参赛运动员的表现如何?
-
那不同地区?
-
不同运动项目?
-
不同比赛项目?
三、可视化分析
1.🏆各国累计奖牌数
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
athlete_data = pd.read_csv('./data/athlete_events.csv')
noc_region = pd.read_csv('./data/noc_regions.csv')
# 关联代表国家
data = pd.merge(athlete_data, noc_region, on='NOC', how='left')
print(data.head())
medal_data = data.groupby(['Year', 'Season', 'region', 'Medal'])['Event'].nunique().reset_index()
medal_data.columns = ['Year', 'Season', 'region', 'Medal', 'Nums']
medal_data = medal_data.sort_values(by="Year", ascending=True)
def medal_stat(year, season='Summer'):
t_data = medal_data[(medal_data['Year'] <= year) & (medal_data['Season'] == season)]
t_data = t_data.groupby(['region', 'Medal'])['Nums'].sum().reset_index()
t_data = t_data.set_index(['region', 'Medal']).unstack().reset_index().fillna(0, inplace=False)
t_data = sorted(
[(row['region'][0], int(row['Nums']['Gold']), int(row['Nums']['Silver']), int(row['Nums']['Bronze']))
for _, row in t_data.iterrows()], key=lambda x: x[1] + x[2] + x[3], reverse=True)[:20]
return t_data
year_list = sorted(list(set(medal_data['Year'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
for year in year_list:
t_data = medal_stat(year)[::-1]
bar = (
Bar(init_opts=opts.InitOpts())
.add_xaxis([x[0] for x in t_data])
.add_yaxis("铜牌🥉", [x[3] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(218,165,32)'))
.add_yaxis("银牌🥈", [x[2] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(192,192,192)'))
.add_yaxis("金牌🏅️", [x[1] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(255,215,0)'))
.set_series_opts(label_opts=opts.LabelOpts(is_show=True,
position='insideRight',
font_style='italic'), )
.set_global_opts(
title_opts=opts.TitleOpts(title="各国累计奖牌数(夏季奥运会)"),
xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
legend_opts=opts.LegendOpts(is_show=True),
graphic_opts=[opts.GraphicGroup(graphic_item=opts.GraphicItem(
rotation=JsCode("Math.PI / 4"),
bounding="raw",
right=110,
bottom=110,
z=100),
children=[
opts.GraphicRect(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_shape_opts=opts.GraphicShapeOpts(
width=400, height=50
),
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="rgba(0,0,0,0.3)"
),
),
opts.GraphicText(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_textstyle_opts=opts.GraphicTextStyleOpts(
text=year,
font="bold 26px Microsoft YaHei",
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="#fff"
),
),
),
],
)
], )
.reversal_axis())
tl.add(bar, year)
tl.render(r".\htmlRender\01_各国累计奖牌数(夏季奥运会).html")
文章来源:https://www.toymoban.com/news/detail-487484.html
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
athlete_data = pd.read_csv('./data/athlete_events.csv')
noc_region = pd.read_csv('./data/noc_regions.csv')
# 关联代表国家
data = pd.merge(athlete_data, noc_region, on='NOC', how='left')
print(data.head())
medal_data = data.groupby(['Year', 'Season', 'region', 'Medal'])['Event'].nunique().reset_index()
medal_data.columns = ['Year', 'Season', 'region', 'Medal', 'Nums']
medal_data = medal_data.sort_values(by="Year", ascending=True)
def medal_stat(year, season='Summer'):
t_data = medal_data[(medal_data['Year'] <= year) & (medal_data['Season'] == season)]
t_data = t_data.groupby(['region', 'Medal'])['Nums'].sum().reset_index()
t_data = t_data.set_index(['region', 'Medal']).unstack().reset_index().fillna(0, inplace=False)
t_data = sorted(
[(row['region'][0], int(row['Nums']['Gold']), int(row['Nums']['Silver']), int(row['Nums']['Bronze']))
for _, row in t_data.iterrows()], key=lambda x: x[1] + x[2] + x[3], reverse=True)[:20]
return t_data
year_list = sorted(list(set(medal_data['Year'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
year_list = sorted(list(set(medal_data['Year'][medal_data.Season == 'Winter'].to_list())), reverse=True)
tl = Timeline(init_opts=opts.InitOpts(theme='dark', width='1000px', height='1000px'))
tl.add_schema(is_timeline_show=True, is_rewind_play=True, is_inverse=False,
label_opts=opts.LabelOpts(is_show=False))
for year in year_list:
t_data = medal_stat(year, 'Winter')[::-1]
bar = (
Bar(init_opts=opts.InitOpts(theme='dark'))
.add_xaxis([x[0] for x in t_data])
.add_yaxis("铜牌🥉", [x[3] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(218,165,32)'))
.add_yaxis("银牌🥈", [x[2] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(192,192,192)'))
.add_yaxis("金牌🏅️", [x[1] for x in t_data],
stack='stack1',
itemstyle_opts=opts.ItemStyleOpts(border_color='rgb(220,220,220)', color='rgb(255,215,0)'))
.set_series_opts(label_opts=opts.LabelOpts(is_show=True,
position='insideRight',
font_style='italic'), )
.set_global_opts(
title_opts=opts.TitleOpts(title="各国累计奖牌数(冬季奥运会)"),
xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=45)),
legend_opts=opts.LegendOpts(is_show=True),
graphic_opts=[opts.GraphicGroup(graphic_item=opts.GraphicItem(
rotation=JsCode("Math.PI / 4"),
bounding="raw",
right=110,
bottom=110,
z=100),
children=[
opts.GraphicRect(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_shape_opts=opts.GraphicShapeOpts(
width=400, height=50
),
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="rgba(0,0,0,0.3)"
),
),
opts.GraphicText(
graphic_item=opts.GraphicItem(
left="center", top="center", z=100
),
graphic_textstyle_opts=opts.GraphicTextStyleOpts(
text='截止{}'.format(year),
font="bold 26px Microsoft YaHei",
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="#fff"
),
),
),
],
)
], )
.reversal_axis())
tl.add(bar, year)
tl.render(r".\htmlRender\02_各国累计奖牌数(冬季奥运会).html")
文章来源地址https://www.toymoban.com/news/detail-487484.html
2.⚽️各项运动产生金牌数
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
athlete_data = pd.read_csv('./data/athlete_events.csv')
noc_region = pd.read_csv('./data/noc_regions.csv')
# 关联代表国家
data = pd.merge(athlete_data, noc_region, on='NOC', how='left')
print(data.head())
medal_data = data.groupby(['Year', 'Season', 'region', 'Medal'])['Event'].nunique().reset_index()
medal_data.columns = ['Year', 'Season', 'region', 'Medal', 'Nums']
medal_data = medal_data.sort_values(by="Year", ascending=True)
background_color_js = """new echarts.graphic.RadialGradient(0.5, 0.5, 1, [{
offset: 0,
color: '#696969'
}, {
offset: 1,
color: '#000000'
}])"""
tab = Tab()
temp = data[(data['Medal'] == 'Gold') & (data['Year'] == 2016) & (data['Season'] == 'Summer')]
event_medal = temp.groupby(['Sport'])['Event'].nunique().reset_index()
event_medal.columns = ['Sport', 'Nums']
event_medal = event_medal.sort_values(by="Nums", ascending=False)
pie = (Pie(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js), width='1000px', height='800px'))
.add('金牌🏅️', [(row['Sport'], row['Nums']) for _, row in event_medal.iterrows()],
radius=["30%", "75%"],
rosetype="radius")
.set_global_opts(title_opts=opts.TitleOpts(title="2016年夏季奥运会各项运动产生金牌占比",
pos_left="center",
title_textstyle_opts=opts.TextStyleOpts(color="white",
font_size=20), ),
legend_opts=opts.LegendOpts(is_show=False))
.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {d}%"),
tooltip_opts=opts.TooltipOpts(trigger="item", formatter="{a} <br/>{b}: {c} ({d}%)"), )
)
tab.add(pie, '2016年夏奥会')
temp = data[(data['Medal'] == 'Gold') & (data['Year'] == 2014) & (data['Season'] == 'Winter')]
event_medal = temp.groupby(['Sport']
到了这里,关于基于python的奥运会历史数据分析【120年】的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!