Pearson相关性分析 & plot绘图(相关性系数柱状图、绘制非空值数量柱状图)
1.Pearson相关性分析
- Pearson相关性分析是一种用于衡量两个变量之间线性关系强度的统计方法,其相关系数介于-1和1之间:1表示完全正相关,-1表示完全负相关,0表示不存在线性关系(但仍可能存在非线性关系)。Pearson相关系数的显著性检验假设数据近似服从正态分布,并且该系数对异常值敏感。
2.Pearson相关性分析实例
# Compute the Pearson correlation of every feature column with the target column.
def calculate_pearsonr(pd, target="目标变量", min_abs_corr=0.0001):
    """Rank features by the strength of their Pearson correlation with a target.

    For each column other than ``target``, rows where either the feature or
    the target is missing are discarded (pairwise deletion) and the Pearson
    correlation coefficient is computed on the remaining pairs. The absolute
    value of the coefficient is kept, so the ranking reflects strength only,
    not direction.

    Args:
        pd: A pandas ``DataFrame`` holding the feature columns and the target
            column. NOTE: the parameter name shadows the conventional pandas
            alias inside this function; it is kept for backward compatibility.
        target: Name of the target column. Defaults to ``"目标变量"``.
        min_abs_corr: Features whose absolute coefficient is not strictly
            greater than this threshold are omitted from the result.

    Returns:
        A list of ``(feature, abs_coefficient)`` tuples sorted by coefficient
        in descending order.

    Raises:
        KeyError: If ``target`` is not a column of the frame.
        ValueError: If a column's usable values cannot be converted to float.
    """
    # Imported locally: no import of these names is visible in the file, and
    # the parameter name ``pd`` rules out relying on a module-level pandas alias.
    import numpy as np
    from scipy.stats import pearsonr

    target_series = pd[target]
    target_present = target_series.notna()

    coefficient_of_association = {}
    feature_cnt = {}
    for feature in pd.columns:
        if feature == target:
            continue

        feature_series = pd[feature]
        feature_present = feature_series.notna()
        # The reported count is the number of non-missing feature values,
        # independent of whether the target is present in the same row.
        feature_cnt[feature] = int(feature_present.sum())

        # Only rows where BOTH values exist can take part in the correlation;
        # a missing target value would otherwise turn the coefficient into NaN.
        usable = feature_present & target_present
        if int(usable.sum()) < 2:
            continue

        result = pearsonr(
            np.asarray(feature_series[usable], dtype=float),
            np.asarray(target_series[usable], dtype=float),
        )
        coefficient = result[0]
        # A constant column yields NaN (undefined correlation); skip it.
        if np.isnan(coefficient):
            continue

        strength = abs(coefficient)
        if strength > min_abs_corr:
            coefficient_of_association[feature] = strength

    dp_ca = sorted(
        coefficient_of_association.items(),
        key=lambda item: item[1],
        reverse=True,
    )
    print("pearsonr-相关系数:", dp_ca)
    dp_ca_Nempty = [(name, feature_cnt[name]) for name, _ in dp_ca]
    print("非空值的数量:", dp_ca_Nempty)
    return dp_ca
import matplotlib.pyplot as plt
def plot1(dp_ca):
    """Plot per-feature correlation coefficients as a bar chart.

    The chart is written to ``./Pearson.jpeg`` and then displayed.

    Args:
        dp_ca: Iterable of ``(feature, coefficient)`` pairs; it is converted
            to a dict, so each feature name appears once on the x axis.
    """
    coefficients = dict(dp_ca)

    # One large figure holding a single Axes.
    figure = plt.figure(figsize=(16, 10))
    axes = figure.add_subplot(1, 1, 1)

    # One bar per feature, labelled with the feature name.
    axes.bar(coefficients.keys(), coefficients.values())
    axes.set(
        title='Correlation between Feature and 目标变量',
        xlabel='Features',
        ylabel='Correlation Coefficient',
    )

    # Tilt the x tick labels by 45 degrees, anchored at their right end,
    # and render them at font size 10.
    plt.xticks(rotation=45, ha='right')
    axes.tick_params(axis='x', labelsize=10)

    # Tighten the layout, save the figure, then show it.
    plt.tight_layout()
    plt.savefig("./Pearson.jpeg")
    plt.show()
if __name__ == '__main__':
    # No pandas import is visible in this file, yet read_excel needs it;
    # import it here so the script entry point can run.
    import pandas as pd

    # Load the spreadsheet, rank the features, and plot the ranking.
    frame = pd.read_excel("./filename.xlsx")
    dp_ca = calculate_pearsonr(frame)
    plot1(dp_ca)
文章来源:https://www.toymoban.com/news/detail-707803.html
3.plot绘图(相关性系数柱状图、绘制非空值数量柱状图)
import matplotlib.pyplot as plt
# Example data: (feature, correlation coefficient) pairs and
# (feature, non-empty value count) pairs.
dp_ca = [('feature1', 0.8), ('feature2', 0.6), ('feature3', 0.4),('feature4', 0.77), ('feature5', 0.2), ('feature6', 0.4)]
dp_ca_Nempty = [('feature1', 100), ('feature3', 50), ('feature2', 20),('feature4', 70), ('feature5', 10), ('feature6', 26)]
# Convert the lists of tuples into dictionaries.
dp_ca_dict = dict(dp_ca)
dp_ca_Nempty_dict = dict(dp_ca_Nempty)
# Create two side-by-side subplots.
fig, axs = plt.subplots(1, 2, figsize=(10, 5))
# Left: bar chart of the correlation coefficients.
axs[0].bar(dp_ca_dict.keys(), dp_ca_dict.values())
axs[0].set_title('Pearson correlation coefficients')
axs[0].set_xlabel('Features')
axs[0].set_ylabel('Correlation coefficient')
# Right: bar chart of the non-empty value counts.
axs[1].bar(dp_ca_Nempty_dict.keys(), dp_ca_Nempty_dict.values())
axs[1].set_title('Number of non-empty values')
axs[1].set_xlabel('Features')
axs[1].set_ylabel('Count')
# Style the x tick labels of BOTH subplots: rotate 45 degrees anchored at the
# right end, font size 10. plt.xticks() would only affect the current Axes
# (the last subplot), leaving the left chart's labels unrotated.
for ax in axs:
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
    ax.tick_params(axis='x', labelsize=10)
# Tighten the layout and show the figure.
plt.tight_layout()
plt.show()
文章来源地址https://www.toymoban.com/news/detail-707803.html
到了这里,关于Pearson相关性分析& plot绘图(相关性系数柱状图、绘制非空值数量柱状图)的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!