续上,
11.岭回归 (Ridge Regression)
岭回归通过添加L2正则化项来改进线性回归的算法。
from sklearn.linear_model import Ridge
import numpy as np
import matplotlib.pyplot as plt

# Create some example data: y = 2 + 3x plus Gaussian noise
X = np.random.rand(100, 1)
y = 2 + 3 * X + np.random.randn(100, 1)

# Initialize the ridge-regression model (alpha is the L2 penalty strength)
ridge_reg = Ridge(alpha=1.0)

# Fit the model
ridge_reg.fit(X, y)

# Visualize: scattered data in blue, fitted line in red
# (original used typographic quotes, which are a SyntaxError in Python)
plt.scatter(X, y, color='blue')
plt.plot(X, ridge_reg.predict(X), color='red')
plt.title('Ridge Regression')
plt.show()
12.Lasso回归 (Lasso Regression)
Lasso回归通过添加L1正则化项来改进线性回归的算法。
from sklearn.linear_model import Lasso
import numpy as np
import matplotlib.pyplot as plt

# Create some example data: y = 2 + 3x plus Gaussian noise
X = np.random.rand(100, 1)
y = 2 + 3 * X + np.random.randn(100, 1)

# Initialize the Lasso model (alpha is the L1 penalty strength)
lasso_reg = Lasso(alpha=0.1)

# Fit the model
lasso_reg.fit(X, y)

# Visualize: scattered data in blue, fitted line in red
# (original used typographic quotes, which are a SyntaxError in Python)
plt.scatter(X, y, color='blue')
plt.plot(X, lasso_reg.predict(X), color='red')
plt.title('Lasso Regression')
plt.show()
13.弹性网络 (Elastic Net)
弹性网络结合了岭回归和Lasso回归的特点,同时使用L1和L2正则化。
from sklearn.linear_model import ElasticNet
import numpy as np
import matplotlib.pyplot as plt

# Create some example data: y = 2 + 3x plus Gaussian noise
X = np.random.rand(100, 1)
y = 2 + 3 * X + np.random.randn(100, 1)

# Initialize the elastic-net model
# (alpha is the overall penalty strength; l1_ratio mixes L1 vs. L2)
elastic_net = ElasticNet(alpha=0.1, l1_ratio=0.5)

# Fit the model
elastic_net.fit(X, y)

# Visualize: scattered data in blue, fitted line in red
# (original used typographic quotes, which are a SyntaxError in Python)
plt.scatter(X, y, color='blue')
plt.plot(X, elastic_net.predict(X), color='red')
plt.title('Elastic Net')
plt.show()
14.最小角回归 (Least Angle Regression, LARS)
用于高维数据的回归分析
from sklearn.linear_model import Lars
import numpy as np
import matplotlib.pyplot as plt

# Create some example data with 10 features and known coefficients
X = np.random.rand(100, 10)
y = np.dot(X, np.array([1.5, -2., 3., 0.5, -1., 2., 1.5, -0.5, 1., -2.])) + np.random.randn(100)

# Initialize the LARS model, allowing all 10 coefficients to be non-zero
lars = Lars(n_nonzero_coefs=10)

# Fit the model
lars.fit(X, y)

# Predict on the training data
y_pred = lars.predict(X)

# Visualize against the first feature only
# (original used typographic quotes, which are a SyntaxError in Python)
plt.scatter(X[:, 0], y, color='blue')
plt.scatter(X[:, 0], y_pred, color='red')
plt.title('Least Angle Regression (LARS)')
plt.show()
15.LightGBM
LightGBM是一种基于梯度提升的高效实现,特别适用于处理大规模数据。
import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Create some example data: 10 features, binary labels
X = np.random.rand(100, 10)
y = np.random.randint(0, 2, 100)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Wrap the data in LightGBM datasets
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test)

# Training parameters
# (original used typographic quotes, which are a SyntaxError in Python)
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'learning_rate': 0.05,
}

# Train the model.
# NOTE: the early_stopping_rounds= keyword was removed from lgb.train in
# LightGBM 4.x; early stopping is now configured via a callback.
lgbm = lgb.train(
    params,
    train_data,
    num_boost_round=100,
    valid_sets=[test_data],
    callbacks=[lgb.early_stopping(stopping_rounds=20)],
)

# Predict probabilities using the best iteration found by early stopping
y_pred = lgbm.predict(X_test, num_iteration=lgbm.best_iteration)

# Threshold probabilities at 0.5 to get binary labels
y_pred_binary = np.where(y_pred > 0.5, 1, 0)

# Report accuracy
accuracy = accuracy_score(y_test, y_pred_binary)
print('Accuracy:', accuracy)
16.CatBoost
CatBoost是专门为处理分类特征而设计的高效GBM实现。
from catboost import CatBoostClassifier
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Create some example data: 10 features, binary labels
X = np.random.rand(100, 10)
y = np.random.randint(0, 2, 100)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Initialize the CatBoost classifier (verbose=0 silences per-iteration logs)
catb = CatBoostClassifier(iterations=100, learning_rate=0.1, depth=3, verbose=0)

# Fit the model
catb.fit(X_train, y_train)

# Predict class labels
y_pred = catb.predict(X_test)

# Report accuracy
# (original used typographic quotes, which are a SyntaxError in Python)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
17.主题模型 (Topic Models) - LDA
隐狄利克雷分布(Latent Dirichlet Allocation, LDA)用于从文本数据中发现主题。
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd

# Example text corpus
# (original used typographic quotes, which are a SyntaxError in Python)
data = ['Dog bites man.', 'Man bites dog.', 'Dog eats meat.', 'Man eats food.']

# Convert the documents to a bag-of-words count matrix
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(data)

# Initialize LDA with 2 topics (fixed seed for reproducibility)
lda = LatentDirichletAllocation(n_components=2, random_state=0)

# Fit the model
lda.fit(X)

# Print the top 4 words for each topic
# (argsort()[:-5:-1] walks the 4 highest-weighted word indices)
feature_names = vectorizer.get_feature_names_out()
for topic_idx, topic in enumerate(lda.components_):
    print("Topic #%d:" % topic_idx)
    print(" ".join([feature_names[i] for i in topic.argsort()[:-4 - 1:-1]]))
18.关联规则学习 (Association Rule Learning) - Apriori
Apriori算法用于发现大数据集中的有趣关联。
from mlxtend.frequent_patterns import apriori, association_rules
import pandas as pd

# Example transaction data (long format: one row per item occurrence)
# (original used typographic quotes, which are a SyntaxError in Python)
data = pd.DataFrame({
    'item': ['bread', 'milk', 'butter', 'beer', 'chips', 'bread', 'butter'],
    'transaction_id': [1, 1, 1, 2, 2, 3, 3],
})

# Pivot to a one-hot transaction/item matrix
hot_encoded_df = data.groupby(['transaction_id', 'item'])['item'].count().unstack().reset_index().fillna(0).set_index('transaction_id')
# Binarize counts to 0/1. DataFrame.applymap is deprecated in pandas >= 2.1;
# this vectorized comparison is equivalent to the original per-cell lambda.
hot_encoded_df = (hot_encoded_df > 0).astype(int)

# Mine frequent itemsets appearing in at least 50% of transactions
frequent_itemsets = apriori(hot_encoded_df, min_support=0.5, use_colnames=True)

# Derive association rules with confidence >= 0.7
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
print(rules)
19.序列模型 (Sequence Models) - 隐马尔可夫模型 (HMMs)
隐马尔可夫模型(Hidden Markov Models, HMMs)适用于时间序列数据分析。
from hmmlearn import hmm
import numpy as np

# Example data: hidden state labels and observable symbols
states = ["Rainy", "Sunny"]
observations = ["walk", "shop", "clean"]

# Map observation labels to integer codes, column-vector shape for hmmlearn
obs_map = {"walk": 0, "shop": 1, "clean": 2}
obs = np.array([[obs_map[o]] for o in ["walk", "walk", "shop", "clean"]])

# Define the model with fixed (not learned) parameters.
# NOTE: hmmlearn >= 0.3 renamed the classic discrete-emission HMM to
# CategoricalHMM; MultinomialHMM now models multinomial *counts* and no
# longer fits this single-symbol-per-step usage.
model = hmm.CategoricalHMM(n_components=2)
model.startprob_ = np.array([0.6, 0.4])
model.transmat_ = np.array([[0.7, 0.3],
                            [0.4, 0.6]])
model.emissionprob_ = np.array([[0.1, 0.4, 0.5],
                                [0.6, 0.3, 0.1]])

# Decode the most likely hidden-state sequence with Viterbi.
# Bind the result to a NEW name: the original reassigned `states`, which
# clobbered the label list and made the final print crash (joining ints).
logprob, hidden_states = model.decode(obs, algorithm="viterbi")
print("Observations:", ", ".join(observations[o] for o in np.squeeze(obs)))
print("States:", ", ".join(states[s] for s in hidden_states))
20.生成对抗网络 (Generative Adversarial Networks, GANs)
from keras.models import Sequential
from keras.layers import Dense, Reshape, Flatten, Conv2D, Conv2DTranspose, LeakyReLU
from keras.optimizers import Adam
import numpy as np

# Generator: 100-dim noise -> 16x16 map -> upsampled to a 64x64x1 image.
# NOTE: passing a LeakyReLU *layer instance* as activation= (as the original
# did) errors in modern Keras; add it as its own layer instead.
generator = Sequential()
generator.add(Dense(256, input_dim=100))        # 256 = 16*16*1, ready for Reshape
generator.add(LeakyReLU(alpha=0.2))
generator.add(Reshape((16, 16, 1)))
generator.add(Conv2DTranspose(128, kernel_size=4, strides=2, padding='same', activation='relu'))  # 16 -> 32
generator.add(Conv2DTranspose(128, kernel_size=4, strides=2, padding='same', activation='relu'))  # 32 -> 64
generator.add(Conv2D(1, kernel_size=7, padding='same', activation='tanh'))  # output in [-1, 1]

# Discriminator: 64x64x1 image -> real/fake probability
discriminator = Sequential()
discriminator.add(Conv2D(64, kernel_size=3, strides=2, padding='same', input_shape=(64, 64, 1)))
discriminator.add(LeakyReLU(alpha=0.2))
discriminator.add(Flatten())
discriminator.add(Dense(1, activation='sigmoid'))

# Compile the discriminator for its own training step
discriminator.compile(loss='binary_crossentropy', optimizer=Adam())

# Stacked GAN: generator feeding the (frozen) discriminator
gan = Sequential([generator, discriminator])

# Freeze the discriminator inside the combined model, then compile the GAN
discriminator.trainable = False
gan.compile(loss='binary_crossentropy', optimizer=Adam())

# Generate one image from random noise
noise = np.random.normal(0, 1, (1, 100))
generated_image = generator.predict(noise)
# Since the full model is fairly complex, only the architecture is shown here
到了这里,关于机器学习代码示例3(含注释和部分数据)的文章就介绍完了。