- 积分
- 3638
- 贡献
-
- 精华
- 在线时间
- 小时
- 注册时间
- 2014-10-21
- 最后登录
- 1970-1-1
|
楼主 |
发表于 2022-10-6 20:53:07
|
显示全部楼层
2、不同估计器的效果对比
将以上程序改写成函数形式,以估计器和数据集为参数,然后用10种估计器和3个数据集来嵌套循环。
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
def draw(data, clf, figtitle, plt):
    """Fit ``clf`` on one dataset and plot its decision surface.

    The features are standardized, split 60/40 into train and test sets
    (``random_state=42``), and ``clf`` is fitted on the training part.
    The classifier is then evaluated on a 0.02-step grid spanning the
    range of the first two standardized feature columns, and the result
    is drawn as a filled contour with the samples overlaid.

    Args:
        data: An ``(X, y)`` pair; columns 0 and 1 of ``X`` are plotted.
        clf: Estimator providing ``fit`` and either ``decision_function``
            or ``predict_proba``.
        figtitle: Title for the current axes.
        plt: Plotting interface used for all drawing calls (``contourf``,
            ``scatter``, ``xlim``, ``ylim``, ``title``, ``cm.RdBu``).

    Returns:
        The ``plt`` object that was passed in.
    """
    features, labels = data
    # NOTE(review): the scaler is fitted on all samples before the split,
    # so test-set statistics influence the scaling.
    features = StandardScaler().fit_transform(features)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=.4, random_state=42
    )

    clf.fit(X_train, y_train)

    # Plot limits follow the extent of the (scaled) data.
    first_col = features[:, 0]
    second_col = features[:, 1]
    x_min, x_max = np.min(first_col), np.max(first_col)
    y_min, y_max = np.min(second_col), np.max(second_col)

    # Dense grid over the plot area; each grid node is scored by the model.
    xx, yy = np.meshgrid(
        np.arange(x_min, x_max, .02),
        np.arange(y_min, y_max, .02),
    )
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # Prefer the raw decision score; otherwise fall back to column 1 of
    # predict_proba (presumably the second class's probability).
    if hasattr(clf, "decision_function"):
        scores = clf.decision_function(grid_points)
    else:
        scores = clf.predict_proba(grid_points)[:, 1]

    plt.contourf(xx, yy, scores.reshape(xx.shape), cmap=plt.cm.RdBu)

    # Overlay the samples: circles for training points, squares for test.
    cm_bright = ListedColormap(['#FF0000', '#0000FF'])
    for points, targets, marker in (
        (X_train, y_train, 'o'),
        (X_test, y_test, 's'),
    ):
        plt.scatter(points[:, 0], points[:, 1], c=targets, cmap=cm_bright,
                    edgecolors='k', marker=marker)

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.title(figtitle)
    return plt
if __name__ == "__main__":
    # Compare every classifier on every dataset and save the grid of
    # decision-surface plots (one row per classifier, one column per
    # dataset) to '000.jpg'.

    # Classifiers under comparison:
    classifiers = [
        KNeighborsClassifier(3),
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        GaussianProcessClassifier(1.0 * RBF(1.0)),
        DecisionTreeClassifier(max_depth=5),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        MLPClassifier(alpha=1, max_iter=1000),
        AdaBoostClassifier(),
        GaussianNB(),
        QuadraticDiscriminantAnalysis(),
    ]
    # Display names, in the same order as `classifiers`:
    names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
             "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
             "Naive Bayes", "QDA"]

    # Build a two-feature classification set and add uniform noise to it.
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               random_state=1, n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)
    linearly_separable = (X, y)

    # The three datasets to process:
    datasets = [make_moons(noise=0.3, random_state=0),
                make_circles(noise=0.2, factor=0.5, random_state=1),
                linearly_separable]

    plt.figure(figsize=(9, 27), dpi=200)
    n_rows, n_cols = len(classifiers), len(datasets)
    # Each row must get ONE classifier and ONE name. Passing the whole
    # lists (as the pasted code did, with its index subscripts lost)
    # makes draw() call .fit on a list and fail.
    for row, (figtitle, clf) in enumerate(zip(names, classifiers)):
        for col, data in enumerate(datasets):
            # Subplot positions are 1-based and run row by row.
            plt.subplot(n_rows, n_cols, row * n_cols + col + 1)
            draw(data, clf, figtitle, plt)

    plt.tight_layout()
    plt.savefig('000.jpg')
    plt.close()
效果图中,3列表示3种数据集,10行表示10种估计器。
|
|