Ensemble classifiers:
There are four general ways to construct an ensemble classifier:
(1) By manipulating the training set, e.g. bagging and boosting.
(2) By manipulating the input features, e.g. random forest (sketched below).
(3) By manipulating the class labels, e.g. error-correcting output codes (sketched below).
(4) By manipulating the learning algorithm, e.g. voting.
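Methods (2) and (3) have no code example in the sections below, so here is a minimal sketch of both, using scikit-learn's RandomForestClassifier and OutputCodeClassifier on the iris data; the dataset, base estimator, and parameter values are illustrative assumptions, not from the original notes.

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC

X, y = load_iris(return_X_y=True)

# (2) manipulate the input features: each tree in a random forest is grown on a random subset of features
rf = RandomForestClassifier(n_estimators=100, random_state=1).fit(X, y)

# (3) manipulate the class labels: error-correcting output codes recast one multiclass problem
# as several binary problems, here solved with a linear SVM (illustrative choice)
ecoc = OutputCodeClassifier(LinearSVC(random_state=1), code_size=2, random_state=1).fit(X, y)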
1 bagging
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier

# bag 10 k-nearest-neighbors base estimators, each trained on 50% of the samples and 50% of the features
meta_clf = KNeighborsClassifier()
bg_clf = BaggingClassifier(meta_clf, max_samples=0.5, max_features=0.5)
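A minimal usage sketch for the bagging ensemble above, using the iris data as an illustrative stand-in (the dataset choice is an assumption, not part of the original snippet):

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)  # illustrative data
scores = cross_val_score(bg_clf, X, y, cv=5)
print("Bagging accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))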
2 AdaBoost
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# boost 200 decision stumps (depth-1 trees) with the SAMME algorithm
bdt = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),
                         algorithm="SAMME",
                         n_estimators=200)
bdt.fit(X, y)  # X, y: training features and labels (see the sketch below)
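The fit call above assumes X and y already hold training data. A self-contained run, again using iris purely for illustration:

from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score

X, y = load_iris(return_X_y=True)  # illustrative data
scores = cross_val_score(bdt, X, y, cv=5)
print("AdaBoost accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std()))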
3 voting
from sklearn import datasets
from sklearn.model_selection import cross_val_score  # replaces the removed sklearn.cross_validation module
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target

clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()

# hard (majority) vote, with the logistic regression and naive Bayes votes weighted twice as heavily
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                        voting='hard', weights=[2, 1, 2])

for clf, label in zip([clf1, clf2, clf3, eclf],
                      ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X, y, cv=5, scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))