I am trying to modify sklearn's VotingClassifier so that it can be used with RandomizedSearchCV.
The idea is that, as the number of classifiers grows, the number of possible weight combinations explodes, and the search space is better represented by individual weight choices than by many distinct tuples. It also allows switching to smarter hyperparameter tuning methods, since there is information in the weight changes.
So how do I correctly subclass VotingClassifier? The code below either does not pass the weights on or uses the defaults, and the search complains that weights is not set by the parameters (which it is).
from scipy.stats import uniform
from xgboost import XGBClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline

class VotingClassifier2(VotingClassifier):
    def __init__(self, estimators, w1, w2, voting='soft', weights=None, n_jobs=None, flatten_transform=True):
        super().__init__(estimators, voting, weights, n_jobs, flatten_transform)
        if w1:
            tot = w1 + w2
        else:
            breakpoint()
        self.weights = (w1/tot, w2/tot)

pipe = Pipeline(
    [
        [
            "vc",
            VotingClassifier2(
                estimators=[
                    ("xgb", XGBClassifier()),
                    ('lr', LogisticRegression(fit_intercept=True, max_iter=300, solver='lbfgs'))
                ],
                voting="soft",
                weights=None,
                w1=1,
                w2=0
            ),
        ]
    ]
)

opt = RandomizedSearchCV(
    pipe,
    {
        "vc__w1": uniform(0.1, 1),
        "vc__w2": uniform(0.1, 1)
    },
    n_iter=5,
    cv=5,
    n_jobs=25,
    return_train_score=False,
    error_score='raise'
)
On the initial call, w1 and w2 show up as None, but the weights have been computed from the inputs as required. The search then runs and is unable to set them:
RuntimeError: Cannot clone object VotingClassifier2(estimators=[('xgb', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bynode=1, colsample_bytree=1, gamma=0, learning_rate=0.1,
max_delta_step=0, max_depth=3, min_child_weight=1, missing=None,
n_estimators=100, n_jobs=1, nthread=None,
objectiv...alty='l2', random_state=None, solver='warn',
tol=0.0001, verbose=0, warm_start=False))]))],
flatten_transform=True, n_jobs=None, voting='soft', w1=None,
w2=None, weights=(1.0, 0.0)), as the constructor either does not set or modifies parameter weights
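For reference, this is the contract that sklearn's clone enforces: every __init__ argument must be stored on the instance unchanged, because clone rebuilds the estimator from get_params() and then compares each parameter of the rebuilt object against the value it passed in. A minimal sketch (the ToyEstimator name is made up for illustration; it is not part of the code above):

from sklearn.base import BaseEstimator, clone

class ToyEstimator(BaseEstimator):
    # hypothetical estimator, only to show the clone check
    def __init__(self, weights=None, w1=1.0, w2=0.0):
        self.w1 = w1                        # stored unchanged: fine
        self.w2 = w2                        # stored unchanged: fine
        tot = w1 + w2
        self.weights = (w1/tot, w2/tot)     # overwrites the 'weights' argument: breaks cloning

clone(ToyEstimator())
# RuntimeError: Cannot clone object ToyEstimator(...), as the constructor
# either does not set or modifies parameter weights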
Best answer
RandomizedSearchCV changes the estimator's parameters through its attributes, so if you want the weights attribute to be updated along with w1 and w2, you can wrap them with the property decorator. Another option is to wrap weights directly, for example:
import scipy as sp
import scipy.stats  # make sp.stats available explicitly
from dask_ml.xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import RandomizedSearchCV

class VotingClassifier2(VotingClassifier):
    @property
    def weights(self):
        return self._weights

    @weights.setter
    def weights(self, value):
        # a single float sampled by the search is expanded to a pair of weights
        if isinstance(value, float):
            value = [value, 1 - value]
        self._weights = value

# setup a client based on your environment
client = ...

pipe = Pipeline(
    [
        [
            "vc",
            VotingClassifier2(
                estimators=[
                    ("xgb", XGBClassifier(sheduler=client)),
                    ('lr', LogisticRegression(fit_intercept=True, max_iter=300, solver='lbfgs'))
                ],
                voting="soft",
                weights=[.5, .5],
            ),
        ]
    ]
)

opt = RandomizedSearchCV(
    pipe,
    {
        "vc__weights": sp.stats.uniform(0.1, 1),
    },
    n_iter=5,
    cv=5,
    n_jobs=25,
    return_train_score=False,
    error_score='raise'
)
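For completeness, here is a rough end-to-end sketch of running such a search. To keep it self-contained it substitutes the plain xgboost XGBClassifier and synthetic data for the dask setup above (those substitutions are mine, not part of the answer):

# Usage sketch, continuing from the VotingClassifier2 with the weights property above.
from xgboost import XGBClassifier          # plain xgboost instead of the dask variant
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=500, n_features=10, random_state=0)

pipe = Pipeline(
    [("vc", VotingClassifier2(
        estimators=[
            ("xgb", XGBClassifier()),
            ("lr", LogisticRegression(max_iter=300)),
        ],
        voting="soft",
        weights=[.5, .5],
    ))]
)

opt = RandomizedSearchCV(
    pipe,
    # loc=0.1, scale=0.8 keeps draws in [0.1, 0.9], so both expanded weights stay positive
    {"vc__weights": sp.stats.uniform(0.1, 0.8)},
    n_iter=5,
    cv=5,
    random_state=0,
    error_score='raise',
)
opt.fit(X, y)
print(opt.best_params_)  # e.g. {'vc__weights': 0.73...}; the setter turns this into [0.73..., 0.26...]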
Edit:
If you really do need to use the w1 and w2 arguments, you should bind them to weights and remove weights from the __init__ arguments:
class VotingClassifier2(VotingClassifier):
    def __init__(self, estimators, w1, w2, voting='soft', n_jobs=None, flatten_transform=True):
        # keyword arguments keep this working on current sklearn, where
        # VotingClassifier's parameters after 'estimators' are keyword-only
        super().__init__(estimators, voting=voting, weights=[w1, w2],
                         n_jobs=n_jobs, flatten_transform=flatten_transform)
        self.w1 = w1
        self.w2 = w2

    @property
    def w1(self):
        return self.weights[0]

    @w1.setter
    def w1(self, value):
        if value is not None:
            self.weights[0] = value

    @property
    def w2(self):
        return self.weights[1]

    @w2.setter
    def w2(self, value):
        if value is not None:
            self.weights[1] = value
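As a quick sanity check (my sketch, not part of the answer): with this version, set_params updates the weights through the properties and clone no longer complains, so the original vc__w1/vc__w2 search space can be used again. The estimators below are placeholders:

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

vc = VotingClassifier2(
    estimators=[
        ("lr1", LogisticRegression()),
        ("lr2", LogisticRegression(C=0.1)),
    ],
    w1=0.5,
    w2=0.5,
)
vc.set_params(w1=0.8, w2=0.2)   # goes through the w1/w2 property setters
print(vc.weights)               # [0.8, 0.2]
clone(vc)                       # no RuntimeError: get_params() round-trips w1 and w2 unchanged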
Regarding "python - inheritance from the VotingClassifier in sklearn", a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/57198838/