python - 以f1为评分函数的网格搜索，几页错误信息

我想使用 GridSearchCV 寻找最佳参数，并使用 f1 作为评分指标。

如果我去掉 scoring 参数，一切正常，不会报错。

这是我的代码:

from sklearn import grid_search
parameters = {'n_neighbors':(1,3,5,10,15),'weights':('uniform','distance'),'algorithm':('ball_tree','kd_tree','brute'),'leaf_size':(5,10,20,30,50)}
reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring="f1")
train_classifier(reg, X_train, y_train)
train_f1_score = predict_labels(reg, X_train, y_train)
print reg.best_params_
print "F1 score for training set: {}".format(train_f1_score)
print "F1 score for test set: {}".format(predict_labels(reg, X_test, y_test))

当我执行时，得到了好几页的错误信息，我完全看不出头绪 :(

ValueError                                Traceback (most recent call last)
<ipython-input-17-3083ff8a20ea> in <module>()
      3 parameters = {'n_neighbors':(1,3,5,10,15),'weights':('uniform','distance'),'algorithm':('ball_tree','kd_tree','brute'),'leaf_size':(5,10,20,30,50)}
      4 reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring="f1")
----> 5 train_classifier(reg, X_train, y_train)
      6 train_f1_score = predict_labels(reg, X_train, y_train)
      7 print reg.best_params_

<ipython-input-9-b56ce25fd90b> in train_classifier(clf, X_train, y_train)
      5     print "Training {}...".format(clf.__class__.__name__)
      6     start = time.time()
----> 7     clf.fit(X_train, y_train)
      8     end = time.time()
      9     print "Done!\nTraining time (secs): {:.3f}".format(end - start)

//anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in fit(self, X, y)
    802
    803         """
--> 804         return self._fit(X, y, ParameterGrid(self.param_grid))
    805
    806

//anaconda/lib/python2.7/site-packages/sklearn/grid_search.pyc in _fit(self, X, y, parameter_iterable)
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
    554                 for train, test in cv)
    555

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self, iterable)
    802             self._iterating = True
    803
--> 804             while self.dispatch_one_batch(iterator):
    805                 pass
    806

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in dispatch_one_batch(self, iterator)
    660                 return False
    661             else:
--> 662                 self._dispatch(tasks)
    663                 return True
    664

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in _dispatch(self, batch)
    568
    569         if self._pool is None:
--> 570             job = ImmediateComputeBatch(batch)
    571             self._jobs.append(job)
    572             self.n_dispatched_batches += 1

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __init__(self, batch)
    181         # Don't delay the application, to avoid keeping the input
    182         # arguments in memory
--> 183         self.results = batch()
    184
    185     def get(self):

//anaconda/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.pyc in __call__(self)
     70
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
     73
     74     def __len__(self):

//anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, error_score)
   1548
   1549     else:
-> 1550         test_score = _score(estimator, X_test, y_test, scorer)
   1551         if return_train_score:
   1552             train_score = _score(estimator, X_train, y_train, scorer)

//anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc in _score(estimator, X_test, y_test, scorer)
   1604         score = scorer(estimator, X_test)
   1605     else:
-> 1606         score = scorer(estimator, X_test, y_test)
   1607     if not isinstance(score, numbers.Number):
   1608         raise ValueError("scoring must return a number, got %s (%s) instead."

//anaconda/lib/python2.7/site-packages/sklearn/metrics/scorer.pyc in __call__(self, estimator, X, y_true, sample_weight)
     88         else:
     89             return self._sign * self._score_func(y_true, y_pred,
---> 90                                                  **self._kwargs)
     91
     92

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight)
    637     return fbeta_score(y_true, y_pred, 1, labels=labels,
    638                        pos_label=pos_label, average=average,
--> 639                        sample_weight=sample_weight)
    640
    641

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight)
    754                                                  average=average,
    755                                                  warn_for=('f-score',),
--> 756                                                  sample_weight=sample_weight)
    757     return f
    758

//anaconda/lib/python2.7/site-packages/sklearn/metrics/classification.pyc in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight)
    982                 else:
    983                     raise ValueError("pos_label=%r is not a valid label: %r" %
--> 984                                      (pos_label, present_labels))
    985             labels = [pos_label]
    986     if labels is None:

ValueError: pos_label=1 is not a valid label: array(['no', 'yes'],
      dtype='|S3')

最佳答案

看起来您的标签数组的取值是 "no" 和 "yes"。您应该将它们转换为 1/0 的二进制数字表示，因为错误信息表明：评分函数使用的是 pos_label=1，而在标签数组 ['no', 'yes'] 中找不到 1 这个标签。

另一种无需修改标签数组的解决方法:

from sklearn.metrics import f1_score
from sklearn.metrics import make_scorer

f1_scorer = make_scorer(f1_score, pos_label="yes")
reg = grid_search.GridSearchCV(estimator=neigh,param_grid=parameters,scoring=f1_scorer)

关于python - 以f1为评分函数的网格搜索，几页错误信息，我们在Stack Overflow上找到一个类似的问题：https://stackoverflow.com/questions/34221712/