ValueError Traceback (most recent call last)
Cell In[33], line 2
1 gnb = GaussianNB()
----> 2 cv = cross_val_score(gnb,X_train,y_train,cv=5, error_score = 'raise')
3 print(cv)
4 print(cv.mean())
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:515, in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)
512 # To ensure multimetric format is not supported
513 scorer = check_scoring(estimator, scoring=scoring)
--> 515 cv_results = cross_validate(
516 estimator=estimator,
517 X=X,
518 y=y,
519 groups=groups,
520 scoring={"score": scorer},
521 cv=cv,
522 n_jobs=n_jobs,
523 verbose=verbose,
524 fit_params=fit_params,
525 pre_dispatch=pre_dispatch,
526 error_score=error_score,
527 )
528 return cv_results["test_score"]
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:266, in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)
263 # We clone the estimator to make sure that all the folds are
264 # independent, and that it is pickle-able.
265 parallel = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
--> 266 results = parallel(
267 delayed(_fit_and_score)(
268 clone(estimator),
269 X,
270 y,
271 scorers,
272 train,
273 test,
274 verbose,
275 None,
276 fit_params,
277 return_train_score=return_train_score,
278 return_times=True,
279 return_estimator=return_estimator,
280 error_score=error_score,
281 )
282 for train, test in cv.split(X, y, groups)
283 )
285 _warn_or_raise_about_fit_failures(results, error_score)
287 # For callabe scoring, the return type is only know after calling. If the
288 # return type is a dictionary, the error scores can now be inserted with
289 # the correct key.
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:63, in Parallel.__call__(self, iterable)
58 config = get_config()
59 iterable_with_config = (
60 (_with_config(delayed_func, config), args, kwargs)
61 for delayed_func, args, kwargs in iterable
62 )
---> 63 return super().__call__(iterable_with_config)
File /opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1918, in Parallel.__call__(self, iterable)
1916 output = self._get_sequential_output(iterable)
1917 next(output)
-> 1918 return output if self.return_generator else list(output)
1920 # Let's create an ID that uniquely identifies the current call. If the
1921 # call is interrupted early and that the same instance is immediately
1922 # re-used, this id will be used to prevent workers that were
1923 # concurrently finalizing a task from the previous call to run the
1924 # callback.
1925 with self._lock:
File /opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1847, in Parallel._get_sequential_output(self, iterable)
1845 self.n_dispatched_batches += 1
1846 self.n_dispatched_tasks += 1
-> 1847 res = func(*args, **kwargs)
1848 self.n_completed_tasks += 1
1849 self.print_progress()
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:123, in _FuncWrapper.__call__(self, *args, **kwargs)
121 config = {}
122 with config_context(**config):
--> 123 return self.function(*args, **kwargs)
File /opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:686, in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)
684 estimator.fit(X_train, **fit_params)
685 else:
--> 686 estimator.fit(X_train, y_train, **fit_params)
688 except Exception:
689 # Note fit time as time until error
690 fit_time = time.time() - start_time
File /opt/conda/lib/python3.10/site-packages/sklearn/naive_bayes.py:267, in GaussianNB.fit(self, X, y, sample_weight)
265 self._validate_params()
266 y = self._validate_data(y=y)
--> 267 return self._partial_fit(
268 X, y, np.unique(y), _refit=True, sample_weight=sample_weight
269 )
File /opt/conda/lib/python3.10/site-packages/sklearn/naive_bayes.py:427, in GaussianNB._partial_fit(self, X, y, classes, _refit, sample_weight)
424 if _refit:
425 self.classes_ = None
--> 427 first_call = _check_partial_fit_first_call(self, classes)
428 X, y = self._validate_data(X, y, reset=first_call)
429 if sample_weight is not None:
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/multiclass.py:420, in _check_partial_fit_first_call(clf, classes)
413 raise ValueError(
414 "`classes=%r` is not the same as on last call "
415 "to partial_fit, was: %r" % (classes, clf.classes_)
416 )
418 else:
419 # This is the first call to partial_fit
--> 420 clf.classes_ = unique_labels(classes)
421 return True
423 # classes is None and clf.classes_ has already previously been set:
424 # nothing to do
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/multiclass.py:107, in unique_labels(*ys)
105 _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None)
106 if not _unique_labels:
--> 107 raise ValueError("Unknown label type: %s" % repr(ys))
109 if is_array_api:
110 # array_api does not allow for mixed dtypes
111 unique_ys = xp.concat([_unique_labels(y) for y in ys])
ValueError: Unknown label type: (array([0.0, 1.0], dtype=object),)
我正在尝试在 Kaggle 的 Spaceship Titanic(太空船泰坦尼克号)项目中实现交叉验证,您可以在此处找到我的完整笔记本。我无法运行交叉验证函数,已经摆弄了几个小时,但一直找不到解决方案。
我之前按照 Ken Jee 的泰坦尼克号项目示例完成了我自己的泰坦尼克号预测提交。在那份代码中,交叉验证函数可以成功运行,于是我尝试把它复用到 Spaceship Titanic 项目中。如果您需要任何其他详细信息,请告诉我。我对机器学习还很陌生,所以提前感谢您的耐心。
I've tried getting rid of null values, verified that the data types for my training and test lists are the same, and I've tried mapping categorical variables to numerical variables.
遇到的错误 “ValueError: Unknown label type: (array([0.0, 1.0], dtype=object),)” 表明你的目标变量(`y_train`)的数据类型是 `object`,这表示它可能包含混合类型或被视为字符串。对于分类任务,Scikit-learn 期望目标变量为数值类型(通常为整数)。
检查数据类型: 在调用 `cross_val_score` 之前,先检查 `y_train` 的数据类型。
```python
print(y_train.dtype)
```
转换数据类型: 如果数据类型为 `object`,请将其转换为整数类型。可以使用 `astype` 方法执行此操作:
```python
y_train = y_train.astype(int)
```
如果标签不是数值(例如字符串),可以使用 `LabelEncoder` 将它们转换为数值:
```python
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_train = le.fit_transform(y_train)
```
在数据源处查找问题: 错误的数据类型可能表示数据加载或预处理过程中存在问题。仔细检查从CSV文件或其他数据源加载数据的方式,并确保目标列包含预期类型的值。
检查空值: 确保你的目标变量 `y_train` 中没有空值或 NaN 值。可以使用以下方法处理空值:
- 删除包含空值的行。
- 使用平均值、中位数或最频繁值等策略填充空值。