fix error in xgboost #443

puririshi98 · 2024-08-30T19:17:43Z

_______ test_gbdt_with_save_load[task_type_and_metric4-stypes1-XGBoost] ________
gbdt_cls = <class 'torch_frame.gbdt.tuned_xgboost.XGBoost'>
stypes = [<stype.categorical: 'categorical'>]
task_type_and_metric = (<TaskType.MULTICLASS_CLASSIFICATION: 'multiclass_classification'>, <Metric.ACCURACY: 'accuracy'>)
    @pytest.mark.parametrize('gbdt_cls', [
        CatBoost,
        XGBoost,
        LightGBM,
    ])
    @pytest.mark.parametrize('stypes', [
        [stype.numerical],
        [stype.categorical],
        [stype.text_embedded],
        [stype.numerical, stype.numerical, stype.text_embedded],
    ])
    @pytest.mark.parametrize('task_type_and_metric', [
        (TaskType.REGRESSION, Metric.RMSE),
        (TaskType.REGRESSION, Metric.MAE),
        (TaskType.BINARY_CLASSIFICATION, Metric.ACCURACY),
        (TaskType.BINARY_CLASSIFICATION, Metric.ROCAUC),
        (TaskType.MULTICLASS_CLASSIFICATION, Metric.ACCURACY),
    ])
    def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric):
        task_type, metric = task_type_and_metric
        dataset: Dataset = FakeDataset(
            num_rows=30,
            with_nan=True,
            stypes=stypes,
            create_split=True,
            task_type=task_type,
            col_to_text_embedder_cfg=TextEmbedderConfig(
                text_embedder=HashTextEmbedder(8)),
        )
        dataset.materialize()
        gbdt = gbdt_cls(
            task_type=task_type,
            num_classes=dataset.num_classes
            if task_type == TaskType.MULTICLASS_CLASSIFICATION else None,
            metric=metric,
        )
    
        with tempfile.TemporaryDirectory() as temp_dir:
            path = osp.join(temp_dir, 'model.json')
            with pytest.raises(RuntimeError, match="is not yet fitted"):
                gbdt.save(path)
    
            if isinstance(gbdt_cls, XGBoost):
                gbdt.tune(tf_train=dataset.tensor_frame,
                          tf_val=dataset.tensor_frame, num_trials=2,
                          num_boost_round=1000, early_stopping_rounds=2)
                assert gbdt.model.best_iteration is not None
            else:
>               gbdt.tune(
                    tf_train=dataset.tensor_frame,
                    tf_val=dataset.tensor_frame,
                    num_trials=2,
                    num_boost_round=2,
                )
gbdt/test_gbdt.py:63: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/gbdt.py:88: in tune
    self._tune(tf_train, tf_val, num_trials=num_trials, *args, **kwargs)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:227: in _tune
    study.optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/study.py:451: in optimize
    _optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:62: in _optimize
    _optimize_sequential(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:159: in _optimize_sequential
    frozen_trial = _run_trial(study, func, catch)
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:247: in _run_trial
    raise func_err
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:196: in _run_trial
    value_or_values = func(trial)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:228: in <lambda>
    lambda trial: self.objective(
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:178: in objective
    boost = xgboost.train(self.params, dtrain,
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:726: in inner_f
    return func(**kwargs)
/usr/local/lib/python3.10/dist-packages/xgboost/training.py:181: in train
    bst.update(dtrain, iteration=i, fobj=obj)
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:2100: in update
    _check_call(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
ret = -1
    def _check_call(ret: int) -> None:
        """Check the return value of C API call
    
        This function will raise exception when error occurs.
        Wrap every API call with this function
    
        Parameters
        ----------
        ret :
            return value from API calls
        """
        if ret != 0:
>           raise XGBoostError(py_str(_LIB.XGBGetLastError()))
E           xgboost.core.XGBoostError: [17:59:51] /home/coder/xgboost/src/gbm/gblinear.cc:147: Check failed: !p_fmat->Info().HasCategorical(): `gblinear` doesn't support categorical features.
E           Stack trace:
E             [bt] (0) /usr/lib/libxgboost.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x78) [0xfff162a309f8]
E             [bt] (1) /usr/lib/libxgboost.so(xgboost::gbm::GBLinear::DoBoost(xgboost::DMatrix*, xgboost::linalg::Tensor<xgboost::detail::GradientPairInternal<float>, 2>*, xgboost::PredictionCacheEntry*, xgboost::ObjFunction const*)+0x608) [0xfff162d0a7f8]
E             [bt] (2) /usr/lib/libxgboost.so(xgboost::LearnerImpl::UpdateOneIter(int, std::shared_ptr<xgboost::DMatrix>)+0x354) [0xfff162d63e54]
E             [bt] (3) /usr/lib/libxgboost.so(XGBoosterUpdateOneIter+0x7c) [0xfff1629876dc]
E             [bt] (4) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x6e10) [0xfffbc7296e10]
E             [bt] (5) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x3a94) [0xfffbc7293a94]
E             [bt] (6) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x121c8) [0xfffbc72c21c8]
E             [bt] (7) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x109ec) [0xfffbc72c09ec]
E             [bt] (8) /usr/bin/python(_PyObject_MakeTpCall+0x28c) [0xaaad7c0fa030]
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:284: XGBoostError

```
_______ test_gbdt_with_save_load[task_type_and_metric4-stypes1-XGBoost] ________
gbdt_cls = <class 'torch_frame.gbdt.tuned_xgboost.XGBoost'>
stypes = [<stype.categorical: 'categorical'>]
task_type_and_metric = (<TaskType.MULTICLASS_CLASSIFICATION: 'multiclass_classification'>, <Metric.ACCURACY: 'accuracy'>)
    @pytest.mark.parametrize('gbdt_cls', [
        CatBoost,
        XGBoost,
        LightGBM,
    ])
    @pytest.mark.parametrize('stypes', [
        [stype.numerical],
        [stype.categorical],
        [stype.text_embedded],
        [stype.numerical, stype.numerical, stype.text_embedded],
    ])
    @pytest.mark.parametrize('task_type_and_metric', [
        (TaskType.REGRESSION, Metric.RMSE),
        (TaskType.REGRESSION, Metric.MAE),
        (TaskType.BINARY_CLASSIFICATION, Metric.ACCURACY),
        (TaskType.BINARY_CLASSIFICATION, Metric.ROCAUC),
        (TaskType.MULTICLASS_CLASSIFICATION, Metric.ACCURACY),
    ])
    def test_gbdt_with_save_load(gbdt_cls, stypes, task_type_and_metric):
        task_type, metric = task_type_and_metric
        dataset: Dataset = FakeDataset(
            num_rows=30,
            with_nan=True,
            stypes=stypes,
            create_split=True,
            task_type=task_type,
            col_to_text_embedder_cfg=TextEmbedderConfig(
                text_embedder=HashTextEmbedder(8)),
        )
        dataset.materialize()
        gbdt = gbdt_cls(
            task_type=task_type,
            num_classes=dataset.num_classes
            if task_type == TaskType.MULTICLASS_CLASSIFICATION else None,
            metric=metric,
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            path = osp.join(temp_dir, 'model.json')
            with pytest.raises(RuntimeError, match="is not yet fitted"):
                gbdt.save(path)

            if isinstance(gbdt_cls, XGBoost):
                gbdt.tune(tf_train=dataset.tensor_frame,
                          tf_val=dataset.tensor_frame, num_trials=2,
                          num_boost_round=1000, early_stopping_rounds=2)
                assert gbdt.model.best_iteration is not None
            else:
>               gbdt.tune(
                    tf_train=dataset.tensor_frame,
                    tf_val=dataset.tensor_frame,
                    num_trials=2,
                    num_boost_round=2,
                )
gbdt/test_gbdt.py:63: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/gbdt.py:88: in tune
    self._tune(tf_train, tf_val, num_trials=num_trials, *args, **kwargs)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:227: in _tune
    study.optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/study.py:451: in optimize
    _optimize(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:62: in _optimize
    _optimize_sequential(
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:159: in _optimize_sequential
    frozen_trial = _run_trial(study, func, catch)
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:247: in _run_trial
    raise func_err
/usr/local/lib/python3.10/dist-packages/optuna/study/_optimize.py:196: in _run_trial
    value_or_values = func(trial)
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:228: in <lambda>
    lambda trial: self.objective(
/usr/local/lib/python3.10/dist-packages/torch_frame/gbdt/tuned_xgboost.py:178: in objective
    boost = xgboost.train(self.params, dtrain,
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:726: in inner_f
    return func(**kwargs)
/usr/local/lib/python3.10/dist-packages/xgboost/training.py:181: in train
    bst.update(dtrain, iteration=i, fobj=obj)
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:2100: in update
    _check_call(
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
ret = -1
    def _check_call(ret: int) -> None:
        """Check the return value of C API call

        This function will raise exception when error occurs.
        Wrap every API call with this function

        Parameters
        ----------
        ret :
            return value from API calls
        """
        if ret != 0:
>           raise XGBoostError(py_str(_LIB.XGBGetLastError()))
E           xgboost.core.XGBoostError: [17:59:51] /home/coder/xgboost/src/gbm/gblinear.cc:147: Check failed: !p_fmat->Info().HasCategorical(): `gblinear` doesn't support categorical features.
E           Stack trace:
E             [bt] (0) /usr/lib/libxgboost.so(dmlc::LogMessageFatal::~LogMessageFatal()+0x78) [0xfff162a309f8]
E             [bt] (1) /usr/lib/libxgboost.so(xgboost::gbm::GBLinear::DoBoost(xgboost::DMatrix*, xgboost::linalg::Tensor<xgboost::detail::GradientPairInternal<float>, 2>*, xgboost::PredictionCacheEntry*, xgboost::ObjFunction const*)+0x608) [0xfff162d0a7f8]
E             [bt] (2) /usr/lib/libxgboost.so(xgboost::LearnerImpl::UpdateOneIter(int, std::shared_ptr<xgboost::DMatrix>)+0x354) [0xfff162d63e54]
E             [bt] (3) /usr/lib/libxgboost.so(XGBoosterUpdateOneIter+0x7c) [0xfff1629876dc]
E             [bt] (4) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x6e10) [0xfffbc7296e10]
E             [bt] (5) /usr/lib/aarch64-linux-gnu/libffi.so.8(+0x3a94) [0xfffbc7293a94]
E             [bt] (6) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x121c8) [0xfffbc72c21c8]
E             [bt] (7) /usr/lib/python3.10/lib-dynload/_ctypes.cpython-310-aarch64-linux-gnu.so(+0x109ec) [0xfffbc72c09ec]
E             [bt] (8) /usr/bin/python(_PyObject_MakeTpCall+0x28c) [0xaaad7c0fa030]
/usr/local/lib/python3.10/dist-packages/xgboost/core.py:284: XGBoostError
```

for more information, see https://pre-commit.ci

zechengz

Thanks.

akihironitta

It'd be good to add a quick test case to cover what this PR fixes :P

akihironitta

Oh, it was indeed covered by the test case. Do you know why we didn't catch this in our CI?

puririshi98 requested a review from yiweny August 30, 2024 19:17

[pre-commit.ci] auto fixes from pre-commit.com hooks

186a908

for more information, see https://pre-commit.ci

puririshi98 self-assigned this Aug 30, 2024

zechengz approved these changes Sep 3, 2024

View reviewed changes

zechengz added the skip-changelog label Sep 3, 2024

yiweny merged commit 200b962 into master Sep 4, 2024
13 of 14 checks passed

yiweny deleted the fix-xgboost branch September 4, 2024 01:27

akihironitta reviewed Sep 5, 2024

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

fix error in xgboost #443

fix error in xgboost #443

puririshi98 commented Aug 30, 2024

zechengz left a comment

akihironitta left a comment

akihironitta left a comment

fix error in xgboost #443

fix error in xgboost #443

Conversation

puririshi98 commented Aug 30, 2024

zechengz left a comment

Choose a reason for hiding this comment

akihironitta left a comment

Choose a reason for hiding this comment

akihironitta left a comment

Choose a reason for hiding this comment