Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

there is assertion error in case test_crossvalidator of tests/test_tuning.py:99 #371

Open
johnnyzhon opened this issue Aug 15, 2023 · 0 comments

Comments

@johnnyzhon
Copy link

Environment:
4 V100 gpu
spark-rapids-ml branch-23.08
CUML 23.08
CUDA 12.1
python3.9
spark3.4.0

pytest error:
src/spark_rapids_ml/tree.py:260: ValueError
_________________ test_crossvalidator[(100, 8)-float32-vector] _________________

tmp_path = '/tmp/spark_rapids_ml_tests_3ywlntku', feature_type = 'vector'
data_type = <class 'numpy.float32'>, data_shape = (100, 8)

@pytest.mark.parametrize("feature_type", [feature_types.vector])
@pytest.mark.parametrize("data_type", [np.float32])
@pytest.mark.parametrize("data_shape", [(100, 8)], ids=idfn)
def test_crossvalidator(
    tmp_path: str,
    feature_type: str,
    data_type: np.dtype,
    data_shape: Tuple[int, int],
) -> None:
    X, _, y, _ = make_regression_dataset(
        datatype=data_type,
        nrows=data_shape[0],
        ncols=data_shape[1],
    )

    with CleanSparkSession() as spark:
        df, features_col, label_col = create_pyspark_dataframe(
            spark, feature_type, data_type, X, y
        )
        assert label_col is not None

        rfc = RandomForestRegressor()
        rfc.setFeaturesCol(features_col)
        rfc.setLabelCol(label_col)

        evaluator = RegressionEvaluator()
        evaluator.setLabelCol(label_col)

        grid = ParamGridBuilder().addGrid(rfc.maxBins, [3, 5]).build()

        cv = CrossValidator(
            estimator=rfc,
            estimatorParamMaps=grid,
            evaluator=evaluator,
            numFolds=2,
            seed=101,
        )

        def check_cv(cv_est: Union[CrossValidator, CrossValidatorModel]) -> None:
            assert isinstance(cv_est, (CrossValidator, CrossValidatorModel))
            assert isinstance(cv_est.getEstimator(), RandomForestRegressor)
            assert isinstance(cv_est.getEvaluator(), RegressionEvaluator)
            assert cv_est.getNumFolds() == 2
            assert cv_est.getSeed() == 101
            assert cv_est.getEstimatorParamMaps() == grid

        check_cv(cv)

        path = tmp_path + "/cv"
        cv_path = f"{path}/cv"

        cv.write().overwrite().save(cv_path)
        cv_loaded = CrossValidator.load(cv_path)

        check_cv(cv_loaded)

        cv_model = cv.fit(df)
        check_cv(cv_model)

        cv_model_path = f"{path}/cv-model"
        cv_model.write().overwrite().save(cv_model_path)
        cv_model_loaded = CrossValidatorModel.load(cv_model_path)

        check_cv(cv_model_loaded)
      assert evaluator.evaluate(cv_model.transform(df)) == evaluator.evaluate(
            cv_model_loaded.transform(df)
        )

E assert 129.9102859946124 == 129.91028599461237
E + where 129.9102859946124 = (DataFrame[label_col: float, features: array, prediction: double])
E + where = RegressionEvaluator_c12c2d118a71.evaluate
E + and DataFrame[label_col: float, features: array, prediction: double] = (DataFrame[label_col: float, features: vector])
E + where = CrossValidatorModel_4072bbc9e0e1.transform
E + and 129.91028599461237 = (DataFrame[label_col: float, features: array, prediction: double])
E + where = RegressionEvaluator_c12c2d118a71.evaluate
E + and DataFrame[label_col: float, features: array, prediction: double] = (DataFrame[label_col: float, features: vector])
E + where = CrossValidatorModel_4072bbc9e0e1.transform

tests/test_tuning.py:99: AssertionError

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant