From 83b52663eb32077f43e82fcd4bec56dd14c67dfd Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Tue, 13 Feb 2024 22:35:32 +0000 Subject: [PATCH] [bug] fix ensemble algo _onedal_gpu_supported logic (#1696) (#1710) * Update _forest.py * Update deselected_tests.yaml * this is definitely going to fail CI * Update deselected_tests.yaml * Update deselected_tests.yaml * Update deselected_tests.yaml * Update test_forest.py * Update test_forest.py (cherry picked from commit e0a405c213d154be3f713f07285bd4fc11d34862) Co-authored-by: Ian Faust --- deselected_tests.yaml | 32 +++++++++++++---------- sklearnex/ensemble/_forest.py | 4 +-- sklearnex/ensemble/tests/test_forest.py | 34 +++++++++++-------------- 3 files changed, 36 insertions(+), 34 deletions(-) diff --git a/deselected_tests.yaml b/deselected_tests.yaml index 933b265a43..f4cd725505 100755 --- a/deselected_tests.yaml +++ b/deselected_tests.yaml @@ -441,8 +441,6 @@ gpu: - ensemble/tests/test_bagging.py::test_gridsearch - ensemble/tests/test_bagging.py::test_estimators_samples - ensemble/tests/test_common.py::test_ensemble_heterogeneous_estimators_behavior - - ensemble/tests/test_forest.py::test_min_samples_split[RandomForestClassifier] - - ensemble/tests/test_forest.py::test_min_weight_fraction_leaf - ensemble/tests/test_voting.py::test_parallel_fit - ensemble/tests/test_voting.py::test_sample_weight @@ -640,8 +638,6 @@ gpu: - model_selection/tests/test_search.py::test_random_search_cv_results # Segmentation faults on GPU - - ensemble/tests/test_forest.py::test_forest_classifier_oob - - ensemble/tests/test_forest.py::test_forest_regressor_oob - tests/test_common.py::test_search_cv - manifold/tests/test_t_sne.py::test_n_iter_without_progress @@ -736,15 +732,25 @@ gpu: - tests/test_common.py::test_f_contiguous_array_estimator[TSNE] - manifold/tests/test_t_sne.py::test_tsne_works_with_pandas_output - # GPU ensemble (Random Forest and Extra Trees) algorithms have a different - # implementation compared to CPU and require further validation - - ensemble/tests/test_forest.py::test_importances[ExtraTreesClassifier-gini-float64] - - ensemble/tests/test_forest.py::test_importances[ExtraTreesClassifier-gini-float32] - - ensemble/tests/test_forest.py::test_importances[ExtraTreesRegressor-squared_error-float64] - - ensemble/tests/test_forest.py::test_importances[ExtraTreesRegressor-squared_error-float32] - - ensemble/tests/test_forest.py::test_importances[RandomForestClassifier-gini-float32] - - ensemble/tests/test_forest.py::test_importances[RandomForestRegressor-squared_error-float64] - - ensemble/tests/test_forest.py::test_importances[RandomForestRegressor-squared_error-float32] + # GPU Forest algorithm implementation does not follow certain Scikit-learn standards + - ensemble/tests/test_forest.py::test_max_leaf_nodes_max_depth + - ensemble/tests/test_forest.py::test_min_samples_split[ExtraTreesClassifier] + - ensemble/tests/test_forest.py::test_min_samples_split[RandomForestClassifier] + - ensemble/tests/test_forest.py::test_min_samples_split[ExtraTreesRegressor] + - ensemble/tests/test_forest.py::test_max_samples_boundary_regressors + + # numerical issues in GPU Forest algorithms which require further investigation + - ensemble/tests/test_forest.py::test_forest_classifier_oob[X0-y0-0.9-array-ExtraTreesClassifier] + - ensemble/tests/test_forest.py::test_forest_classifier_oob[X0-y0-0.9-array-RandomForestClassifier] + - ensemble/tests/test_forest.py::test_forest_classifier_oob[X1-y1-0.65-array-RandomForestClassifier] + - ensemble/tests/test_forest.py::test_forest_classifier_oob[X2-y2-0.65-array-ExtraTreesClassifier] + - ensemble/tests/test_forest.py::test_forest_classifier_oob[X2-y2-0.65-array-RandomForestClassifier] + - ensemble/tests/test_forest.py::test_forest_regressor_oob[X0-y0-0.7-array-RandomForestRegressor] + - ensemble/tests/test_stacking.py::test_stacking_regressor_drop_estimator + - ensemble/tests/test_voting.py::test_predict_on_toy_problem[42] + - tests/test_common.py::test_estimators[ExtraTreesClassifier()-check_class_weight_classifiers] + - tests/test_common.py::test_estimators[ExtraTreesRegressor()-check_sample_weights_invariance(kind=zeros)] + - tests/test_common.py::test_estimators[RandomForestRegressor()-check_regressor_data_not_an_array] # GPU implementation of Extra Trees doesn't support sample_weights # comparisons to GPU with sample weights will use different algorithms diff --git a/sklearnex/ensemble/_forest.py b/sklearnex/ensemble/_forest.py index 716eb71dff..fa2015c155 100644 --- a/sklearnex/ensemble/_forest.py +++ b/sklearnex/ensemble/_forest.py @@ -745,7 +745,7 @@ def _onedal_gpu_supported(self, method_name, *data): or self.estimator.__class__ == DecisionTreeClassifier, "ExtraTrees only supported starting from oneDAL version 2023.1", ), - (sample_weight is not None, "sample_weight is not supported."), + (sample_weight is None, "sample_weight is not supported."), ] ) @@ -1052,7 +1052,7 @@ def _onedal_gpu_supported(self, method_name, *data): or self.estimator.__class__ == DecisionTreeClassifier, "ExtraTrees only supported starting from oneDAL version 2023.1", ), - (sample_weight is not None, "sample_weight is not supported."), + (sample_weight is None, "sample_weight is not supported."), ] ) diff --git a/sklearnex/ensemble/tests/test_forest.py b/sklearnex/ensemble/tests/test_forest.py index 4429d2f1db..80dec7dce7 100644 --- a/sklearnex/ensemble/tests/test_forest.py +++ b/sklearnex/ensemble/tests/test_forest.py @@ -45,11 +45,7 @@ def test_sklearnex_import_rf_classifier(dataframe, queue): assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]]))) -# TODO: -# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) def test_sklearnex_import_rf_regression(dataframe, queue): from sklearnex.ensemble import RandomForestRegressor @@ -59,17 +55,17 @@ def test_sklearnex_import_rf_regression(dataframe, queue): rf = RandomForestRegressor(max_depth=2, random_state=0).fit(X, y) assert "sklearnex" in rf.__module__ pred = _as_numpy(rf.predict([[0, 0, 0, 0]])) - if daal_check_version((2024, "P", 0)): - assert_allclose([-6.971], pred, atol=1e-2) + + if queue is not None and queue.sycl_device.is_gpu: + assert_allclose([-0.011208], pred, atol=1e-2) else: - assert_allclose([-6.839], pred, atol=1e-2) + if daal_check_version((2024, "P", 0)): + assert_allclose([-6.971], pred, atol=1e-2) + else: + assert_allclose([-6.839], pred, atol=1e-2) -# TODO: -# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) def test_sklearnex_import_et_classifier(dataframe, queue): from sklearnex.ensemble import ExtraTreesClassifier @@ -90,11 +86,7 @@ def test_sklearnex_import_et_classifier(dataframe, queue): assert_allclose([1], _as_numpy(rf.predict([[0, 0, 0, 0]]))) -# TODO: -# investigate failure for `dpnp.ndarrays` and `dpctl.tensors` on `GPU` -@pytest.mark.parametrize( - "dataframe,queue", get_dataframes_and_queues(device_filter_="cpu") -) +@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues()) def test_sklearnex_import_et_regression(dataframe, queue): from sklearnex.ensemble import ExtraTreesRegressor @@ -114,4 +106,8 @@ def test_sklearnex_import_et_regression(dataframe, queue): ] ) ) - assert_allclose([0.445], pred, atol=1e-2) + + if queue is not None and queue.sycl_device.is_gpu: + assert_allclose([1.909769], pred, atol=1e-2) + else: + assert_allclose([0.445], pred, atol=1e-2)