From 1c0fd47005d3881334efe1f7ace5fe1f4d48ac57 Mon Sep 17 00:00:00 2001 From: Dennis Collaris Date: Sun, 14 Apr 2024 15:25:11 +0200 Subject: [PATCH] fix: force multinomial if both classes are specified in the pmml This is similar to the logic that was there before, but was broken due to scikit-learn refactoring. --- sklearn_pmml_model/ensemble/gb.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/sklearn_pmml_model/ensemble/gb.py b/sklearn_pmml_model/ensemble/gb.py index 3da68c9..975cf82 100644 --- a/sklearn_pmml_model/ensemble/gb.py +++ b/sklearn_pmml_model/ensemble/gb.py @@ -5,10 +5,6 @@ import numpy as np from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor -try: - from sklearn.ensemble._gb_losses import MultinomialDeviance -except ImportError: - pass from sklearn_pmml_model.base import PMMLBaseClassifier, PMMLBaseRegressor, IntegerEncodingMixin from sklearn_pmml_model.tree import get_tree from scipy.special import expit @@ -86,6 +82,7 @@ def __init__(self, pmml): self.template_estimator = clf try: + from sklearn.ensemble._gb_losses import MultinomialDeviance self._check_params() if self.n_classes_ == 2 and len(segments) == 3 and segments[-1].find('TreeModel') is None: @@ -96,10 +93,18 @@ def __init__(self, pmml): except AttributeError: self._loss = MultinomialDeviance(self.n_classes_ + 1) self._loss.K = 2 - except AttributeError: + except ImportError: + from sklearn._loss.loss import HalfMultinomialLoss + self._set_max_features() - self._loss = self._get_loss(sample_weight=None) - self.n_trees_per_iteration_ = 1 if self.n_classes_ == 2 else self.n_classes_ + + if self.n_classes_ == 2 and len(segments) == 3 and segments[-1].find('TreeModel') is None: + # For binary classification where both sides are specified, we need to force multinomial deviance + self._loss = HalfMultinomialLoss(sample_weight=None, n_classes=self.n_classes_ + 1) + self.n_trees_per_iteration_ = self.n_classes_ + else: + self._loss = self._get_loss(sample_weight=None) + self.n_trees_per_iteration_ = 1 if self.n_classes_ == 2 else self.n_classes_ try: self.init = None