diff --git a/onnxmltools/convert/lightgbm/_parse.py b/onnxmltools/convert/lightgbm/_parse.py index b31710b30..f4c21eef4 100644 --- a/onnxmltools/convert/lightgbm/_parse.py +++ b/onnxmltools/convert/lightgbm/_parse.py @@ -11,7 +11,7 @@ Int64Type, ) -from lightgbm import LGBMClassifier, LGBMRegressor +from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker lightgbm_classifier_list = [LGBMClassifier] @@ -21,6 +21,7 @@ lightgbm_operator_name_map = { LGBMClassifier: "LgbmClassifier", LGBMRegressor: "LgbmRegressor", + LGBMRanker: "LgbmRanker", } @@ -39,6 +40,8 @@ def __init__(self, booster): ("regression", "poisson", "gamma", "quantile", "huber", "tweedie") ): self.operator_name = "LgbmRegressor" + elif self.objective_.startswith(("lambdarank", "rank_xendcg")): + self.operator_name = "LgbmRanker" else: raise NotImplementedError( "Unsupported LightGbm objective: %r." % self.objective_ diff --git a/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py b/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py index b6ed558b5..df6fd9d14 100644 --- a/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py +++ b/onnxmltools/convert/lightgbm/operator_converters/LightGbm.py @@ -566,6 +566,10 @@ def convert_lightgbm(scope, operator, container): # so we need to add an 'Exp' post transform node to the model attrs["post_transform"] = "NONE" post_transform = "Exp" + elif gbm_text["objective"].startswith(("lambdarank", "rank_xendcg")): + n_classes = 1 # Ranker has only one output variable + attrs["n_targets"] = n_classes + attrs["post_transform"] = "NONE" else: raise RuntimeError( "LightGBM objective should be cleaned already not '{}'.".format( @@ -1026,3 +1030,4 @@ def convert_lgbm_zipmap(scope, operator, container): register_converter("LgbmClassifier", convert_lightgbm) register_converter("LgbmRegressor", convert_lightgbm) register_converter("LgbmZipMap", convert_lgbm_zipmap) +register_converter("LgbmRanker", convert_lightgbm) \ No newline at end of file diff 
--git a/onnxmltools/convert/lightgbm/shape_calculators/Ranker.py b/onnxmltools/convert/lightgbm/shape_calculators/Ranker.py new file mode 100644 index 000000000..989ce0d38 --- /dev/null +++ b/onnxmltools/convert/lightgbm/shape_calculators/Ranker.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: Apache-2.0 + +from ...common._registration import register_shape_calculator +from ...common.shape_calculator import calculate_linear_regressor_output_shapes + +register_shape_calculator("LgbmRanker", calculate_linear_regressor_output_shapes) \ No newline at end of file diff --git a/onnxmltools/convert/lightgbm/shape_calculators/__init__.py b/onnxmltools/convert/lightgbm/shape_calculators/__init__.py index e7a2c3d9b..7fd1224e6 100644 --- a/onnxmltools/convert/lightgbm/shape_calculators/__init__.py +++ b/onnxmltools/convert/lightgbm/shape_calculators/__init__.py @@ -3,3 +3,4 @@ # To register shape calculators for lightgbm operators, import associated modules here. from . import Classifier from . import Regressor +from . 
import Ranker \ No newline at end of file diff --git a/tests/lightgbm/test_objective_functions.py b/tests/lightgbm/test_objective_functions.py index 3a414d6e4..2bf96b920 100644 --- a/tests/lightgbm/test_objective_functions.py +++ b/tests/lightgbm/test_objective_functions.py @@ -4,24 +4,30 @@ import numpy as np import onnxruntime import pandas as pd +from lightgbm import Booster, Dataset, LGBMRanker, LGBMRegressor from onnx import ModelProto from onnx.defs import onnx_opset_version -from onnxmltools.convert.common.onnx_ex import DEFAULT_OPSET_NUMBER -from onnxmltools.convert.common.data_types import DoubleTensorType, TensorType -from onnxmltools import convert_lightgbm from onnxruntime import InferenceSession from pandas.core.frame import DataFrame -from lightgbm import LGBMRegressor, Booster, Dataset +from onnxmltools import convert_lightgbm +from onnxmltools.convert.common.data_types import DoubleTensorType, TensorType +from onnxmltools.convert.common.onnx_ex import DEFAULT_OPSET_NUMBER _N_ROWS = 10_000 _N_COLS = 10 _N_DECIMALS = 5 _FRAC = 0.9997 +# Used with LGBM Ranker +_N_ITEMS_PER_GROUP = 10 + _X = pd.DataFrame(np.random.random(size=(_N_ROWS, _N_COLS))) _Y = pd.Series(np.random.random(size=_N_ROWS)) +# Use integer labels when using LGBM ranking models +_Y_RANKING = pd.Series(np.random.randint(0, 5, size=_N_ROWS)) + _DTYPE_MAP: Dict[str, TensorType] = { "float64": DoubleTensorType, } @@ -40,13 +46,17 @@ class ObjectiveTest(unittest.TestCase): "tweedie", ) + _ranker_objectives: Tuple[str] = ( + "lambdarank", + "rank_xendcg", + ) + @staticmethod def _calc_initial_types(X: DataFrame) -> List[Tuple[str, TensorType]]: dtypes = set(str(dtype) for dtype in X.dtypes) if len(dtypes) > 1: raise RuntimeError( - f"Test expects homogenous input matrix. " - f"Found multiple dtypes: {dtypes}." + f"Test expects homogenous input matrix. Found multiple dtypes: {dtypes}." 
) dtype = dtypes.pop() tensor_type = _DTYPE_MAP[dtype] @@ -120,6 +130,47 @@ def test_objective_LGBMRegressor(self): frac=_FRAC, ) + def _get_data_group_sizes(self, X: DataFrame) -> np.array: + """ + Returns group sizes for ranking tasks. + For 10,000 rows with 10 items per group, this returns + an array of length 1,000, where each element is the number 10. + """ + n_groups = X.shape[0] // _N_ITEMS_PER_GROUP + return np.full(fill_value=_N_ITEMS_PER_GROUP, shape=n_groups) + + def test_objective_LGBMRanker(self): + """ + Test if an LGBMRanker with a certain objective (e.g. 'lambdarank') + can be converted to ONNX + and whether the ONNX graph and the original model produce + almost equal predictions. + + Note that this test is a bit flaky because of precision + differences between ONNX and LightGBM + and therefore sometimes fails randomly. In these cases, + a retry should resolve the issue. + """ + for objective in self._ranker_objectives: + with self.subTest(X=_X, objective=objective): + ranker = LGBMRanker(objective=objective, num_thread=1) + groups = self._get_data_group_sizes(_X) + + ranker.fit(_X, _Y_RANKING, group=groups) + ranker_onnx: ModelProto = convert_lightgbm( + ranker, + initial_types=self._calc_initial_types(_X), + target_opset=TARGET_OPSET, + ) + y_pred = ranker.predict(_X) + y_pred_onnx = self._predict_with_onnx(ranker_onnx, _X) + self._assert_almost_equal( + y_pred, + y_pred_onnx, + decimal=_N_DECIMALS, + frac=_FRAC, + ) + def test_objective_Booster(self): """ Test if a Booster a with certain objective (e.g. 'poisson') @@ -132,12 +183,20 @@ def test_objective_Booster(self): and therefore sometimes fails randomly. In these cases, a retry should resolve the issue. 
""" - for objective in self._regressor_objectives: + objectives = self._regressor_objectives + self._ranker_objectives + + for objective in objectives: with self.subTest(X=_X, objective=objective): - ds = Dataset(_X, feature_name="auto").construct() - ds.set_label(_Y) + if objective in self._ranker_objectives: + groups = self._get_data_group_sizes(_X) + ds = Dataset(_X, feature_name="auto", group=groups).construct() + ds.set_label(_Y_RANKING) + else: + ds = Dataset(_X, feature_name="auto").construct() + ds.set_label(_Y) + regressor = Booster(params={"objective": objective}, train_set=ds) - for k in range(10): + for _ in range(10): regressor.update() regressor_onnx: ModelProto = convert_lightgbm(