Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion onnxmltools/convert/lightgbm/_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
Int64Type,
)

from lightgbm import LGBMClassifier, LGBMRegressor
from lightgbm import LGBMClassifier, LGBMRegressor, LGBMRanker

lightgbm_classifier_list = [LGBMClassifier]

Expand All @@ -21,6 +21,7 @@
# Maps LightGBM estimator classes to operator-name strings.
lightgbm_operator_name_map = {
LGBMClassifier: "LgbmClassifier",
LGBMRegressor: "LgbmRegressor",
LGBMRanker: "LgbmRanker",
}


Expand All @@ -39,6 +40,8 @@ def __init__(self, booster):
("regression", "poisson", "gamma", "quantile", "huber", "tweedie")
):
self.operator_name = "LgbmRegressor"
elif self.objective_.startswith(("lambdarank", "rank_xendcg")):
self.operator_name = "LgbmRanker"
else:
raise NotImplementedError(
"Unsupported LightGbm objective: %r." % self.objective_
Expand Down
5 changes: 5 additions & 0 deletions onnxmltools/convert/lightgbm/operator_converters/LightGbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,10 @@ def convert_lightgbm(scope, operator, container):
# so we need to add an 'Exp' post transform node to the model
attrs["post_transform"] = "NONE"
post_transform = "Exp"
elif gbm_text["objective"].startswith(("lambdarank", "rank_xendcg")):
n_classes = 1 # Ranker has only one output variable
attrs["n_targets"] = n_classes
attrs["post_transform"] = "NONE"
else:
raise RuntimeError(
"LightGBM objective should be cleaned already not '{}'.".format(
Expand Down Expand Up @@ -1026,3 +1030,4 @@ def convert_lgbm_zipmap(scope, operator, container):
# Register a converter function under each LightGBM operator name.
# The classifier, regressor and ranker names all use convert_lightgbm;
# the zipmap name uses convert_lgbm_zipmap.
register_converter("LgbmClassifier", convert_lightgbm)
register_converter("LgbmRegressor", convert_lightgbm)
register_converter("LgbmZipMap", convert_lgbm_zipmap)
register_converter("LgbmRanker", convert_lightgbm)
6 changes: 6 additions & 0 deletions onnxmltools/convert/lightgbm/shape_calculators/Ranker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# SPDX-License-Identifier: Apache-2.0

from ...common._registration import register_shape_calculator
from ...common.shape_calculator import calculate_linear_regressor_output_shapes

# Register the linear-regressor output-shape calculator under the
# "LgbmRanker" operator name.
register_shape_calculator("LgbmRanker", calculate_linear_regressor_output_shapes)
1 change: 1 addition & 0 deletions onnxmltools/convert/lightgbm/shape_calculators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
# To register shape calculators for lightgbm operators, import associated modules here.
from . import Classifier
from . import Regressor
from . import Ranker
79 changes: 69 additions & 10 deletions tests/lightgbm/test_objective_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,30 @@
import numpy as np
import onnxruntime
import pandas as pd
from lightgbm import Booster, Dataset, LGBMRanker, LGBMRegressor
from onnx import ModelProto
from onnx.defs import onnx_opset_version
from onnxmltools.convert.common.onnx_ex import DEFAULT_OPSET_NUMBER
from onnxmltools.convert.common.data_types import DoubleTensorType, TensorType
from onnxmltools import convert_lightgbm
from onnxruntime import InferenceSession
from pandas.core.frame import DataFrame

from lightgbm import LGBMRegressor, Booster, Dataset
from onnxmltools import convert_lightgbm
from onnxmltools.convert.common.data_types import DoubleTensorType, TensorType
from onnxmltools.convert.common.onnx_ex import DEFAULT_OPSET_NUMBER

_N_ROWS = 10_000
_N_COLS = 10
_N_DECIMALS = 5
_FRAC = 0.9997

# Used with LGBM Ranker
_N_ITEMS_PER_GROUP = 10

_X = pd.DataFrame(np.random.random(size=(_N_ROWS, _N_COLS)))
_Y = pd.Series(np.random.random(size=_N_ROWS))

# Use integer labels when using LGBM ranking models
_Y_RANKING = pd.Series(np.random.randint(0, 5, size=_N_ROWS))

_DTYPE_MAP: Dict[str, TensorType] = {
"float64": DoubleTensorType,
}
Expand All @@ -40,13 +46,17 @@ class ObjectiveTest(unittest.TestCase):
"tweedie",
)

_ranker_objectives: Tuple[str] = (
"lambdarank",
"rank_xendcg",
)

@staticmethod
def _calc_initial_types(X: DataFrame) -> List[Tuple[str, TensorType]]:
dtypes = set(str(dtype) for dtype in X.dtypes)
if len(dtypes) > 1:
raise RuntimeError(
f"Test expects homogenous input matrix. "
f"Found multiple dtypes: {dtypes}."
f"Test expects homogenous input matrix. Found multiple dtypes: {dtypes}."
)
dtype = dtypes.pop()
tensor_type = _DTYPE_MAP[dtype]
Expand Down Expand Up @@ -120,6 +130,47 @@ def test_objective_LGBMRegressor(self):
frac=_FRAC,
)

def _get_data_group_sizes(self, X: DataFrame, items_per_group=None) -> np.ndarray:
    """
    Return the query-group sizes to use for ranking tasks.

    The rows of ``X`` are split into consecutive groups of
    ``items_per_group`` rows. If the number of rows is not a multiple
    of ``items_per_group``, the leftover rows form one final, smaller
    group so that the sizes always sum to ``X.shape[0]`` (LightGBM
    rejects group sizes that do not add up to the number of rows).

    For 10,000 rows with 10 items per group, this returns
    an array of length 1,000, where each element is the number 10.

    :param X: data whose first ``shape`` entry is the number of rows.
    :param items_per_group: rows per group; defaults to the
        module-level ``_N_ITEMS_PER_GROUP`` when ``None``.
    :return: 1-D integer array of group sizes.
    :raises ValueError: if ``items_per_group`` is not positive.
    """
    if items_per_group is None:
        items_per_group = _N_ITEMS_PER_GROUP
    if items_per_group <= 0:
        raise ValueError(
            f"items_per_group must be a positive integer, got {items_per_group!r}."
        )

    n_full_groups, remainder = divmod(X.shape[0], items_per_group)
    sizes = np.full(shape=n_full_groups, fill_value=items_per_group)
    if remainder:
        # Keep the trailing partial group instead of silently dropping rows.
        sizes = np.append(sizes, remainder)
    return sizes

def test_objective_LGBMRanker(self):
    """
    Check that an LGBMRanker trained with each ranking objective
    (e.g. 'lambdarank') can be converted to ONNX, and that the ONNX
    graph and the original model produce almost equal predictions.

    Note that this test is a bit flaky because of precision
    differences between ONNX and LightGBM and therefore sometimes
    fails randomly. In these cases, a retry should resolve the issue.
    """
    for objective in self._ranker_objectives:
        with self.subTest(X=_X, objective=objective):
            model = LGBMRanker(objective=objective, num_thread=1)
            # Ranking objectives need the query-group sizes at fit time.
            model.fit(_X, _Y_RANKING, group=self._get_data_group_sizes(_X))

            initial_types = self._calc_initial_types(_X)
            onnx_model: ModelProto = convert_lightgbm(
                model,
                initial_types=initial_types,
                target_opset=TARGET_OPSET,
            )

            # Compare native LightGBM predictions against the ONNX graph.
            expected = model.predict(_X)
            actual = self._predict_with_onnx(onnx_model, _X)
            self._assert_almost_equal(
                expected,
                actual,
                decimal=_N_DECIMALS,
                frac=_FRAC,
            )

def test_objective_Booster(self):
"""
Test if a Booster a with certain objective (e.g. 'poisson')
Expand All @@ -132,12 +183,20 @@ def test_objective_Booster(self):
and therefore sometimes fails randomly. In these cases,
a retry should resolve the issue.
"""
for objective in self._regressor_objectives:
objectives = self._regressor_objectives + self._ranker_objectives

for objective in objectives:
with self.subTest(X=_X, objective=objective):
ds = Dataset(_X, feature_name="auto").construct()
ds.set_label(_Y)
if objective in self._ranker_objectives:
groups = self._get_data_group_sizes(_X)
ds = Dataset(_X, feature_name="auto", group=groups).construct()
ds.set_label(_Y_RANKING)
else:
ds = Dataset(_X, feature_name="auto").construct()
ds.set_label(_Y)

regressor = Booster(params={"objective": objective}, train_set=ds)
for k in range(10):
for _ in range(10):
regressor.update()

regressor_onnx: ModelProto = convert_lightgbm(
Expand Down