-
Notifications
You must be signed in to change notification settings - Fork 218
Fix/xgboost multitarget tree info #761
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,15 @@ | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
|
|
||
| from ...common._registration import register_shape_calculator | ||
| from ...common.shape_calculator import calculate_linear_regressor_output_shapes | ||
| from ...common.data_types import FloatTensorType | ||
| from ..common import get_xgb_params | ||
|
|
||
| register_shape_calculator("XGBRegressor", calculate_linear_regressor_output_shapes) | ||
| register_shape_calculator("XGBRFRegressor", calculate_linear_regressor_output_shapes) | ||
|
|
||
| def calculate_xgboost_regressor_output_shapes(operator): | ||
| N = operator.inputs[0].type.shape[0] | ||
| n_targets = get_xgb_params(operator.raw_operator).get("n_targets", 1) | ||
| operator.outputs[0].type = FloatTensorType([N, n_targets]) | ||
|
Comment on lines
+8
to
+11
|
||
|
|
||
|
|
||
| register_shape_calculator("XGBRegressor", calculate_xgboost_regressor_output_shapes) | ||
| register_shape_calculator("XGBRFRegressor", calculate_xgboost_regressor_output_shapes) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,9 @@ | |
|
|
||
| import unittest | ||
|
|
||
| import numpy as np | ||
| from numpy.testing import assert_allclose | ||
|
Comment on lines
+5
to
+6
|
||
|
|
||
| try: | ||
| from xgboost import XGBRegressor | ||
| except Exception: | ||
|
|
@@ -34,8 +37,9 @@ def xgbregressor_shape_calculator(operator): | |
| convert_xgboost, | ||
| ) | ||
| # Your data and labels | ||
| X = np.random.rand(100, 10) | ||
| y = np.random.rand(100, 2) | ||
| rng = np.random.default_rng() | ||
| X = rng.random((100, 10)) | ||
| y = rng.random((100, 2)) | ||
|
|
||
| # Train XGBoost regressor | ||
| model = xgboost.XGBRegressor( | ||
|
|
@@ -58,6 +62,62 @@ def xgbregressor_shape_calculator(operator): | |
| got = sess.run(None, {"float_input": X.astype(np.float32)}) | ||
| self.assertEqual(got[0].shape, (100, 2)) | ||
|
|
||
| @unittest.skipIf(XGBRegressor is None, "xgboost is not available") | ||
| def test_issue_676_values(self): | ||
| import onnxruntime | ||
| import xgboost | ||
| from onnxmltools.convert import convert_xgboost | ||
| from onnxmltools.convert.common.data_types import FloatTensorType | ||
|
|
||
| rng = np.random.default_rng(0) | ||
| X = rng.random((50, 10)).astype(np.float32) | ||
| y = rng.random((50, 10)) | ||
|
|
||
| model = xgboost.XGBRegressor(objective="reg:squarederror", n_estimators=3) | ||
| model.fit(X, y) | ||
|
|
||
| onnx_model = convert_xgboost( | ||
| model, initial_types=[("float_input", FloatTensorType([None, 10]))] | ||
| ) | ||
| sess = onnxruntime.InferenceSession( | ||
| onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] | ||
| ) | ||
| got = sess.run(None, {"float_input": X})[0] | ||
| expected = model.predict(X) | ||
|
|
||
| self.assertEqual(got.shape, (50, 10)) | ||
| assert_allclose(got, expected, rtol=1e-5, atol=1e-5) | ||
|
|
||
| @unittest.skipIf(XGBRegressor is None, "xgboost is not available") | ||
| def test_quantile_regression(self): | ||
| import onnxruntime | ||
| import xgboost | ||
| from onnxmltools.convert import convert_xgboost | ||
| from onnxmltools.convert.common.data_types import FloatTensorType | ||
|
|
||
| rng = np.random.default_rng(0) | ||
| X = rng.random((20, 3)).astype(np.float32) | ||
| y = rng.random(20) | ||
|
|
||
| model = xgboost.XGBRegressor( | ||
| objective="reg:quantileerror", | ||
| quantile_alpha=[0.1, 0.5, 0.9], | ||
| n_estimators=3, | ||
| ) | ||
| model.fit(X, y) | ||
|
|
||
| onnx_model = convert_xgboost( | ||
| model, initial_types=[("input", FloatTensorType([None, 3]))] | ||
| ) | ||
| sess = onnxruntime.InferenceSession( | ||
| onnx_model.SerializeToString(), providers=["CPUExecutionProvider"] | ||
| ) | ||
| got = sess.run(None, {"input": X})[0] | ||
| expected = model.predict(X) | ||
|
|
||
| self.assertEqual(got.shape, (20, 3)) | ||
| assert_allclose(got, expected, rtol=1e-5, atol=1e-5) | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| unittest.main() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`tree_info` is obtained by calling `save_raw(raw_format="json")` and then `json.loads(...)`, which parses the entire model a second time (in addition to `get_dump(..., dump_format="json")`). For large multi-target models this can significantly increase conversion time/memory. Consider extracting only the `tree_info` array (e.g., via a lightweight parse/regex similar to `onnxmltools/convert/xgboost/_parse.py:_get_attributes`) or using a deterministic mapping (like round-robin `treeid % n_targets`) as a fallback when full JSON parsing isn’t necessary.