# Source code for skl2onnx.convert

# SPDX-License-Identifier: Apache-2.0

import warnings
from uuid import uuid4
from typing import Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
import numpy as np
import sklearn.base
from .proto import get_latest_tested_opset_version
from .common._topology import convert_topology
from .common.utils_sklearn import _process_options
from ._parse import parse_sklearn_model

# Invoke the registration of all our converters and shape calculators.
from . import shape_calculators  # noqa: F401
from . import operator_converters  # noqa: F401


def convert_sklearn(
    model,
    name=None,
    initial_types=None,
    doc_string="",
    target_opset=None,
    custom_conversion_functions=None,
    custom_shape_calculators=None,
    custom_parsers=None,
    options=None,
    intermediate=False,
    white_op=None,
    black_op=None,
    final_types=None,
    dtype=None,
    naming=None,
    model_optim=True,
    verbose=0,
):
    """
    This function produces an equivalent
    ONNX model of the given scikit-learn model.
    The supported converters are returned by function
    :func:`supported_converters <skl2onnx.supported_converters>`.

    For pipeline conversion, user needs to make sure each component
    is one of our supported items.
    This function converts the specified *scikit-learn* model
    into its *ONNX* counterpart.
    Note that for all conversions, initial types are required.
    *ONNX* model name can also be specified.

    :param model: A scikit-learn model
    :param initial_types: a python list.
        Each element is a tuple of a variable name
        and a type defined in `data_types.py`
    :param name: The name of the graph (type: GraphProto)
        in the produced ONNX model (type: ModelProto),
        a random hexadecimal identifier is used if it is None
    :param doc_string: A string attached onto the produced ONNX model
    :param target_opset: number, for example, 7 for
        ONNX 1.2, and 8 for ONNX 1.3,
        if value is not specified, the function will
        choose the latest tested opset
        (see :py:func:`skl2onnx.get_latest_tested_opset_version`)
    :param custom_conversion_functions: a dictionary for
        specifying the user customized conversion function,
        it takes precedence over registered converters
    :param custom_shape_calculators: a dictionary for
        specifying the user customized shape calculator,
        it takes precedence over registered shape calculators.
    :param custom_parsers: parsers determine which outputs
        are expected for which particular task,
        default parsers are defined for classifiers,
        regressors, pipeline but they can be rewritten,
        *custom_parsers* is a dictionary
        ``{ type: fct_parser(scope, model, inputs,
        custom_parsers=None) }``
    :param options: specific options given to converters
        (see :ref:`l-conv-options`)
    :param intermediate: if True, the function returns the
        converted model and the instance of :class:`Topology` used,
        it returns the converted model otherwise
    :param white_op: white list of ONNX nodes allowed
        while converting a pipeline,
        if empty, all are allowed
    :param black_op: black list of ONNX nodes not allowed
        while converting a pipeline,
        if empty, none are blacklisted
    :param final_types: a python list. Works the same way as
        initial_types but not mandatory, it is used
        to overwrite the type (if type is not None)
        and the name of every output.
    :param dtype: removed in version 1.7.5, dtype is
        now inferred from input types,
        converters may add operators Cast to switch
        to double when it is necessary
    :param naming: the user may want to change the way
        intermediate results are named, this parameter can be a string
        (a prefix) or a function, which signature is the following:
        `get_name(name, existing_names)`, the library
        will then check this name is unique and modify it if not
    :param model_optim: enable or disable model optimisation
        after the model was converted into onnx, it reduces the number
        of identity nodes
    :param verbose: display progress while converting a model
    :return: An ONNX model (type: ModelProto) which is
        equivalent to the input scikit-learn model, or the tuple
        ``(onnx_model, topology)`` if *intermediate* is True
    :raises ValueError: if *initial_types* is None and *model*
        has no method ``infer_initial_types``

    Example of *initial_types*:
    Assume that the specified *scikit-learn* model takes
    a heterogeneous list as its input.
    If the first 5 elements are floats and the last 10 elements are integers,
    we need to specify initial types as below. The [None] in
    [None, 5] indicates the batch size here is unknown.

    ::

        from skl2onnx.common.data_types import FloatTensorType, Int64TensorType
        initial_type = [('float_input', FloatTensorType([None, 5])),
                        ('int64_input', Int64TensorType([None, 10]))]

    .. note::

        If a pipeline includes an instance of
        `ColumnTransformer <https://scikit-learn.org/stable/modules/
        generated/sklearn.compose.ColumnTransformer.html>`_,
        *scikit-learn* allow the user to specify columns by names.
        This option is not supported
        by *sklearn-onnx* as features names could be different
        in input data and the ONNX graph
        (defined by parameter *initial_types*), only integers are supported.

    Converters options
    ++++++++++++++++++

    Some ONNX operators exposes parameters *sklearn-onnx* cannot
    guess from the raw model. Some default values are usually suggested
    but the users may have to manually overwrite them. This need
    is not obvious to do when a model is included in a pipeline.
    That's why these options can be given to function *convert_sklearn*
    as a dictionary ``{model_type: parameters in a dictionary}`` or
    ``{model_id: parameters in a dictionary}``.
    Option *sep* is used to specify the delimiters between two words
    when the ONNX graph needs to tokenize a string.
    The default value is short and may not include all
    the necessary values. It can be overwritten as:

    ::

        extra = {TfidfVectorizer: {"separators": [' ', '[.]', '\\\\?',
                    ',', ';', ':', '\\\\!', '\\\\(', '\\\\)']}}
        model_onnx = convert_sklearn(
            model, "tfidf",
            initial_types=[("input", StringTensorType([None, 1]))],
            options=extra)

    But if a pipeline contains two model of the same class,
    it is possible to distinguish between the two with function *id*:

    ::

        extra = {id(model): {"separators": [' ', '.', '\\\\?', ',', ';',
                    ':', '\\\\!', '\\\\(', '\\\\)']}}
        model_onnx = convert_sklearn(
            pipeline, "pipeline-with-2-tfidf",
            initial_types=[("input", StringTensorType([None, 1]))],
            options=extra)

    It is used in example :ref:`l-example-tfidfvectorizer`.

    .. versionchanged:: 1.10.0
        Parameter *naming* was added.
    """
    if initial_types is None:
        # The model may know how to describe its own inputs.
        if not hasattr(model, "infer_initial_types"):
            raise ValueError(
                "Initial types are required. See usage of "
                "convert(...) in skl2onnx.convert for details"
            )
        initial_types = model.infer_initial_types()

    if name is None:
        name = str(uuid4().hex)

    if dtype is not None:
        # stacklevel=2 attributes the warning to the code calling
        # convert_sklearn, so that it points at the line to change
        # instead of this library function.
        warnings.warn(
            "Parameter dtype is no longer supported. It will be removed in 1.9.0.",
            DeprecationWarning,
            stacklevel=2,
        )

    if not target_opset:
        target_opset = get_latest_tested_opset_version()

    # Parse scikit-learn model as our internal data structure
    # (i.e., Topology)
    if verbose >= 1:
        print("[convert_sklearn] parse_sklearn_model")
    topology = parse_sklearn_model(
        model,
        initial_types,
        target_opset,
        custom_conversion_functions,
        custom_shape_calculators,
        custom_parsers,
        options=options,
        white_op=white_op,
        black_op=black_op,
        final_types=final_types,
        naming=naming,
    )

    # Convert our Topology object into ONNX. The outcome is an ONNX model.
    options = _process_options(model, options)
    if verbose >= 1:
        print("[convert_sklearn] convert_topology")
    onnx_model = convert_topology(
        topology,
        name,
        doc_string,
        target_opset,
        options=options,
        remove_identity=model_optim and not intermediate,
        verbose=verbose,
    )
    if verbose >= 1:
        print("[convert_sklearn] end")
    if verbose >= 2:
        # Dump the content of the first scope of the topology.
        scope = topology.scopes[0]
        print("---INPUTS---")
        for inp in scope.input_variables:
            print(" %r" % inp)
        print("---OUTPUTS---")
        for inp in scope.output_variables:
            print(" %r" % inp)
        print("---VARIABLES---")
        for k, v in sorted(scope.variables.items()):
            print(" %r: is.fed=%r is_leaf=%r - %r" % (k, v.is_fed, v.is_leaf, v))
        print("---OPERATORS---")
        for k, v in sorted(scope.operators.items()):
            print(" %r: is.evaluated=%r - %r" % (k, v.is_evaluated, v))

    return (onnx_model, topology) if intermediate else onnx_model
def to_onnx(
    model: sklearn.base.BaseEstimator,
    X: Optional[np.ndarray] = None,
    name: Optional[str] = None,
    initial_types: Optional[
        List[Tuple[str, Sequence[Optional[Union[int, str]]]]]
    ] = None,
    target_opset: Optional[Union[Dict[str, int], int]] = None,
    options: Optional[Dict] = None,
    white_op: Optional[Set[str]] = None,
    black_op: Optional[Set[str]] = None,
    final_types: Optional[List[Tuple[str, Sequence[Optional[Union[int, str]]]]]] = None,
    dtype: Optional[np.dtype] = None,
    naming: Optional[Union[str, Callable]] = None,
    model_optim: bool = True,
    verbose: int = 0,
):
    """
    Calls :func:`convert_sklearn` with simplified parameters.

    :param model: model to convert
    :param X: training set, can be None, it is used to infer the
        input types (*initial_types*)
    :param initial_types: if X is None, then *initial_types* must be
        defined
    :param target_opset: conversion with a specific target opset
    :param options: specific options given to converters
        (see :ref:`l-conv-options`)
    :param name: name of the model, ``"ONNX(<class name>)"`` is used
        if it is None
    :param white_op: white list of ONNX nodes allowed
        while converting a pipeline, if empty, all are allowed
    :param black_op: black list of ONNX nodes not allowed
        while converting a pipeline, if empty, none are blacklisted
    :param final_types: a python list. Works the same way as
        initial_types but not mandatory, it is used
        to overwrite the type (if type is not None)
        and the name of every output.
    :param dtype: removed in version 1.7.5, dtype is now inferred from
        input types, converters may add operators Cast to switch to
        double when it is necessary
    :param naming: the user may want to change the way
        intermediate results are named, this parameter can be a string
        (a prefix) or a function, which signature is the following:
        `get_name(name, existing_names)`, the library
        will then check this name is unique and modify it if not
    :param model_optim: enable or disable model optimisation
        after the model was converted into onnx, it reduces the number
        of identity nodes
    :param verbose: display progress while converting a model
    :return: converted model
    :raises NotImplementedError: if *model* is an instance of
        :class:`OnnxOperatorMixin` and *options* is not None

    This function checks if the model inherits from class
    :class:`OnnxOperatorMixin`, it calls method *to_onnx*
    in that case otherwise it calls :func:`convert_sklearn`.

    .. versionchanged:: 1.10.0
        Parameter *naming* was added.

    .. versionchanged:: 1.18.0
        The main opset is now equal to target_opset and not a value equal
        or less than the given value.
    """
    from .algebra.onnx_operator_mixin import OnnxOperatorMixin
    from .algebra.type_helper import guess_initial_types

    if isinstance(model, OnnxOperatorMixin):
        # The mixin does its own conversion; only X, name and target_opset
        # are forwarded to it.
        if options is not None:
            raise NotImplementedError(
                "options not yet implemented for OnnxOperatorMixin."
            )
        return model.to_onnx(X=X, name=name, target_opset=target_opset)

    if name is None:
        name = "ONNX(%s)" % model.__class__.__name__
    initial_types = guess_initial_types(X, initial_types)
    if verbose >= 1:
        print("[to_onnx] initial_types=%r" % initial_types)
    model = convert_sklearn(
        model,
        initial_types=initial_types,
        target_opset=target_opset,
        name=name,
        options=options,
        white_op=white_op,
        black_op=black_op,
        final_types=final_types,
        dtype=dtype,
        verbose=verbose,
        naming=naming,
        model_optim=model_optim,
    )

    # When target_opset is a dictionary, the key "" holds the opset of the
    # default domain.
    if isinstance(target_opset, dict):
        expected_target = target_opset.get("", None)
    else:
        expected_target = target_opset

    if expected_target is not None:
        # Overwrite the opset declared for the default domain so that it is
        # equal to the requested one; only the first matching entry is
        # considered.
        for op in model.opset_import:
            if op.domain == "":
                if op.version != expected_target:
                    op.version = expected_target
                break
    return model
def wrap_as_onnx_mixin(model, target_opset=None):
    """
    Builds a new object which exposes both the *scikit-learn* API
    of *model* and the :class:`OnnxOperatorMixin` API.

    The class returned by ``find_class`` for the type of *model* is
    instantiated without calling its constructor, the state of *model*
    is copied into it and *target_opset* is stored in attribute
    *op_version*.

    :param model: fitted or unfitted *scikit-learn* object to wrap
    :param target_opset: value assigned to attribute *op_version*
        of the returned object
    :return: the new object
    :raises RuntimeError: if the name of the class found for *model*
        contains ``automation``
    """
    from .algebra.sklearn_ops import find_class

    mixin_class = find_class(model.__class__)
    if "automation" in str(mixin_class):
        raise RuntimeError("Wrong class name '{}'.".format(mixin_class))

    # Bypass __init__: the wrapped instance only receives the state of
    # the original model.
    wrapped = object.__new__(mixin_class)
    wrapped.__setstate__(model.__getstate__())
    wrapped.op_version = target_opset
    return wrapped