Source code for skl2onnx.common._topology

# SPDX-License-Identifier: Apache-2.0


import re
import warnings
import pprint
from logging import getLogger
from collections import OrderedDict
import numpy as np
from onnx import onnx_pb as onnx_proto
from onnx.helper import (
    make_graph,
    make_model,
    make_tensor_value_info,
    make_node,
    make_function,
)
from skl2onnx.common.data_types import (
    DataType,
    TensorType,
    FloatTensorType,
    Int64TensorType,
    StringTensorType,
    DoubleTensorType,
    Int32TensorType,
    BooleanTensorType,
    Int8TensorType,
    UInt8TensorType,
)

from ..proto import get_opset_number_from_onnx, get_latest_tested_opset_version
from . import _registration
from . import utils
from .exceptions import MissingShapeCalculator, MissingConverter
from ._container import ModelComponentContainer, _build_options
from .onnx_optimisation_identity import onnx_remove_node_identity

type_fct = type


def _default_OPSET_TO_IR_VERSION():
    return {
        1: 3,
        2: 3,
        3: 3,
        4: 3,
        5: 3,
        6: 3,
        7: 3,
        8: 4,
        9: 4,
        10: 5,
        11: 6,
        12: 7,
        13: 7,
        14: 7,
        15: 8,
        16: 8,
        17: 8,
        18: 8,
        19: 9,
        20: 9,
        21: 10,
    }


try:
    from skl2onnx.topology import OPSET_TO_IR_VERSION

    assert OPSET_TO_IR_VERSION[18] is not None
except (ImportError, KeyError):
    OPSET_TO_IR_VERSION = _default_OPSET_TO_IR_VERSION()

OPSET_ML_TO_OPSET = {1: 11, 2: 15, 3: 18}

logger = getLogger("skl2onnx")


def get_default_opset_for_domain(domain):
    """
    Returns the associated for a domain given the main opset.
    """
    from .. import __max_supported_opset__ as main_opset

    if domain == "":
        return main_opset
    if domain == "ai.onnx.ml":
        if main_opset >= 18:
            return 3
        if main_opset >= 6:
            return 2
        return 1
    if domain == "ai.onnx.training":
        return 1
    return None



[docs]
class Variable:
    """
    Defines a variable which holds any data defined
    from *ONNX* types.
    """

    _UNIQUE_NUMBER_ = 0

    def __init__(self, raw_name, onnx_name, scope, type=None):
        """
        :param raw_name: A string indicating the variable's name in the
                         original model. Usually, it's the seed string
                         used to created its ONNX name (i.e., the
                         field *onnx_name* below).
        :param onnx_name: A string indicating the variable's name in
                          the converted model
        :param scope: A string. It's the name of the scope where this
                      variable is declared
        :param type: A type object defined in .common.data_types.py;
                     e.g., FloatTensorType
        """
        if not isinstance(raw_name, str):
            raise TypeError("raw_name must be a string not '%s'." % raw_name.__class__)
        if type is not None and not hasattr(type, "shape"):
            raise TypeError(
                "Unexpected type for variable raw_name=%r, type=%r." % (raw_name, type)
            )
        if not isinstance(onnx_name, str) or "(" in onnx_name:
            if onnx_name.startswith("u(") and onnx_name[-1] == ")":
                onnx_name0 = onnx_name
                if scope is None:
                    onnx_name = "UU%03dUU" % Variable._UNIQUE_NUMBER_
                    Variable._UNIQUE_NUMBER_ += 1
                else:
                    onnx_name = scope.get_unique_variable_name("U")
                logger.debug(
                    "[Var] rename raw_name=%r, onnx_name=%r into %r",
                    raw_name,
                    onnx_name0,
                    onnx_name,
                )
            else:
                raise TypeError("onnx_name must be a string not %r." % onnx_name)

        if type is not None:
            shape = type.shape
            if shape is not None:
                not_none = [v for v in shape if v is not None]
                if len(not_none) and min(not_none) == 0:
                    raise RuntimeError(
                        "A variable cannot be empty, raw_name=%r, "
                        "onnx_name=%r, shape=%r, type=%r."
                        % (raw_name, onnx_name, shape, type)
                    )

        self._raw_name = raw_name
        self._onnx_name = onnx_name
        self._scope = scope
        self._type = type
        self._parent = None

        # The following fields are bool variables used in parsing and
        # compiling stages
        self._is_fed = None
        self._is_root = None
        self._is_leaf = None
        if self.type is not None and not isinstance(self.type, DataType):
            raise TypeError("shape must be a DataType not {}.".format(self.type))
        if isinstance(self.type, TensorType):
            shape = self.type.shape
            if not isinstance(shape, (list, tuple)):
                try:
                    shape = list(shape)
                except TypeError:
                    raise TypeError(  # noqa: B904
                        "shape must be a tuple or a list not "
                        "{}.".format(type_fct(shape))
                    )
            for dim in shape:
                if dim is None:
                    continue
                if not isinstance(dim, (int, np.int32, np.int64, np.intc)):
                    raise TypeError(
                        "shape must contains integers not %r (type=%r)."
                        "" % (dim, dim.__class__)
                    )
        logger.debug("[Var] +%s", self)

        # links to operators using those variables
        self.operators_outputs_ = []
        self.operators_inputs_ = []
        self._check()

    def _check(self):
        if self.type is not None and self.type.shape is not None:
            for k in self.type.shape:
                if k is None:
                    continue
                if not isinstance(k, (int, np.integer)):
                    raise ValueError(
                        "Unexpected type %r for shape %r." % (type(k), self)
                    )

    @property
    def raw_name(self):
        return self._raw_name

    @property
    def onnx_name(self):
        return self._onnx_name

    @property
    def scope(self):
        return self._scope

    @property
    def type(self):
        return self._type

    @property
    def is_fed(self):
        return self._is_fed

    @property
    def is_root(self):
        return self._is_root

    @property
    def is_leaf(self):
        return self._is_leaf

    def init_status(self, is_fed=None, is_root=None, is_leaf=None):
        if is_fed is not None and is_fed != self.is_fed:
            logger.debug(
                "[Var] update is_fed=%r for %r, parent=%r", is_fed, self, self._parent
            )
            self._is_fed = is_fed
        if is_root is not None and is_root != self.is_root:
            logger.debug("[Var] update is_root=%r for %r", is_root, self)
            self._is_root = is_root
        if is_leaf is not None and is_leaf != self.is_leaf:
            logger.debug("[Var] update is_leaf=%r for %r", is_leaf, self)
            self._is_leaf = is_leaf

    def __setattr__(self, name, value):
        if name == "type":
            self.set_type(value)
        elif name == "onnx_name":
            raise AttributeError("You must use method set_onnx_name.")
        elif name in {"is_fed", "is_root", "is_leaf"}:
            raise AttributeError("You must use method init_status.")
        elif name in {"scope", "raw_name"}:
            raise AttributeError("scope or raw_name cannot be changed.")
        self.__dict__[name] = value

    def set_type(self, new_type):
        if (
            new_type is None
            or isinstance(new_type, (str, Variable))
            or not hasattr(new_type, "shape")
        ):
            raise TypeError(
                "Unexpected new type for variable %r, new_type=%r." % (self, new_type)
            )
        logger.debug("[Var] update type for %r", self)
        self._type = new_type
        self._check()

    def set_onnx_name(self, onnx_name):
        if onnx_name != self._onnx_name:
            logger.debug(
                "[Var] update onnx_name, from %r to %r in %r",
                self.onnx_name,
                onnx_name,
                self,
            )
            if self.scope is not None and not isinstance(self.scope, str):
                self.scope.rename_onnx_name(self._onnx_name, onnx_name)
            self._onnx_name = onnx_name

    def set_parent(self, operator):
        if self._parent is not None:
            raise RuntimeError(
                "This variable is already the output of operator %r. "
                "It cannot be the output of %r." % (self._parent, operator)
            )
        logger.debug("[Var] set parent for %r, parent=%r", self, operator)
        self._parent = operator

    def get_first_dimension(self):
        """
        Returns the first dimension (batch dimension) or
        None if not specified (shape is empty).
        """
        if self.type is None or self.type.shape is None or len(self.type.shape) == 0:
            return None
        return self.type.shape[0]

    def get_second_dimension(self):
        if self.type is None or self.type.shape is None or len(self.type.shape) < 2:
            return None
        return self.type.shape[1]

    @property
    def full_name(self):
        """
        Return a globally unique variable ID
        """
        return self.onnx_name

    def __repr__(self):
        return "Variable('{0}', '{1}', type={2})".format(
            self.raw_name, self.onnx_name, self.type
        )

    @staticmethod
    def from_pb(obj):
        """
        Creates a data type from a protobuf object.
        """

        def get_dim(d):
            r = d.dim_value
            if "dim_param" in str(d):
                return None
            if r == 0:
                # dim_value is 0 when it is 0 or undefined
                return 0 if "0" in str(d) else None
            return r

        def get_shape(tt):
            return [get_dim(tt.shape.dim[i]) for i in range(len(tt.shape.dim))]

        if hasattr(obj, "extend"):
            return [Variable.from_pb(o) for o in obj]

        name = obj.name
        if obj.type.tensor_type:
            tt = obj.type.tensor_type
            elem = tt.elem_type
            shape = get_shape(tt)
            if elem == onnx_proto.TensorProto.FLOAT:
                ty = FloatTensorType(shape)
            elif elem == onnx_proto.TensorProto.BOOL:
                ty = BooleanTensorType(shape)
            elif elem == onnx_proto.TensorProto.DOUBLE:
                ty = DoubleTensorType(shape)
            elif elem == onnx_proto.TensorProto.STRING:
                ty = StringTensorType(shape)
            elif elem == onnx_proto.TensorProto.INT64:
                ty = Int64TensorType(shape)
            elif elem == onnx_proto.TensorProto.INT32:
                ty = Int32TensorType(shape)
            elif UInt8TensorType is not None and elem == onnx_proto.TensorProto.UINT8:
                ty = UInt8TensorType(shape)
            elif Int8TensorType is not None and elem == onnx_proto.TensorProto.INT8:
                ty = Int8TensorType(shape)
            elif elem == 0:
                ty = FloatTensorType(shape)
            else:
                raise NotImplementedError(
                    "Unsupported type '{}' (elem_type={}).".format(
                        type(obj.type.tensor_type), elem
                    )
                )
        else:
            raise NotImplementedError(
                "Unsupported type '{}' as a string ({}).".format(type(obj), obj)
            )

        return Variable(name, name, None, ty)

    def __iter__(self):
        "Enables expression such as `a,b = self`."
        yield self.onnx_name
        yield self.type

    def __getitem__(self, index):
        if index == 0:
            return self.onnx_name
        if index == 1:
            return self.type
        raise IndexError("Unreachable element at index %d." % index)

    def add_operator(self, op, in_or_out):
        "Add a link to an operator, True for output, False for input."
        if in_or_out:
            self.operators_outputs_.append(op)
        else:
            self.operators_inputs_.append(op)

    def check_compatible_type(self, other_type):
        def empty_shape(shape):
            return shape is None or len(shape) == 0

        if self.type is None:
            if other_type is None:
                return
        elif other_type is not None:  # noqa: SIM102
            if isinstance(self.type, type(other_type)):
                if self.type.shape == other_type.shape:
                    return
                if empty_shape(other_type.shape):
                    return
        raise TypeError(
            "Incompatible type for variable %r and type %r." % (self, other_type)
        )



class VariableStr(Variable):
    """
    Defines a variable a string. This should be avoided.
    """

    def __init__(self, name, scope=None, type=None):
        Variable.__init__(self, name, name, scope=scope, type=type)

    @property
    def raw_name(self):
        return self._raw_name

    @property
    def onnx_name(self):
        if self._onnx_name.startswith("u("):
            raise RuntimeError(
                "Variable should be renamed as onnx_name=%r." % self._onnx_name
            )
        return self._onnx_name



[docs]
class Operator:
    """
    Defines an operator available in *ONNX*.
    """

    class OperatorList(list):
        def __init__(self, parent, kind):
            super(Operator.OperatorList, self).__init__()
            self.parent = parent
            self.kind = kind

        def __eq__(self, second):
            raise NotImplementedError("Operator equal not implemented and not needed.")

        def append(self, v):
            if not isinstance(v, Variable):
                raise TypeError(
                    "Input and output must be of type Variable not %r." % type(v)
                )
            if self.kind == "Out":
                v.set_parent(self.parent)
            super(Operator.OperatorList, self).append(v)
            logger.debug("[Op] add %s %r to %r", self.kind, v, self.parent)
            if self.kind == "In":
                v.add_operator(self.parent, False)
            elif self.kind == "Out":
                v.add_operator(self.parent, True)
            else:
                raise RuntimeError("Unexpected value for kind=%r." % self.kind)

        def extend(self, vs):
            for v in vs:
                self.append(v)

        def __getitem__(self, i):
            v = list.__getitem__(self, i)
            if isinstance(i, int) and not isinstance(v, Variable):
                raise TypeError("Element %d must be a Variable not %r." % (i, type(v)))
            return v

        def __setitem__(self, i, v):
            raise LookupError("Setter should not be used to modify an element.")

        def set_element(self, i, v):
            "Updates element i."
            if not isinstance(v, Variable):
                raise TypeError("Value v must be a Variable not %r." % type(v))
            logger.debug(
                "[Op] %s-change element %d from %r to %r in %r",
                self.kind,
                i,
                self[i],
                v,
                self.parent,
            )
            list.__setitem__(self, i, v)

        def to_string(self):
            names = []
            for o in self:
                if hasattr(o, "onnx_name"):
                    names.append(o.onnx_name)
                else:
                    names.append('"%s"' % str(o))
            return ",".join(names)

    def __init__(self, onnx_name, scope, type, raw_operator, target_opset, scope_inst):
        """
        :param onnx_name: A unique ID, which is a string
        :param scope: The name of the scope where this operator is
                      declared. It's a string.
        :param type: A object which uniquely characterizes the type of
                     this operator. For example, it can be a string,
                     pooling, if this operator is associated with a
                     CoreML pooling layer.
        :param raw_operator: The original operator which defines this operator;
                             for example, a scikit-learn Imputer and
                             a CoreML Normalizer.
        :param target_opset: The target opset number for the converted model.
        :param scope_inst: :class:`Scope` instance the operator belongs to
        """
        if isinstance(raw_operator, str):
            raise RuntimeError(
                "Parameter raw_operator must be an object not "
                "a string '{0}'.".format(raw_operator)
            )
        # operator name in the converted model, if raw_operator
        # is not None, output_shapes can be guessed
        # from the raw model. Otherwise, it can be guessed
        # from the input shapes.
        self.onnx_name = onnx_name
        self.scope = scope
        self.type = type
        self.raw_operator = raw_operator
        self.inputs = Operator.OperatorList(self, "In")
        self.outputs = Operator.OperatorList(self, "Out")
        self._is_evaluated = None
        self.target_opset = target_opset
        self.scope_inst = scope_inst
        logger.debug("[Op] +%r", self)

    def new_raw_operator(self, raw_operator, alias):
        """
        Returns a shallow copy of this operator,
        changes the raw_operator but keeps the same inputs
        and outputs.
        """
        op = Operator(
            self.onnx_name,
            self.scope,
            alias,
            raw_operator,
            self.target_opset,
            self.scope_inst,
        )
        op.inputs = self.inputs
        op.outputs = self.outputs
        return op

    def __repr__(self):
        try:
            textop = repr(self.raw_operator)
        except AttributeError:
            textop = "MISSING OP"
        except KeyError:
            # The line above fails for python 3.7
            textop = type(self.raw_operator)
        if isinstance(textop, str) and "\n" in textop:
            textop = textop.replace("\n", "").replace(" ", "")
        return (
            "Operator(type='{0}', onnx_name='{1}', inputs='{2}', "
            "outputs='{3}', raw_operator={4})".format(
                self.type,
                self.onnx_name,
                self.inputs.to_string(),
                self.outputs.to_string(),
                textop,
            )
        )

    def __setattr__(self, name, value):
        if name in ("inputs", "outputs"):
            if isinstance(value, list) and not isinstance(value, Operator.OperatorList):
                if name == "inputs":
                    self.inputs = Operator.OperatorList(self, "In")
                    self.inputs.extend(value)
                    return
                if name == "outputs":
                    self.outputs = Operator.OperatorList(self, "Out")
                    self.outputs.extend(value)
                    return
            if not isinstance(value, Operator.OperatorList):
                raise TypeError(
                    "inputs or outputs must be of type Operator.OperatorList."
                )
            ioo = name == "outputs"
            for v in value:
                v.add_operator(self, ioo)
        self.__dict__[name] = value

    @property
    def is_evaluated(self):
        return self._is_evaluated

    def init_status(self, is_evaluated=None):
        if is_evaluated is not None and is_evaluated != self.is_evaluated:
            logger.debug("[Op] update is_evaluated=%r for %r", is_evaluated, self)
            self._is_evaluated = is_evaluated

    @property
    def full_name(self):
        """
        Return a globally unique operator ID
        """
        return self.onnx_name

    @property
    def input_full_names(self):
        """
        Return all input variables' names
        """
        return [variable.full_name for variable in self.inputs]

    @property
    def output_full_names(self):
        """
        Return all output variables' names
        """
        return [variable.full_name for variable in self.outputs]

    @property
    def original_operator(self):
        """
        Return the original operator/layer
        """
        return self.raw_operator

    def infer_types(self):
        # Invoke a core inference function
        if self.type is None:
            raise MissingShapeCalculator(
                "Unable to find a shape calculator for type '{}'.".format(
                    type(self.raw_operator)
                )
            )
        try:
            shape_calc = _registration.get_shape_calculator(self.type)
        except ValueError:
            raise MissingShapeCalculator(  # noqa: B904
                "Unable to find a shape calculator for alias '{}' "
                "and type '{}'.".format(self.type, type(self.raw_operator))
            )
        if shape_calc is None:
            raise MissingShapeCalculator(
                "Unexpected shape calculator for alias '{}' "
                "and type '{}'.".format(self.type, type(self.raw_operator))
            )
        logger.debug(
            "[Shape-a] %r fed %r - %r",
            self,
            "".join(str(i.is_fed) for i in self.inputs),
            "".join(str(i.is_fed) for i in self.outputs),
        )
        shape_calc(self)
        logger.debug(
            "[Shape-b] %r inputs=%r - outputs=%r", self, self.inputs, self.outputs
        )




[docs]
class Scope:
    """
    Every node of an *ONNX* graph must be unique. This class holds the list
    of existing name for every node already defined in graph. It also
    provides functions to create a unique unused name.
    """

    def __init__(
        self,
        name,
        target_opset=None,
        custom_shape_calculators=None,
        options=None,
        registered_models=None,
        naming=None,
    ):
        """
        :param name: A string, the unique ID of this scope in a
                     Topology object
        :param target_opset: The target opset number for the converted
                             model.
        :param custom_conversion_functions: a dictionary for specifying
                                the user customized conversion function
        :param custom_shape_calculators: a dictionary for specifying
                                the user customized shape calculator
        :param options: see :ref:`l-conv-options`
        :param naming: the user may want to change the way intermediate
            are named, this parameter can be a string (a prefix) or a
            function, which signature is the following:
            `get_name(name, existing_names)`, the library will then
            check this name is unique and modify it if not
        :param registered_models: registered models

        .. versionchanged:: 1.10.0
            Parameter *naming* was added.
        """
        self.name = name
        self.onnx_variable_names = set()
        self.onnx_operator_names = set()
        self.target_opset = target_opset
        self.custom_shape_calculators = custom_shape_calculators

        # An one-to-many map from raw variable name to ONNX variable
        # names. It looks like
        # (key, value) = (raw_name, [onnx_name, onnx_name1, onnx_name2, ..., onnx_nameN]) # noqa
        # The last name may hide all other names in this scope.
        self.variable_name_mapping = {}

        # A map of local variables defined in this scope.
        # (key, value) = (onnx_name, variable)
        self.variables = OrderedDict()
        self.input_variables = []
        self.output_variables = []

        # A map of local operators defined in this scope.
        # (key, value) = (onnx_name, operator)
        self.operators = {}

        # Additional options given to converters.
        self.options = options

        # Registered models
        self.registered_models = registered_models
        self.naming = naming

        if naming is None:
            self._naming = Topology._generate_unique_name
        elif isinstance(naming, str):
            self._naming = lambda seed, names: Topology._generate_unique_name(
                self.naming + seed, names
            )
        elif callable(self.naming):
            self._naming = lambda seed, names: Topology._generate_unique_name(
                self.naming(seed, names), names
            )
        else:
            raise TypeError("Unexpected type for parameter naming: %r." % type(naming))

    def get(self, var_name, default_value):
        "Returns variable with 'name' or default value is not found."
        return self.variables.get(var_name, default_value)

    def has_variable_name(self, name):
        """
        Tells if a variable is already registered.
        """
        return name in self.onnx_variable_names

    def get_shape_calculator(self, model_type):
        """
        Returns the shape calculator for the given model type.

        :param model_type: model type such as *LogisticRegression*
        :return: alias or None if not found
        """
        return self.custom_shape_calculators.get(model_type, None)


[docs]
    def get_unique_variable_name(self, seed, rename=True):
        """
        Creates a unique variable ID based on the given seed.
        """
        if not isinstance(seed, str):
            raise TypeError(
                "Parameter seed must be a string not {}.".format(type(seed))
            )
        if rename:
            name = self._naming(seed, self.onnx_variable_names)
        else:
            name = Topology._generate_unique_name(seed, self.onnx_variable_names)
        return name



[docs]
    def get_unique_operator_name(self, seed):
        """
        Creates a unique operator ID based on the given seed.
        """
        return self._naming(seed, self.onnx_operator_names)


    def declare_local_variable(
        self, raw_name, type=None, prepend=False, missing_type=False, rename=True
    ):
        """
        This function may create a new variable in this scope. If
        *raw_name* has been used to create other variables, the new
        variable will hide all other variables created using *raw_name*.
        """
        if type is None and not missing_type:
            raise RuntimeError("Unknown type for %r (type=%r)." % (raw_name, type))
        # Get unique ID for the new variable
        onnx_name = self.get_unique_variable_name(raw_name, rename=rename)

        # Create the variable
        variable = Variable(raw_name, onnx_name, self.name, type)
        self.register_variable(variable, prepend=prepend)
        return variable

    def register_variable(self, var, prepend=False):
        "Adds a variable to the scope."
        if var.onnx_name in self.variables:
            raise RuntimeError(
                "Variable %r already registered (other=%r)."
                % (var, self.variables[var.onnx_name])
            )

        if var.raw_name in self.variable_name_mapping:
            # Hide existing variables with the same raw_name
            if not prepend:
                self.variable_name_mapping[var.raw_name].append(var.onnx_name)
            else:
                self.variable_name_mapping[var.raw_name].insert(0, var.onnx_name)
        else:
            self.variable_name_mapping[var.raw_name] = [var.onnx_name]

        self.variables[var.onnx_name] = var

    def declare_existing_subgraph_name(self, graph_proto):
        """
        Declare all name from a subgraph in order to avoid being picked twice.
        """
        output_name = {o.name for o in graph_proto.output}
        for node in graph_proto.node:
            for name in node.output:
                if name in output_name:
                    continue
                if self.has_variable_name(name):
                    raise NameError(
                        "Result name %r is already taken (outputs=%r) "
                        "(node=%r)." % (name, output_name, node)
                    )
                self.onnx_variable_names.add(name)
            if node.name in self.onnx_operator_names:
                raise NameError(
                    "Operator name %r is already taken "
                    "(node=%r)." % (node.name, node)
                )
            self.onnx_operator_names.add(node.name)

    def rename_onnx_name(self, old_name, new_name):
        if new_name in self.variables:
            raise RuntimeError(
                "Name %r already in variables (%r)."
                % (new_name, self.variables[new_name])
            )
        if old_name not in self.variables:
            raise RuntimeError("Unable to find name %r in variables." % old_name)
        logger.debug("[Scope] update onnx_name, from %r to %r", old_name, new_name)
        self.variables[new_name] = self.variables[old_name]
        del self.variables[old_name]

    def declare_local_input(self, raw_name, type=None, prepend=False, rename=True):
        """
        Calls `declare_local_variable`. Registers this variable
        as an input.
        """
        var = self.declare_local_variable(
            raw_name, type=type, prepend=prepend, rename=rename
        )
        self.input_variables.append(var)
        return var

    def declare_local_output(
        self, raw_name, type=None, prepend=False, missing_type=False
    ):
        """
        Calls `declare_local_variable`. Registers this variable
        as an output.
        """
        var = self.declare_local_variable(
            raw_name, type=type, prepend=prepend, missing_type=missing_type
        )
        self.output_variables.append(var)
        return var

    def declare_local_operator(self, type, raw_model=None):
        """
        This function is used to declare new local operator.
        """
        onnx_name = self.get_unique_operator_name(str(type))
        operator = Operator(
            onnx_name, self.name, type, raw_model, self.target_opset, scope_inst=self
        )
        self.operators[onnx_name] = operator
        return operator

    def _get_allowed_options(self, model, fail=True):
        if self.registered_models is not None:
            if type(model) not in self.registered_models["aliases"]:
                if fail:
                    raise NotImplementedError(
                        "No registered models, no known allowed options "
                        "for model '{}'.".format(model.__class__.__name__)
                    )
                return {}
            alias = self.registered_models["aliases"][type(model)]
            conv = self.registered_models["conv"][alias]
            allowed = conv.get_allowed_options()
            return allowed
        raise NotImplementedError(
            "No registered models, no known allowed options "
            "for model '{}'.".format(model.__class__.__name__)
        )

    def add_options(self, model_id, options):
        """
        Adds an option, for example,
        ``add_options(id(clr), {'raw_scores': True})``
        tells the converter associated to ``clr`` to
        use raw score instead of probabilities.

        :param model_id: class or ``id(instance)``
        :param options: dictionary with the new values
        """
        if options is None:
            return
        if self.options is None:
            self.options = {}
        if model_id not in self.options:
            self.options[model_id] = None
        if self.options[model_id] is None:
            self.options[model_id] = {}
        self.options[model_id].update(options)

    def get_options(self, model, default_values=None, fail=True):
        """
        Returns additional options for a model.
        It first looks by class then by id (``id(model)``).
        :param model: model being converted
        :param default_values: default options (it is modified by
                               the function)
        :param fail: fails if option it not found
        :return: dictionary
        """
        return _build_options(
            model,
            self.options,
            default_values,
            self._get_allowed_options(model, fail=fail),
            fail=fail,
        )

    def replace_raw_operator(self, op1, op2, alias):
        """
        Replaces every raw operator op1 by op2.
        The function uses `id()` to detect op1.
        """
        for v in self.operators.values():
            if id(v.raw_operator) == id(op1):
                logger.debug(
                    "[Scope] replace %d by %d in %r.", id(v.raw_operator), id(op1), v
                )
                v.raw_operator = op2
                v.type = alias




[docs]
class Topology:
    """
    Holds instances on :class:`Scope <skl2onnx.common._topology.Scope>` and
    :class:`SklearnModelContainer
    <skl2onnx.common._container.SklearnModelContainer>`.
    These are filled by the converters while a pipeline is being converted.
    """

    def __init__(
        self,
        model,
        default_batch_size=1,
        initial_types=None,
        target_opset=None,
        custom_conversion_functions=None,
        custom_shape_calculators=None,
        registered_models=None,
    ):
        """
        Initializes a *Topology* object, which is an intermediate
        representation of a computational graph.

        :param model: RawModelContainer object or one of its derived
                      classes. It contains the original model.
        :param default_batch_size: batch_size prepend to scalar and
                                   array types from CoreML. It's usually
                                   1 or None.
        :param initial_types: A list providing some types for some
            root variables.
            Each element is a tuple of a variable name and a type defined
            in *data_types.py*.
        :param custom_conversion_functions: a dictionary for specifying
                                the user customized conversion function
        :param custom_shape_calculators: a dictionary for specifying the
                                        user customized shape calculator
        :param registered_models: registered models
        """
        self.scopes = []
        self.raw_model = model
        self.scope_names = set()
        self.initial_types = initial_types if initial_types else list()
        self.default_batch_size = default_batch_size
        self.target_opset = target_opset
        self.custom_conversion_functions = (
            custom_conversion_functions if custom_conversion_functions else {}
        )
        self.custom_shape_calculators = (
            custom_shape_calculators if custom_shape_calculators else {}
        )

        for k in self.custom_conversion_functions:
            if not callable(k):
                raise TypeError(
                    "Keys in custom_conversion_functions must be types not strings."
                )
        for k in self.custom_shape_calculators:
            if not callable(k):
                raise TypeError(
                    "Keys in custom_shape_calculators must be types not strings."
                )

        # A map of local overwritten model aliases.
        self.model_aliases = {}
        all_model_types = set(self.custom_conversion_functions) | set(
            self.custom_shape_calculators
        )
        for mtype in all_model_types:
            alias = "{}_{}".format(mtype.__name__, id(self))
            self.model_aliases[mtype] = alias

        # Registered models
        if registered_models is None:
            raise AssertionError()
        self.registered_models = registered_models

    @property
    def scope(self):
        if len(self.scopes) != 1:
            raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes))
        return self.scopes[0]

    @staticmethod
    def _generate_unique_name(seed, existing_names):
        """
        Produce an unique string based on the seed
        :param seed: a string
        :param existing_names: a set containing strings which cannot be
                               produced
        :return: a string similar to the seed
        """
        if seed == "":
            raise ValueError("Name seed must be a non-empty string.")

        # Make the seed meet C-style naming convention
        # Only alphabets and numbers are allowed
        seed = re.sub("[^\\w+]", "_", seed)
        # The first symbol cannot be a number
        if re.match("^[0-9]", seed):
            seed = "_" + seed

        # If seed has never been seen, we return it as it is. Otherwise,
        # we will append an number to make it unique.
        if seed not in existing_names:
            existing_names.add(seed)
            return seed
        else:
            i = 1
            while seed + str(i) in existing_names:
                i += 1
            new_name = seed + str(i)
            existing_names.add(new_name)
            return new_name

    def get_unique_scope_name(self, seed):
        return Topology._generate_unique_name(seed, self.scope_names)


[docs]
    def declare_scope(self, seed, parent_scopes=None, options=None, naming=None):
        """
        Creates a new :class:`Scope <skl2onnx.common._topology.Scope>`
        and appends it to the list of existing scopes.
        """
        if len(self.scopes) != 0:
            raise RuntimeError("Only one scope can be created.")
        scope = Scope(
            self.get_unique_scope_name(seed),
            target_opset=self.target_opset,
            custom_shape_calculators=self.custom_shape_calculators,
            options=options,
            registered_models=self.registered_models,
            naming=naming,
        )

        # Declare input variables.
        # They should be the inputs of the scikit-learn
        # model you want to convert into ONNX.
        for var_name, initial_type in self.initial_types:
            scope.declare_local_input(var_name, initial_type, rename=False)
        self.scopes.append(scope)
        return scope


    def unordered_operator_iterator(self):
        for scope in self.scopes:
            for operator in scope.operators.values():
                yield operator

    def unordered_variable_iterator(self):
        for scope in self.scopes:
            for variable in scope.variables.values():
                yield variable


[docs]
    def call_converter(self, operator, container, verbose=0):
        "Calls converter for operator *operator*."
        mtype = type(operator.raw_operator)
        if mtype in self.custom_conversion_functions:
            conv = self.custom_conversion_functions[mtype]
        elif operator.type in self.custom_conversion_functions:
            conv = self.custom_conversion_functions[operator.type]
        elif hasattr(operator.raw_operator, "onnx_converter"):
            conv = operator.raw_operator.onnx_converter()
        else:
            # Convert the selected operator into some ONNX objects and
            # save them into the container
            try:
                conv = _registration.get_converter(operator.type)
            except ValueError:
                raise MissingConverter(  # noqa: B904
                    "Unable to find converter for alias '{}' type "
                    "'{}'. You may raise an issue at "
                    "https://github.com/onnx/sklearn-onnx/issues."
                    "".format(operator.type, type(getattr(operator, "raw_model", None)))
                )

        container.validate_options(operator)
        if verbose > 0:
            print("[call_converter] call converter for %r." % operator.type)
        logger.debug(
            "[Conv] call %r fed %r - %r",
            operator,
            "".join(str(i.is_fed) for i in operator.inputs),
            "".join(str(i.is_fed) for i in operator.outputs),
        )
        conv(self.scopes[0], operator, container)
        logger.debug("[Conv] end - %r", operator)



[docs]
    def call_shape_calculator(self, operator):
        "Calls shape_calculator for operator *operator*."
        mtype = type(operator.raw_operator)
        if mtype in self.custom_shape_calculators:
            # overwritten operator.
            source = "custom"
            shape_calc = self.custom_shape_calculators[mtype]
        elif operator.type in self.custom_shape_calculators:
            source = "custom"
            shape_calc = self.custom_shape_calculators[operator.type]
        elif hasattr(operator.raw_operator, "onnx_shape_calculator"):
            source = "onnx_shape_calculator"
            shape_calc = operator.raw_operator.onnx_shape_calculator()
        else:
            source = ""
            shape_calc = None

        if shape_calc is not None:
            logger.debug(
                "[Shape1] %r fed %r - %r (source=%r)",
                operator,
                ",".join(str(i.is_fed) for i in operator.inputs),
                ",".join(str(i.is_fed) for i in operator.outputs),
                source,
            )
            shape_calc(operator)
        else:
            logger.debug("[Shape2] call infer_types for %r", operator)
            operator.infer_types()


    def _initialize_graph_status_for_traversing(self):
        """
        Initialize the status of all variables and operators before
        traversing the graph. Only used by convert_operators.
        """
        if len(self.scopes) != 1:
            raise RuntimeError("Only one scope is allowed not %d." % len(self.scopes))
        input_names = {v.onnx_name for v in self.scopes[0].input_variables}
        if len(input_names) == 0:
            raise RuntimeError("No detected inputs.")
        for variable in self.unordered_variable_iterator():
            is_input = variable.onnx_name in input_names
            variable.init_status(is_fed=is_input)

        for operator in self.unordered_operator_iterator():
            operator.init_status(is_evaluated=False)

    def _propagate_status(self, operator, container, fed_variables, verbose=0):
        """
        Propagates status *is_fed* based on output variable
        and node added in the container.
        """
        if verbose > 1:
            print("[_propagate_status] after op=%r" % operator)
        vars = {}
        for node in container.nodes:
            for i in node.input:
                if i not in vars:
                    vars[i] = []
                vars[i].append(node)

        if verbose > 1:
            print(
                "[_propagate_status] newly fed=%r"
                % [v.onnx_name for v in operator.outputs if v.is_fed]
            )
        stack = list(fed_variables)
        scope = self.scopes[0]
        while len(stack) > 0:
            nodes = {}
            for name in stack:
                if name not in vars:
                    continue
                for n in vars[name]:
                    nodes[id(n)] = n
            stack = []
            for node in nodes.values():
                if all(fed_variables.get(n, False) for n in node.input):
                    for o in node.output:
                        if o not in fed_variables:
                            if verbose > 1:
                                print("[_propagate_status] add=%r" % o)
                            fed_variables[o] = o
                            stack.append(o)
                            if o in scope.variables:
                                var = scope.variables[o]
                                var.init_status(is_fed=True)
                                if verbose > 1:
                                    print("[_propagate_status] fed=%r" % var)


[docs]
    def convert_operators(self, container=None, verbose=0):
        """
        Calls all converters and shape_calculator for existing
        operators. It also processes new operators created by
        converters.
        """

        def _check_operator_(operator):
            if not isinstance(operator.inputs, Operator.OperatorList):
                raise TypeError(
                    "operator.inputs must be a Operator.OperatorList "
                    "not %r." % type(operator.inputs)
                )
            if not isinstance(operator.outputs, Operator.OperatorList):
                raise TypeError(
                    "operator.outputs must be a Operator.OperatorList "
                    "not %r." % type(operator.outputs)
                )
            if any(not isinstance(i, Variable) for i in operator.inputs):
                raise TypeError(
                    "One input is not a Variable for operator %r - %r."
                    "" % (type(operator.raw_operator), operator)
                )
            if any(not isinstance(i, Variable) for i in operator.outputs):
                raise TypeError(
                    "One output is not a Variable for operator %r - %r."
                    "" % (type(operator.raw_operator), operator)
                )

        def _check_variable_in_(variable, operator):
            idop = id(operator)
            ids = {id(op) for op in variable.operators_inputs_}
            if idop not in ids:
                raise RuntimeError(
                    "Operator %r not registered in the list of operators "
                    "of %r taking it as an input [\n%s]."
                    % (
                        operator,
                        variable,
                        "\n".join(map(str, variable.operators_inputs_)),
                    )
                )

        def _check_variable_out_(variable, operator):
            if variable.is_fed:
                add = ["", "--DEBUG-INFO--"]
                for scope in self.scopes:
                    add.append("---")
                    add.append(pprint.pformat(scope.variable_name_mapping))
                    add.append("---")
                    for var in scope.variables.values():
                        add.append(
                            "   is_fed=%s %s - n_in=%d n_out=%d"
                            % (
                                getattr(var, "is_fed", "?"),
                                var,
                                len(var.operators_inputs_),
                                len(var.operators_outputs_),
                            )
                        )
                    add.append("---")
                    for op in scope.operators.values():
                        add.append(
                            "   is_evaluated=%s %s"
                            % (getattr(op, "is_evaluated", "?"), op)
                        )
                add.append("---")
                for v in operator.inputs:
                    add.append(" inputs={}".format(v))
                for v in operator.outputs:
                    add.append(" outputs={}".format(v))
                add.append("--- operator producing this variable--")
                for op in variable.operators_outputs_:
                    add.append(str(op))
                raise RuntimeError(
                    "A variable is already assigned ({}) "
                    "for operator '{}' (name='{}'). "
                    "operator.is_evaluated={}, inputs.is_fed={}, "
                    "outputs.is_fed={}. "
                    "This may still happen if a converter is a "
                    "combination of sub-estimators and one "
                    "of them is producing this output. "
                    "In that case, an identity node must be "
                    "added.{}".format(
                        variable,
                        operator.type,
                        operator.onnx_name,
                        operator.is_evaluated,
                        [v.is_fed for v in operator.inputs],
                        [v.is_fed for v in operator.outputs],
                        "\n".join(add),
                    )
                )

        if verbose > 0:
            print("[convert_operators] begin")
        self._initialize_graph_status_for_traversing()
        fed_variables = {i.name: i for i in container.initializers}
        changes = 1
        n_iter = 0
        while changes > 0:
            n_iter += 1
            changes = 0
            ops = list(self.unordered_operator_iterator())
            if verbose > 0:
                print(
                    "[convert_operators] iteration %d - n_vars=%d "
                    "n_ops=%d" % (n_iter, len(fed_variables), len(ops))
                )
            for operator in ops:
                _check_operator_(operator)
                for var in operator.inputs:
                    if var.is_fed:
                        fed_variables[var.onnx_name] = var
                if (
                    all(variable.is_fed for variable in operator.inputs)
                    and not operator.is_evaluated
                ):
                    for variable in operator.inputs:
                        _check_variable_in_(variable, operator)
                    for variable in operator.outputs:
                        _check_variable_out_(variable, operator)

                    self.call_shape_calculator(operator)
                    self.call_converter(operator, container, verbose=verbose)

                    # If an operator contains a sequence of operators,
                    # output variables are not necessarily known at this stage.
                    operator.init_status(is_evaluated=True)
                    for variable in operator.outputs:
                        if all(op.is_evaluated for op in variable.operators_outputs_):
                            variable.init_status(is_fed=True)
                            fed_variables[variable.onnx_name] = variable
                    fed_variables.update(
                        {
                            i.name: i
                            for i in container.initializers
                            if i.name not in fed_variables
                        }
                    )
                    self._propagate_status(
                        operator, container, fed_variables, verbose=verbose
                    )

                    # unfed some variables (it happens when a node
                    # shares an output with another node)
                    rem = []
                    for n, var in fed_variables.items():
                        if not hasattr(var, "operators_outputs_"):
                            # initializer
                            continue
                        if any(not o.is_evaluated for o in var.operators_outputs_):
                            rem.append(n)
                    for r in rem:
                        v = fed_variables[r]
                        v.init_status(is_fed=False)
                        del fed_variables[v.onnx_name]
                    changes += 1

            if verbose > 0:
                print(
                    "[convert_operators] end iter: %d - n_vars=%d"
                    % (n_iter, len(fed_variables))
                )
        if verbose > 0:
            print("[convert_operators] end.")

        # Last verification.
        not_evaluated = []
        for op in self.unordered_operator_iterator():
            if not op.is_evaluated:
                not_evaluated.append(op)
        if len(not_evaluated) > 0:
            rows = ["---VARS---"]
            for var in self.unordered_variable_iterator():
                rows.append(
                    "is_fed=%r is_leaf=%r is_root=%r - %r - n_in=%d n_out=%d"
                    ""
                    % (
                        var.is_fed,
                        var.is_leaf,
                        var.is_root,
                        var,
                        len(var.operators_inputs_),
                        len(var.operators_outputs_),
                    )
                )
            rows.append("---OPERATORS---")
            for op in self.unordered_operator_iterator():
                rows.append("is_eval=%r - %r" % (op.is_evaluated, op))
            rows.append("---NODES---")
            for node in container.nodes:
                rows.append("%s: %r -> %r" % (node.op_type, node.input, node.output))
            raise RuntimeError(
                "Not all operators have been evaluated. A variable name "
                "is probably misspelled.\n%s"
                "" % "\n".join(rows)
            )

        # Input and output
        if len(self.scopes[0].input_variables) > 0:
            inputs = self.scopes[0].input_variables
        else:
            inputs = [v for v in self.unordered_variable_iterator() if v.is_root]
        for i in inputs:
            container.add_input(i)
        outputs = [v for v in self.unordered_variable_iterator() if v.is_leaf]

        # The function checks that for output variable,
        # raw_name equal onnx_name. It swaps names if it is not the case.
        to_swap = []
        for out in outputs:
            if out.raw_name != out.onnx_name:
                to_swap.append(out)
        if len(to_swap) != 0:
            swaped = set()
            for var in to_swap:
                if var.raw_name in swaped:
                    continue
                swaped.add(var.raw_name)
                if verbose > 1:
                    print(
                        "[convert_operators] %r <-> %r." % (var.raw_name, var.onnx_name)
                    )
                old_name = var.onnx_name
                new_name = var.raw_name

                try:
                    container.swap_names(old_name, new_name)
                except NotImplementedError as e:
                    logger.debug(
                        "[Topo] unable to swap %r and %r (%r).", old_name, new_name, e
                    )
                    continue

                for v in self.unordered_variable_iterator():
                    if v.onnx_name == old_name:
                        v.set_onnx_name(new_name)
                    elif v.onnx_name == new_name:
                        v.set_onnx_name(old_name)

        for o in outputs:
            container.add_output(o)




def make_model_from_container(
    container: ModelComponentContainer,
    remove_identity=True,
    doc_string="",
    verbose: int = 0,
    onnx_target_opset=None,
    model_name="",
    as_function=False,
    domain_name="",
):
    container.ensure_topological_order()

    if len(container.inputs) == 0:
        raise RuntimeError("No detected inputs after conversion.")
    if len(container.outputs) == 0:
        raise RuntimeError("No detected outputs after conversion.")
    if verbose >= 2:
        print("---NODES---")
        for node in container.nodes:
            print(
                "  %s - %s: %r -> %r"
                % (node.op_type, node.name, node.input, node.output)
            )

    # Create a graph from its main components
    if container.target_opset_onnx < 9:
        assert not as_function, "Functions not supported for opset < 9."
        # When calling ModelComponentContainer's add_initializer(...),
        # nothing is added into the input list. However, for ONNX target
        # opset < 9, initializers should also be a part of model's
        # (GraphProto) inputs. Thus, we create ValueInfoProto objects
        # from initializers (type: TensorProto) directly and then add
        # them into model's input list.
        extra_inputs = []  # ValueInfoProto list of the initializers
        for tensor in container.initializers:
            # Sometimes (especially when creating optional input values
            # such as RNN's initial hidden state), an initializer is also
            # one of the original model's input, so it has been added into
            # the container's input list. If this is the case, we need to
            # skip one iteration to avoid duplicated inputs.
            if tensor.name in [value_info.name for value_info in container.inputs]:
                continue

            # Initializers are always tensors so we can just call
            # make_tensor_value_info(...).
            value_info = make_tensor_value_info(
                tensor.name, tensor.data_type, tensor.dims
            )
            extra_inputs.append(value_info)

        # Before ONNX opset 9, initializers were needed to be passed in
        # with inputs.
        graph = make_graph(
            container.nodes,
            model_name,
            container.inputs + extra_inputs,
            container.outputs,
            container.initializers,
        )
    else:
        # In ONNX opset 9 and above, initializers are included as
        # operator inputs and therefore do not need to be passed as
        # extra_inputs.
        if as_function:
            graph = make_function(
                domain_name,
                model_name,
                [i.name for i in container.inputs],
                [i.name for i in container.outputs],
                [
                    *[
                        make_node("Constant", [], [init.name], value=init)
                        for init in container.initializers
                    ],
                    *container.nodes,
                ],
                [],  # opsets
            )
        else:
            graph = make_graph(
                container.nodes,
                model_name,
                container.inputs,
                container.outputs,
                container.initializers,
            )

    # Create model
    if as_function:
        onnx_model = graph
    else:
        # Add extra information related to the graph
        graph.value_info.extend(container.value_info)
        onnx_model = make_model(graph)

    # Update domain version
    opv = min(
        onnx_target_opset, _get_main_opset_version(onnx_model) or onnx_target_opset
    )
    if not _update_domain_version(container, onnx_model, verbose=verbose):
        # Main opset was not added. Doing it here.
        op_set = onnx_model.opset_import.add()
        op_set.domain = ""
        op_set.version = opv
        if verbose > 0:
            print("[convert_topology] +opset: name=%r, version=%s" % ("", opv))

    # Add extra information
    if not as_function:
        irv = OPSET_TO_IR_VERSION.get(opv, onnx_proto.IR_VERSION)
        onnx_model.ir_version = irv
        onnx_model.producer_name = utils.get_producer()
        onnx_model.producer_version = utils.get_producer_version()
        onnx_model.domain = utils.get_domain()
        onnx_model.model_version = utils.get_model_version()
    onnx_model.doc_string = doc_string

    # Removes many identity nodes,
    # the converter may introduct identity nodes
    # after a zipmap operator and onnx <= 1.7 does not
    # support that.
    if remove_identity:
        onnx_model = onnx_remove_node_identity(onnx_model)

    # process functions
    for key, cont in container.local_functions.items():
        domain, name = key
        proto = make_model_from_container(
            cont,
            remove_identity=remove_identity,
            verbose=verbose,
            as_function=True,
            model_name=name,
            domain_name=domain,
            onnx_target_opset=container.target_opset_onnx,
        )
        onnx_model.functions.append(proto)

    return onnx_model



[docs]
def convert_topology(
    topology,
    model_name,
    doc_string,
    target_opset,
    options=None,
    remove_identity=True,
    verbose=0,
):
    """
    This function is used to convert our Topology object defined in
    _parser.py into a ONNX model (type: ModelProto).

    :param topology: The Topology object we are going to convert
    :param model_name: GraphProto's name. Let "model" denote the
        returned model. The string "model_name" would be
        assigned to "model.graph.name."
    :param doc_string: A string attached to the produced model
    :param target_opset: number or dictionary,
        for example, 7 for ONNX 1.2, and 8 for ONNX 1.3,
        a dictionary is used to indicate different opset for
        different domains
    :param options: see :ref:`l-conv-options`
    :param remove_identity: removes identity nodes
        include '1.1.2', '1.2', and so on.
    :param verbose: displays information while converting
    :return: a ONNX ModelProto
    """
    if target_opset is None:
        target_opset = get_latest_tested_opset_version()
    if isinstance(target_opset, dict):
        onnx_target_opset = target_opset.get("", get_latest_tested_opset_version())
    else:
        onnx_target_opset = target_opset
    if onnx_target_opset > get_opset_number_from_onnx():
        found = get_opset_number_from_onnx()
        raise RuntimeError(
            "Parameter target_opset {} > {} is higher than the "
            "version of the installed onnx package. See "
            "https://github.com/onnx/onnx/blob/main/docs/"
            "Versioning.md#released-versions"
            ".".format(onnx_target_opset, found)
        )
    if onnx_target_opset > get_latest_tested_opset_version():
        warnings.warn(
            "Parameter target_opset {} > {} is higher than the "
            "the latest tested version"
            ".".format(onnx_target_opset, get_latest_tested_opset_version()),
            stacklevel=0,
        )

    container = ModelComponentContainer(
        target_opset,
        options=options,
        registered_models=topology.registered_models,
        white_op=topology.raw_model._white_op,
        black_op=topology.raw_model._black_op,
        verbose=verbose,
    )
    # Traverse the graph from roots to leaves
    # This loop could eventually be parallelized.
    topology.convert_operators(container=container, verbose=verbose)
    return make_model_from_container(
        container,
        remove_identity=remove_identity,
        doc_string=doc_string,
        verbose=verbose,
        onnx_target_opset=onnx_target_opset,
        model_name=model_name,
    )



def _update_domain_version(container, onnx_model, verbose=0):
    # Merge operator sets for the same domain, the largest version
    # number would be kept
    purified_operator_set = dict()
    for op_domain, op_version in container.node_domain_version_pair_sets:
        if op_domain not in purified_operator_set:
            purified_operator_set[op_domain] = op_version
        else:
            purified_operator_set[op_domain] = max(
                purified_operator_set[op_domain], op_version
            )

    # Fill operator sets
    done = set()
    i = 0
    for op_domain, op_version in purified_operator_set.items():
        if op_version is None:
            continue
        done.add(op_domain)
        if i == 0 and len(onnx_model.opset_import) == 1:
            # Overwrite the default operator set created by
            # make_model(...)
            op_set = onnx_model.opset_import[0]
        else:
            # Just create one ONNX element in opset_import
            op_set = onnx_model.opset_import.add()
        if verbose > 0:
            print(
                "[_update_domain_version] +opset %d: name=%r, version=%s"
                % (i, op_domain, op_version)
            )
        op_set.domain = op_domain
        if op_set != "":
            max_supported = get_default_opset_for_domain(op_domain)
            if max_supported is not None and max_supported < op_version:
                raise RuntimeError(
                    "The model is using version %d of domain %r not supported "
                    "yet by this library. You need to specify "
                    "target_opset={%r: %r}."
                    % (op_version, op_domain, op_domain, max_supported)
                )
        op_set.version = op_version

        i += 1
        if container.target_opset_any_domain(op_domain) < op_version:
            raise RuntimeError(
                "The specified opset %d is too low to convert "
                "this model, which requires at least opset "
                "%d." % (container.target_opset_any_domain(op_domain), op_version)
            )
    for k, v in container.target_opset_all.items():
        if k in done:
            continue
        # Some opsets may be forgotten if they appear in subgraphs.
        op_set = onnx_model.opset_import.add()
        op_set.domain = k
        op_set.version = v

    return "" in purified_operator_set


def _get_main_opset_version(model):
    """
    Returns the main opset version.
    """
    mld = None
    for op in model.opset_import:
        if op.domain == "":
            return op.version
        if op.domain == "ai.onnx.ml":
            mld = op.version
    if mld is not None:
        return OPSET_ML_TO_OPSET.get(mld, None)
    return None