From 6eb6adff2f1a2fe988e79c53e622b18f9ff7ab26 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Fri, 2 Feb 2024 17:22:54 +0100 Subject: [PATCH 01/13] update requirements --- azure-pipelines.yml | 4 ++-- requirements-dev.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 907bb9f..61587f4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -4,8 +4,8 @@ jobs: vmImage: 'ubuntu-latest' strategy: matrix: - Python311-Linux: - python.version: '3.11' + Python312-Linux: + python.version: '3.12' maxParallel: 3 steps: diff --git a/requirements-dev.txt b/requirements-dev.txt index 5804529..5e262e3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -11,7 +11,7 @@ lightgbm matplotlib ml-dtypes git+https://github.com/onnx/onnxmltools.git -onnxruntime>=1.16.1 +onnxruntime>=1.17.0 openpyxl packaging pandas From d520cfa8f996aaf5c7224efd00dd0cd76e9775f6 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 13:03:25 +0100 Subject: [PATCH 02/13] Add class to yield results --- _doc/api/reference.rst | 10 ++ .../ut_reference/test_evaluator_yield.py | 77 +++++++++++++++ onnx_array_api/reference/__init__.py | 1 + onnx_array_api/reference/evaluator_yield.py | 98 +++++++++++++++++++ 4 files changed, 186 insertions(+) create mode 100644 _unittests/ut_reference/test_evaluator_yield.py create mode 100644 onnx_array_api/reference/evaluator_yield.py diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst index acbf90a..733a7fd 100644 --- a/_doc/api/reference.rst +++ b/_doc/api/reference.rst @@ -5,3 +5,13 @@ ExtendedReferenceEvaluator ++++++++++++++++++++++++++ .. autoclass:: onnx_array_api.reference.ExtendedReferenceEvaluator + :members: + +YieldEvaluator +++++++++++++++ + +.. autoclass:: onnx_array_api.reference.ResultType + :members: + +.. 
autoclass:: onnx_array_api.reference.YieldEvaluator + :members: diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py new file mode 100644 index 0000000..667ba01 --- /dev/null +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -0,0 +1,77 @@ +import unittest +import numpy as np +from onnx import TensorProto +from onnx.helper import ( + make_function, + make_graph, + make_model, + make_node, + make_opsetid, + make_tensor_value_info, +) +from onnx_array_api.ext_test_case import ExtTestCase +from onnx_array_api.reference import YieldEvaluator, ResultType + + +class TestArrayTensor(ExtTestCase): + def test_evaluator_yield(self): + new_domain = "custom_domain" + opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)] + + node1 = make_node("MatMul", ["X", "A"], ["XA"]) + node2 = make_node("Add", ["XA", "B"], ["Y"]) + + linear_regression = make_function( + new_domain, + "LinearRegression", + ["X", "A", "B"], + ["Y"], + [node1, node2], + opset_imports, + [], + ) + + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None]) + B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, None) + + graph = make_graph( + [ + make_node( + "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain + ), + make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [X, A, B], + [Y], + ) + + onnx_model = make_model( + graph, opset_imports=opset_imports, functions=[linear_regression] + ) + + cst = np.arange(4).reshape((-1, 2)).astype(np.float32) + yield_eval = YieldEvaluator(onnx_model) + results = list( + yield_eval.enumerate_results(None, {"A": cst, "B": cst, "X": cst}) + ) + expected = [ + (ResultType.INPUT, "A", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), + (ResultType.INPUT, "B", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), + (ResultType.INPUT, "X", 
np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), + (ResultType.RESULT, "Y1", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), + (ResultType.RESULT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), + (ResultType.OUTPUT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), + ] + self.assertEqual(len(expected), len(results)) + for a, b in zip(expected, results): + self.assertEqual(len(a), len(b)) + self.assertEqual(a[0], b[0]) + self.assertEqual(a[1], b[1]) + self.assertEqual(a[2].tolist(), b[2].tolist()) + + +if __name__ == "__main__": + unittest.main(verbosity=2) diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py index d8c5aa5..8f956ab 100644 --- a/onnx_array_api/reference/__init__.py +++ b/onnx_array_api/reference/__init__.py @@ -11,6 +11,7 @@ ) from onnx.reference.op_run import to_array_extended from .evaluator import ExtendedReferenceEvaluator +from .evaluator_yield import YieldEvaluator, ResultType def from_array_extended(tensor: np.array, name: Optional[str] = None) -> TensorProto: diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py new file mode 100644 index 0000000..d10ccbc --- /dev/null +++ b/onnx_array_api/reference/evaluator_yield.py @@ -0,0 +1,98 @@ +from typing import Any, Dict, List, Iterator, Optional, Tuple +from enum import IntEnum +from onnx import ModelProto +from .evaluator import ExtendedReferenceEvaluator + + +class ResultType(IntEnum): + RESULT = 1 + INITIALIZER = 2 + SPARSE_INITIALIZER = 4 + INPUT = 8 + OUTPUT = 16 + + def __repr__(self): + return f"{self.__class__.__name__}.{self._name_}" + + +class YieldEvaluator: + """ + This class implements method `enumerate_results` which iterates on + intermediates results. By default, it uses + :class:`onnx_array_api.evaluator.ExtendedReferenceEvaluator`. 
+ + :param onnx_model: model to run + :param recursive: dig into subgraph and functions as well + """ + + def __init__( + self, + onnx_model: ModelProto, + recursive: bool = False, + cls=ExtendedReferenceEvaluator, + ): + assert not recursive, "recursive=True is not yet implemented" + self.onnx_model = onnx_model + self.evaluator = cls(onnx_model) if cls is not None else None + + def enumerate_results( + self, + output_names: Optional[List[str]] = None, + feed_inputs: Optional[Dict[str, Any]] = None, + ) -> Iterator[Tuple[ResultType, str, Any]]: + """ + Executes the onnx model. + + Args: + output_names: requested outputs by names, None for all + feed_inputs: dictionary `{ input name: input value }` + + Returns: + iterator on tuple(result kind, name, value) + """ + assert isinstance(self.evaluator, ExtendedReferenceEvaluator), ( + f"This implementation only works with " + f"ExtendedReferenceEvaluator not {type(self.evaluator)}" + ) + attributes = {} + if output_names is None: + output_names = self.evaluator.output_names + + results = {"": None} + results.update(self.evaluator.rt_inits_) + results.update(feed_inputs) + # step 0: initializer + for k, v in self.evaluator.rt_inits_.items(): + yield ResultType.INITIALIZER, k, v + # step 1: inputs + for k, v in feed_inputs.items(): + yield ResultType.INPUT, k, v + + # step 2: execute nodes + for node in self.evaluator.rt_nodes_: + for i in node.input: + if i not in results: + raise RuntimeError( + f"Unable to find input {i!r} in known results {sorted(results)}, " + f"self.rt_inits_ has {sorted(self.evaluator.rt_inits_)}, " + f"feed_inputs has {sorted(feed_inputs)}." 
+ ) + inputs = [results[i] for i in node.input] + linked_attributes = {} + if node.has_linked_attribute and attributes: + linked_attributes["linked_attributes"] = attributes + if node.need_context(): + outputs = node.run(*inputs, context=results, **linked_attributes) + else: + outputs = node.run(*inputs, **linked_attributes) + for name, value in zip(node.output, outputs): + yield ResultType.RESULT, name, value + results[name] = value + + # step 3: outputs + for name in output_names: + if name not in results: + raise RuntimeError( + f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}" + ) + yield ResultType.OUTPUT, name, results[name] From d616c58f60c38f382dfa73481310abba8c6ed8d4 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 13:04:31 +0100 Subject: [PATCH 03/13] black --- .../ut_reference/test_evaluator_yield.py | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index 667ba01..952a782 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -58,12 +58,36 @@ def test_evaluator_yield(self): yield_eval.enumerate_results(None, {"A": cst, "B": cst, "X": cst}) ) expected = [ - (ResultType.INPUT, "A", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), - (ResultType.INPUT, "B", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), - (ResultType.INPUT, "X", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32)), - (ResultType.RESULT, "Y1", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), - (ResultType.RESULT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), - (ResultType.OUTPUT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32)), + ( + ResultType.INPUT, + "A", + np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + ), + ( + ResultType.INPUT, + "B", + np.array([[0.0, 1.0], [2.0, 3.0]], 
dtype=np.float32), + ), + ( + ResultType.INPUT, + "X", + np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + ), + ( + ResultType.RESULT, + "Y1", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + ), + ( + ResultType.RESULT, + "Y", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + ), + ( + ResultType.OUTPUT, + "Y", + np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + ), ] self.assertEqual(len(expected), len(results)) for a, b in zip(expected, results): From fe32241f9d4b079a701a52ddc19d8621b0175f57 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 13:19:31 +0100 Subject: [PATCH 04/13] add sumarry --- .../ut_reference/test_evaluator_yield.py | 73 +++++++++++++++++++ onnx_array_api/reference/evaluator_yield.py | 59 +++++++++++++-- 2 files changed, 126 insertions(+), 6 deletions(-) diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index 952a782..daf9a93 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -62,31 +62,37 @@ def test_evaluator_yield(self): ResultType.INPUT, "A", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, ), ( ResultType.INPUT, "B", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, ), ( ResultType.INPUT, "X", np.array([[0.0, 1.0], [2.0, 3.0]], dtype=np.float32), + None, ), ( ResultType.RESULT, "Y1", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + "LinearRegression", ), ( ResultType.RESULT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + "Abs", ), ( ResultType.OUTPUT, "Y", np.array([[2.0, 4.0], [8.0, 14.0]], dtype=np.float32), + None, ), ] self.assertEqual(len(expected), len(results)) @@ -95,6 +101,73 @@ def test_evaluator_yield(self): self.assertEqual(a[0], b[0]) self.assertEqual(a[1], b[1]) self.assertEqual(a[2].tolist(), b[2].tolist()) + self.assertEqual(a[3], b[3]) + + def test_evaluator_yield_summary(self): + new_domain = 
"custom_domain" + opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)] + + node1 = make_node("MatMul", ["X", "A"], ["XA"]) + node2 = make_node("Add", ["XA", "B"], ["Y"]) + + linear_regression = make_function( + new_domain, + "LinearRegression", + ["X", "A", "B"], + ["Y"], + [node1, node2], + opset_imports, + [], + ) + + X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None]) + A = make_tensor_value_info("A", TensorProto.FLOAT, [None, None]) + B = make_tensor_value_info("B", TensorProto.FLOAT, [None, None]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, None) + + graph = make_graph( + [ + make_node( + "LinearRegression", ["X", "A", "B"], ["Y1"], domain=new_domain + ), + make_node("Abs", ["Y1"], ["Y"]), + ], + "example", + [X, A, B], + [Y], + ) + + onnx_model = make_model( + graph, opset_imports=opset_imports, functions=[linear_regression] + ) + + cst = np.arange(4).reshape((-1, 2)).astype(np.float32) + yield_eval = YieldEvaluator(onnx_model) + results = list( + yield_eval.enumerate_summarized(None, {"A": cst, "B": cst, "X": cst}) + ) + expected = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None), + ] + self.assertEqual(len(expected), len(results)) + for a, b in zip(expected, results): + self.assertEqual(len(a), len(b)) + self.assertEqual(a[0], b[0]) + self.assertEqual(a[1], b[1]) + self.assertEqual(a[2], b[2]) + self.assertEqual(a[3], b[3]) + self.assertEqual(a[4], b[4]) if __name__ == "__main__": diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index d10ccbc..865bd00 100644 --- 
a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -1,5 +1,6 @@ from typing import Any, Dict, List, Iterator, Optional, Tuple from enum import IntEnum +import numpy as np from onnx import ModelProto from .evaluator import ExtendedReferenceEvaluator @@ -41,14 +42,14 @@ def enumerate_results( feed_inputs: Optional[Dict[str, Any]] = None, ) -> Iterator[Tuple[ResultType, str, Any]]: """ - Executes the onnx model. + Executes the onnx model and enumerate all the intermediate results. Args: output_names: requested outputs by names, None for all feed_inputs: dictionary `{ input name: input value }` Returns: - iterator on tuple(result kind, name, value) + iterator on tuple(result kind, name, value, node.op_type or None) """ assert isinstance(self.evaluator, ExtendedReferenceEvaluator), ( f"This implementation only works with " @@ -63,10 +64,10 @@ def enumerate_results( results.update(feed_inputs) # step 0: initializer for k, v in self.evaluator.rt_inits_.items(): - yield ResultType.INITIALIZER, k, v + yield ResultType.INITIALIZER, k, v, None # step 1: inputs for k, v in feed_inputs.items(): - yield ResultType.INPUT, k, v + yield ResultType.INPUT, k, v, None # step 2: execute nodes for node in self.evaluator.rt_nodes_: @@ -86,7 +87,7 @@ def enumerate_results( else: outputs = node.run(*inputs, **linked_attributes) for name, value in zip(node.output, outputs): - yield ResultType.RESULT, name, value + yield ResultType.RESULT, name, value, node.op_type results[name] = value # step 3: outputs @@ -95,4 +96,50 @@ def enumerate_results( raise RuntimeError( f"Unable to find output name {name!r} in {sorted(results)}, proto is\n{self.proto_}" ) - yield ResultType.OUTPUT, name, results[name] + yield ResultType.OUTPUT, name, results[name], None + + def enumerate_summarized( + self, + output_names: Optional[List[str]] = None, + feed_inputs: Optional[Dict[str, Any]] = None, + ) -> Iterator[Tuple[ResultType, str, Any]]: + """ + Executes the 
onnx model and enumerate intermediate results without their names. + + Args: + output_names: requested outputs by names, None for all + feed_inputs: dictionary `{ input name: input value }` + + Returns: + iterator on tuple(result kind, node.type, dtype, shape, value) + """ + for kind, name, value, op_type in self.enumerate_results( + output_names, feed_inputs + ): + summary = self.make_summary(value) + yield kind, value.dtype, value.shape, summary, op_type + + def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: + """ + Create a short string summarizing the value (discretization). + + :param value: array + :param length: number of value to produce + :param module: discretization parameter + :return: short string + """ + value4 = np.zeros(4, dtype=np.float64) + if value.size <= length: + value4[: value.size] = value.flatten().astype(np.float64) + else: + if value.size % length != 2: + value2 = np.zeros( + value.size + length - value.size % length, dtype=np.float64 + ) + value2[: value.size] = value.flatten().astype(np.float64) + else: + value2 = value.flatten().astype(np.float64) + value4 = value2.reshape((4, -1)).mean(axis=1) + value4i = value4.astype(np.int64) % modulo + s = "".join([chr(65 + i) for i in value4i]) + return s From a8c2932e1c65a392fe127b168fa9e3cba27af9c1 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 14:40:32 +0100 Subject: [PATCH 05/13] add distance --- CHANGELOGS.rst | 1 + _doc/api/reference.rst | 7 + .../ut_reference/test_evaluator_yield.py | 198 +++++++++++++++++- onnx_array_api/reference/__init__.py | 2 +- onnx_array_api/reference/evaluator_yield.py | 123 ++++++++++- 5 files changed, 320 insertions(+), 11 deletions(-) diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst index dad0930..d0b6445 100644 --- a/CHANGELOGS.rst +++ b/CHANGELOGS.rst @@ -4,6 +4,7 @@ Change Logs 0.2.0 +++++ +* :pr:`71`: adds tools to compare two onnx graphs * :pr:`61`: adds function to plot onnx model as graphs * :pr:`60`: supports 
translation of local functions * :pr:`59`: add methods to update nodes in GraphAPI diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst index 733a7fd..a1507c8 100644 --- a/_doc/api/reference.rst +++ b/_doc/api/reference.rst @@ -15,3 +15,10 @@ YieldEvaluator .. autoclass:: onnx_array_api.reference.YieldEvaluator :members: + +DistanceExecution ++++++++++++++++++ + +.. autoclass:: onnx_array_api.reference.DistanceExecution + :members: + diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index daf9a93..60da2ee 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -10,7 +10,7 @@ make_tensor_value_info, ) from onnx_array_api.ext_test_case import ExtTestCase -from onnx_array_api.reference import YieldEvaluator, ResultType +from onnx_array_api.reference import YieldEvaluator, ResultType, DistanceExecution class TestArrayTensor(ExtTestCase): @@ -147,18 +147,19 @@ def test_evaluator_yield_summary(self): yield_eval.enumerate_summarized(None, {"A": cst, "B": cst, "X": cst}) ) expected = [ - (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), - (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), - (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), ( ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "LinearRegression", + "Y1", ), - (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"), - (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), ] self.assertEqual(len(expected), len(results)) for a, b in zip(expected, results): @@ -168,6 +169,191 @@ 
def test_evaluator_yield_summary(self): self.assertEqual(a[2], b[2]) self.assertEqual(a[3], b[3]) self.assertEqual(a[4], b[4]) + self.assertEqual(a[5], b[5]) + + def test_distance_pair(self): + el1 = (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None) + el2 = el1 + dc = DistanceExecution() + self.assertEqual(dc.distance_pair(el1, el2), 0) + el2 = (ResultType.INPUT, np.dtype("float16"), (2, 2), "ABCD", None) + self.assertEqual(dc.distance_pair(el1, el2), 2) + el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 2, 4), "GBCD", "Abs") + self.assertEqual(dc.distance_pair(el1, el2), 1130) + el2 = (ResultType.OUTPUT, np.dtype("float16"), (2, 3), "GBCD", "Abs") + self.assertEqual(dc.distance_pair(el1, el2), 1021) + + def test_distance_sequence_0(self): + expected = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(expected, expected) + self.assertEqual(d, 0) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + + def test_distance_sequence_ins(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, 
"A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, dc.insert_cost) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3)]) + d, align = dc.distance_sequence(s2, s1) + self.assertEqual(d, dc.insert_cost) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (3, 4)]) + + def test_distance_sequence_equal(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 0) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + + def test_distance_sequence_diff(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + 
(ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 1) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + + def test_distance_sequence_diff2(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"), + ] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, 
s2) + self.assertEqual(d, 5) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) if __name__ == "__main__": diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py index 8f956ab..6f6fc58 100644 --- a/onnx_array_api/reference/__init__.py +++ b/onnx_array_api/reference/__init__.py @@ -11,7 +11,7 @@ ) from onnx.reference.op_run import to_array_extended from .evaluator import ExtendedReferenceEvaluator -from .evaluator_yield import YieldEvaluator, ResultType +from .evaluator_yield import DistanceExecution, ResultType, YieldEvaluator def from_array_extended(tensor: np.array, name: Optional[str] = None) -> TensorProto: diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index 865bd00..d347f39 100644 --- a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -16,11 +16,14 @@ def __repr__(self): return f"{self.__class__.__name__}.{self._name_}" +RESULT_TYPE = Tuple[ResultType, "np.dtype", Tuple[int, ...], str, str, str] + + class YieldEvaluator: """ This class implements method `enumerate_results` which iterates on intermediates results. By default, it uses - :class:`onnx_array_api.evaluator.ExtendedReferenceEvaluator`. + :class:`onnx_array_api.reference.ExtendedReferenceEvaluator`. :param onnx_model: model to run :param recursive: dig into subgraph and functions as well @@ -102,7 +105,7 @@ def enumerate_summarized( self, output_names: Optional[List[str]] = None, feed_inputs: Optional[Dict[str, Any]] = None, - ) -> Iterator[Tuple[ResultType, str, Any]]: + ) -> Iterator[RESULT_TYPE]: """ Executes the onnx model and enumerate intermediate results without their names. 
@@ -111,13 +114,13 @@ def enumerate_summarized( feed_inputs: dictionary `{ input name: input value }` Returns: - iterator on tuple(result kind, node.type, dtype, shape, value) + iterator on tuple(result kind, node.type, dtype, shape, value, result name) """ for kind, name, value, op_type in self.enumerate_results( output_names, feed_inputs ): summary = self.make_summary(value) - yield kind, value.dtype, value.shape, summary, op_type + yield kind, value.dtype, value.shape, summary, op_type, name def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: """ @@ -143,3 +146,115 @@ def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: value4i = value4.astype(np.int64) % modulo s = "".join([chr(65 + i) for i in value4i]) return s + + +class DistanceExecution: + """ + Computes a distance between two results. + """ + + float_types = { + np.float16, + np.float32, + np.float64, + np.dtype("float16"), + np.dtype("float32"), + np.dtype("float64"), + } + + def __init__(self, max_lag: int = 50): + self.kind_cost = 1000 + self.type_cost = 10 + self.rank_cost = 100 + self.op_type_cost = 10 + self.max_lag = max_lag + self.insert_cost = 1000 + + def distance_pair(self, r1: RESULT_TYPE, r2: RESULT_TYPE) -> float: + """ + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"), + + :param r1: first result + :param r2: second result + :return: distance + """ + d = 0 + if r1[0] != r2[0]: + # difference type + d += self.kind_cost + if r1[1] != r2[1]: + d += self._cost_type(r1[1], r2[1]) * self.type_cost + if r1[2] != r2[2]: + d += self._cost_shape(r1[2], r2[2]) + if r1[3] != r2[3]: + d += self._cost_summary(r1[3], r2[3]) + if r1[4] != r2[4]: + d += self.op_type_cost + return d + + def _cost_type(self, t1: "np.dtype", t2: "np.dtype") -> float: + if t1 in self.float_types and t2 in self.float_types: + return 0.2 + return 1 + + def _cost_shape(self, s1: Tuple[int, ...], s2: Tuple[int, ...]) -> float: + d = abs(np.prod(s1) - np.prod(s2)) + 
if len(s1) != len(s2): + return self.rank_cost + d + for i, j in zip(s1, s2): + d += abs(i - j) + return d + + def _cost_summary(self, s1: str, s2: str) -> float: + if len(s1) != len(s2): + return 1e6 + d = 0 + for a, b in zip(s1, s2): + d += abs(ord(a) - ord(b)) + return d + + def distance_sequence( + self, s1: List[RESULT_TYPE], s2: List[RESULT_TYPE] + ) -> Tuple[float, List[Tuple[int, int]]]: + """ + Computes the distance between two sequences of results. + + :param s1: first sequence + :param s2: second sequence + :return: distance and alignment + """ + delay = self.max_lag + distance = {(-1, -1): 0} + predecessor = {(-1, -1): None} + for i in range(len(s1)): + for j in range(max(0, i - delay), min(len(s2), i + delay)): + best = 1e100 + pred = None + ki, kj = i - 1, j - 1 + if (ki, kj) in distance: + d = distance[ki, kj] + self.distance_pair(s1[i], s2[j]) + if d < best: + best = d + pred = (ki, kj) + ki, kj = i - 1, j + if (ki, kj) in distance: + d = distance[ki, kj] + self.insert_cost + if d < best: + best = d + pred = (ki, kj) + ki, kj = i, j - 1 + if (ki, kj) in distance: + d = distance[ki, kj] + self.insert_cost + if d < best: + best = d + pred = (ki, kj) + distance[i, j] = best + predecessor[i, j] = pred + + # reverse + way = [] + last = predecessor[len(s1) - 1, len(s2) - 1] + while last is not None: + way.append(last) + last = predecessor[last] + return distance[len(s1) - 1, len(s2) - 1], list(reversed(way))[1:] From 09e79d2f6902705b921799d3f5461a5e24e40164 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 15:35:29 +0100 Subject: [PATCH 06/13] text --- .../ut_reference/test_evaluator_yield.py | 61 +++++++- onnx_array_api/reference/__init__.py | 7 +- onnx_array_api/reference/evaluator_yield.py | 142 +++++++++++++++--- 3 files changed, 184 insertions(+), 26 deletions(-) diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index 60da2ee..d4ef8a6 100644 --- 
a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -10,7 +10,12 @@ make_tensor_value_info, ) from onnx_array_api.ext_test_case import ExtTestCase -from onnx_array_api.reference import YieldEvaluator, ResultType, DistanceExecution +from onnx_array_api.reference import ( + YieldEvaluator, + ResultType, + DistanceExecution, + ResultExecution, +) class TestArrayTensor(ExtTestCase): @@ -355,6 +360,60 @@ def test_distance_sequence_diff2(self): self.assertEqual(d, 5) self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + def test_distance_sequence_str(self): + s1 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 3), "ABCD", None, "X"), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Exp", "H"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs", "Y"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIO", None, "Y"), + ] + s2 = [ + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "A"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "B"), + (ResultType.INPUT, np.dtype("float32"), (2, 2), "ABCD", None, "X"), + ( + ResultType.RESULT, + np.dtype("float32"), + (2, 2), + "CEIO", + "LinearRegression", + "Y1", + ), + (ResultType.RESULT, np.dtype("float32"), (2, 3), "CEIP", "Abs", "Z"), + (ResultType.OUTPUT, np.dtype("float32"), (2, 2), "CEIP", None, "Y"), + ] + s1 = [ResultExecution(*s) for s in s1] + s2 = [ResultExecution(*s) for s in s2] + + dc = DistanceExecution() + d, align = dc.distance_sequence(s1, s2) + self.assertEqual(d, 1008) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]) + text = dc.to_str(s1, s2, align) + expected = """ + =|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA + 
=|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB + ~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX + -|RESULTfloat322x2CEIOExpH| + =|RESULTfloat322x2CEIOLinearReY1|RESULTfloat322x2CEIOLinearReY1 + ~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ + """.replace( + " ", "" + ).strip( + "\n " + ) + self.assertEqual(expected, text.replace(" ", "").strip("\n")) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py index 6f6fc58..6fa2a33 100644 --- a/onnx_array_api/reference/__init__.py +++ b/onnx_array_api/reference/__init__.py @@ -11,7 +11,12 @@ ) from onnx.reference.op_run import to_array_extended from .evaluator import ExtendedReferenceEvaluator -from .evaluator_yield import DistanceExecution, ResultType, YieldEvaluator +from .evaluator_yield import ( + DistanceExecution, + ResultExecution, + ResultType, + YieldEvaluator, +) def from_array_extended(tensor: np.array, name: Optional[str] = None) -> TensorProto: diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index d347f39..e63feaf 100644 --- a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Any, Dict, List, Iterator, Optional, Tuple from enum import IntEnum import numpy as np @@ -5,6 +6,14 @@ from .evaluator import ExtendedReferenceEvaluator +def _align(res: str, limit: int) -> str: + if len(res) == limit: + return res + if len(res) > limit: + return res[:limit] + return res + " " * (limit - len(res)) + + class ResultType(IntEnum): RESULT = 1 INITIALIZER = 2 @@ -16,7 +25,47 @@ def __repr__(self): return f"{self.__class__.__name__}.{self._name_}" -RESULT_TYPE = Tuple[ResultType, "np.dtype", Tuple[int, ...], str, str, str] +@dataclass +class ResultExecution: + """ + The description of a result. 
+ """ + + kind: ResultType + dtype: object + shape: tuple + summary: str + op_type: str + name: str + + def __len__(self) -> int: + return 6 + + def __getitem__(self, i: int) -> Any: + if i == 0: + return self.kind + if i == 1: + return self.dtype + if i == 2: + return self.shape + if i == 3: + return self.summary + if i == 4: + return self.op_type + if i == 5: + return self.name + raise IndexError(f"i={i} out of boundary") + + def __str__(self): + els = [ + _align(self.kind._name_, 6), + _align(str(self.dtype).replace("dtype(", "").replace(")", ""), 8), + _align("x".join(map(str, self.shape)), 15), + self.summary, + _align(self.op_type or "", 8), + self.name or "", + ] + return " ".join(els) class YieldEvaluator: @@ -101,27 +150,6 @@ def enumerate_results( ) yield ResultType.OUTPUT, name, results[name], None - def enumerate_summarized( - self, - output_names: Optional[List[str]] = None, - feed_inputs: Optional[Dict[str, Any]] = None, - ) -> Iterator[RESULT_TYPE]: - """ - Executes the onnx model and enumerate intermediate results without their names. - - Args: - output_names: requested outputs by names, None for all - feed_inputs: dictionary `{ input name: input value }` - - Returns: - iterator on tuple(result kind, node.type, dtype, shape, value, result name) - """ - for kind, name, value, op_type in self.enumerate_results( - output_names, feed_inputs - ): - summary = self.make_summary(value) - yield kind, value.dtype, value.shape, summary, op_type, name - def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: """ Create a short string summarizing the value (discretization). 
@@ -147,6 +175,29 @@ def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: s = "".join([chr(65 + i) for i in value4i]) return s + def enumerate_summarized( + self, + output_names: Optional[List[str]] = None, + feed_inputs: Optional[Dict[str, Any]] = None, + ) -> Iterator[ResultExecution]: + """ + Executes the onnx model and enumerate intermediate results without their names. + + Args: + output_names: requested outputs by names, None for all + feed_inputs: dictionary `{ input name: input value }` + + Returns: + iterator on tuple(result kind, node.type, dtype, shape, value, result name) + """ + for kind, name, value, op_type in self.enumerate_results( + output_names, feed_inputs + ): + summary = self.make_summary(value) + yield ResultExecution( + kind, value.dtype, value.shape, summary, op_type, name + ) + class DistanceExecution: """ @@ -170,7 +221,7 @@ def __init__(self, max_lag: int = 50): self.max_lag = max_lag self.insert_cost = 1000 - def distance_pair(self, r1: RESULT_TYPE, r2: RESULT_TYPE) -> float: + def distance_pair(self, r1: ResultExecution, r2: ResultExecution) -> float: """ (ResultType.RESULT, np.dtype("float32"), (2, 2), "CEIO", "Abs"), @@ -214,7 +265,7 @@ def _cost_summary(self, s1: str, s2: str) -> float: return d def distance_sequence( - self, s1: List[RESULT_TYPE], s2: List[RESULT_TYPE] + self, s1: List[ResultExecution], s2: List[ResultExecution] ) -> Tuple[float, List[Tuple[int, int]]]: """ Computes the distance between two sequences of results. @@ -258,3 +309,46 @@ def distance_sequence( way.append(last) last = predecessor[last] return distance[len(s1) - 1, len(s2) - 1], list(reversed(way))[1:] + + def to_str( + self, + s1: List[ResultExecution], + s2: List[ResultExecution], + alignment: List[Tuple[int, int]], + column_size: int = 50, + ) -> str: + """ + Prints out the alignment between two sequences into a string. 
+ :param s1: first sequence + :param s2: second sequence + :param alignment: alignment + :param column_size: column size + :return: test + """ + rows = [] + last = -1, -1 + for i, j in alignment: + assert i < len(s1), f"Unexpected value i={i} >= len(s1)={len(s1)}" + assert j < len(s2), f"Unexpected value i={j} >= len(s2)={len(s2)}" + expected = last[0] + 1, last[1] + 1 + + if expected == (i, j): + d1 = s1[i] + d2 = s2[j] + d = self.distance_pair(d1, d2) + symbol = "=" if d == 0 else "~" + rows.append( + f"{symbol} | {_align(str(d1), column_size)} | {_align(str(d2), column_size)}" + ) + elif i == last[0]: + d2 = s2[j] + rows.append( + f"+ | {_align('', column_size)} | {_align(str(d2), column_size)} " + ) + else: + d1 = s1[i] + rows.append( + f"- | {_align(str(d1), column_size)} | {_align('', column_size)}" + ) + last = i, j + return "\n".join(rows) From e1e30dcdc8a422ffad459fbcd8e7a0fc2bc48046 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 16:30:37 +0100 Subject: [PATCH 07/13] compare function --- _doc/api/reference.rst | 17 +++- .../ut_reference/test_evaluator_yield.py | 25 ++++++ onnx_array_api/reference/__init__.py | 1 + onnx_array_api/reference/evaluator_yield.py | 89 ++++++++++++++++++- 4 files changed, 129 insertions(+), 3 deletions(-) diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst index a1507c8..7752ce6 100644 --- a/_doc/api/reference.rst +++ b/_doc/api/reference.rst @@ -7,12 +7,21 @@ ExtendedReferenceEvaluator .. autoclass:: onnx_array_api.reference.ExtendedReferenceEvaluator :members: -YieldEvaluator -++++++++++++++ +ResultType +++++++++++ .. autoclass:: onnx_array_api.reference.ResultType :members: +ResultExecution ++++++++++++++++ + +.. autoclass:: onnx_array_api.reference.ResultExecution + :members: + +YieldEvaluator +++++++++++++++ + .. autoclass:: onnx_array_api.reference.YieldEvaluator :members: @@ -22,3 +31,7 @@ DistanceExecution .. 
autoclass:: onnx_array_api.reference.DistanceExecution :members: +compare_execution ++++++++++++++++++ + +.. autofunction:: onnx_array_api.reference.compare_execution diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index d4ef8a6..57d39a4 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -9,12 +9,14 @@ make_opsetid, make_tensor_value_info, ) +from onnx.parser import parse_model from onnx_array_api.ext_test_case import ExtTestCase from onnx_array_api.reference import ( YieldEvaluator, ResultType, DistanceExecution, ResultExecution, + compare_onnx_execution, ) @@ -414,6 +416,29 @@ def test_distance_sequence_str(self): ) self.assertEqual(expected, text.replace(" ", "").strip("\n")) + def test_compare_execution(self): + m1 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + four = Add(two, two) + z = Mul(x, x) + }""" + ) + m2 = parse_model( + """ + + agraph (float[N] x) => (float[N] z) { + two = Constant () + z = Mul(x, x) + }""" + ) + res1, res2, align, dc = compare_onnx_execution(m1, m2) + text = dc.to_str(res1, res2, align) + self.assertIn("CAAA Constant", text) + self.assertEqual(len(align), 4) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/onnx_array_api/reference/__init__.py b/onnx_array_api/reference/__init__.py index 6fa2a33..fd1d27c 100644 --- a/onnx_array_api/reference/__init__.py +++ b/onnx_array_api/reference/__init__.py @@ -16,6 +16,7 @@ ResultExecution, ResultType, YieldEvaluator, + compare_onnx_execution, ) diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index e63feaf..cb9e5fb 100644 --- a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Iterator, Optional, Tuple from enum import IntEnum import numpy as np -from onnx import 
ModelProto +from onnx import ModelProto, TensorProto, ValueInfoProto from .evaluator import ExtendedReferenceEvaluator @@ -352,3 +352,90 @@ def to_str( ) last = i, j return "\n".join(rows) + + +def generate_input(info: ValueInfoProto) -> np.ndarray: + """ + Generates one input. + """ + elem_type = info.type.tensor_type.elem_type + shape = [ + (getattr(d, "dim_value", None) or getattr(d, "dim_param")) + for d in info.type.tensor_type.shape.dim + ] + new_shape = [] + for sh in shape: + if isinstance(sh, str): + if len(new_shape) == 0: + new_shape.append(1) + else: + new_shape.append(16) + else: + new_shape.append(sh) + new_shape = tuple(new_shape) + p = np.prod(new_shape) + value = np.arange(p) + if elem_type == TensorProto.INT32: + return value.astype(np.int32).reshape(new_shape) + if elem_type == TensorProto.INT64: + return value.astype(np.int64).reshape(new_shape) + if elem_type == TensorProto.FLOAT: + return (value.astype(np.float32) / p).astype(np.float32).reshape(new_shape) + if elem_type == TensorProto.FLOAT16: + return (value.astype(np.float16) / p).astype(np.float16).reshape(new_shape) + if elem_type == TensorProto.FLOAT64: + return (value.astype(np.float64) / p).astype(np.float64).reshape(new_shape) + raise RuntimeError(f"Unexpected element_type {elem_type} for info={info}") + + +def generate_inputs(model: ModelProto) -> List[np.ndarray]: + """ + Generates inputs for a specific model. + + :param model: ModelProto + :return: list of inputs + """ + inputs = [] + inits = set(i.name for i in model.graph.initializer) + for inp in model.graph.input: + if inp.name in inits: + break + inputs.append(generate_input(inp)) + return inputs + + +def compare_onnx_execution( + model1: ModelProto, model2: ModelProto, verbose: int = 0 +) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]: + """ + Compares the execution of two onnx models. + The function assumes both models takes the same inputs. 
+ + :param model1: first model + :param model2: second model + :param verbose: verbosity + :return: four results, a sequence of results for the first model and the second model, + the alignment between the two, DistanceExecution + """ + if verbose: + print("[compare_onnx_execution] generate inputs") + inputs = generate_inputs(model1) + feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)} + feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)} + if verbose: + print(f"[compare_onnx_execution] got {len(inputs)} inputs") + print("[compare_onnx_execution] execute first model") + res1 = list(YieldEvaluator(model1).enumerate_summarized(None, feeds1)) + if verbose: + print(f"[compare_onnx_execution] got {len(res1)} results") + print("[compare_onnx_execution] execute second model") + res2 = list(YieldEvaluator(model2).enumerate_summarized(None, feeds2)) + if verbose: + print(f"[compare_onnx_execution] got {len(res2)} results") + print("[compare_onnx_execution] compute edit distance") + dc = DistanceExecution() + _, align = dc.distance_sequence(res1, res2) + if verbose: + print(f"[compare_onnx_execution] got {len(align)} pairs") + print("[compare_onnx_execution] done") + return res1, res2, align, dc From 12b103159e1ec382a4fcc4fed3483bdc90b14c5d Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 18:19:13 +0100 Subject: [PATCH 08/13] fix FusedMatMul --- _doc/command_lines.rst | 52 +++++++++++++++ _doc/index.rst | 1 + _unittests/ut_reference/test_array_tensor.py | 26 +++++++- .../ut_reference/test_evaluator_yield.py | 36 ++++++++--- _unittests/ut_xrun_doc/test_command_lines1.py | 37 +++++++++++ onnx_array_api/_command_lines_parser.py | 58 ++++++++++++++++- onnx_array_api/reference/evaluator.py | 2 + onnx_array_api/reference/evaluator_yield.py | 64 ++++++++++--------- .../reference/ops/op_fused_matmul.py | 29 +++++++++ 9 files changed, 264 insertions(+), 41 deletions(-) create mode 100644 _doc/command_lines.rst create mode 100644 
onnx_array_api/reference/ops/op_fused_matmul.py diff --git a/_doc/command_lines.rst b/_doc/command_lines.rst new file mode 100644 index 0000000..b7ff104 --- /dev/null +++ b/_doc/command_lines.rst @@ -0,0 +1,52 @@ +============= +command lines +============= + +compare +======= + +The function convers an onnx file into some code. + +:: + + python -m compare -m1 model1.onnx -m2 model2.onnx -v 1 + +Output example:: + + [compare_onnx_execution] got 2 inputs + [compare_onnx_execution] execute first model + [compare_onnx_execution] got 5 results + [compare_onnx_execution] execute second model + [compare_onnx_execution] got 5 results + [compare_onnx_execution] compute edit distance + [compare_onnx_execution] got 4 pairs + [compare_onnx_execution] done + = | INPUT float32 5x6 AAAA X | INPUT float32 5x6 AAAA X + = | INPUT float32 5x6 AAAA Y | INPUT float32 5x6 AAAA Y + = | RESULT float32 5x6 AABB Add res | RESULT float32 5x6 AABB Add res + = | RESULT float32 5x6 AAAA Cos Z | RESULT float32 5x6 AAAA Cos Z + +.. runpython:: + + from onnx_extended._command_lines_parser import get_parser_compare + get_parser_compare().print_help() + +See function :func:`onnx_array_api.reference.compare_onnx_execution`. + +translate +========= + +The function convers an onnx file into some code. + +:: + + python -m translate ... + +Output example:: + + not yet ready + +.. runpython:: + + from onnx_extended._command_lines_parser import get_parser_translate + get_parser_translate().print_help() diff --git a/_doc/index.rst b/_doc/index.rst index 02c4eed..b81be4f 100644 --- a/_doc/index.rst +++ b/_doc/index.rst @@ -36,6 +36,7 @@ The objective is to speed up the implementation of converter libraries. tutorial/index api/index tech/index + command_lines auto_examples/index .. 
toctree:: diff --git a/_unittests/ut_reference/test_array_tensor.py b/_unittests/ut_reference/test_array_tensor.py index 59fe5f1..f13c3e5 100644 --- a/_unittests/ut_reference/test_array_tensor.py +++ b/_unittests/ut_reference/test_array_tensor.py @@ -1,7 +1,13 @@ import unittest import numpy as np from onnx import TensorProto -from onnx.helper import make_graph, make_model, make_node, make_tensor_value_info +from onnx.helper import ( + make_graph, + make_model, + make_node, + make_tensor_value_info, + make_opsetid, +) from onnx_array_api.ext_test_case import ExtTestCase from onnx_array_api.reference import ( to_array_extended, @@ -51,6 +57,24 @@ def make_model_f8(fr, to): back = from_array_extended(got, "a") self.assertEqual(to, back.data_type) + def test_fused_matmul(self): + model = make_model( + make_graph( + [make_node("FusedMatMul", ["X", "Y"], ["Z"], domain="com.microsoft")], + "name", + [ + make_tensor_value_info("X", TensorProto.FLOAT, None), + make_tensor_value_info("Y", TensorProto.FLOAT, None), + ], + [make_tensor_value_info("Z", TensorProto.FLOAT, None)], + ), + opset_imports=[make_opsetid("", 18), make_opsetid("com.microsoft", 1)], + ) + ref = ExtendedReferenceEvaluator(model) + a = np.arange(4).reshape(-1, 2) + got = ref.run(None, {"X": a, "Y": a}) + self.assertEqualArray(a @ a, got[0]) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index 57d39a4..8565628 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -18,9 +18,25 @@ ResultExecution, compare_onnx_execution, ) +from onnx_array_api.reference.evaluator_yield import make_summary class TestArrayTensor(ExtTestCase): + def test_make_summary(self): + a = np.arange(12).reshape(3, 4) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = np.arange(12) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = 
np.arange(12).astype(np.float32) + v = make_summary(a) + self.assertEqual(v, "DMVE") + a = np.arange(13) + a[-1] = 0 + v = make_summary(a) + self.assertEqual(v, "GWMA") + def test_evaluator_yield(self): new_domain = "custom_domain" opset_imports = [make_opsetid("", 14), make_opsetid(new_domain, 1)] @@ -210,7 +226,7 @@ def test_distance_sequence_0(self): dc = DistanceExecution() d, align = dc.distance_sequence(expected, expected) self.assertEqual(d, 0) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) def test_distance_sequence_ins(self): s1 = [ @@ -246,10 +262,10 @@ def test_distance_sequence_ins(self): dc = DistanceExecution() d, align = dc.distance_sequence(s1, s2) self.assertEqual(d, dc.insert_cost) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (5, 4)]) d, align = dc.distance_sequence(s2, s1) self.assertEqual(d, dc.insert_cost) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (3, 4)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (3, 4), (4, 5)]) def test_distance_sequence_equal(self): s1 = [ @@ -286,7 +302,7 @@ def test_distance_sequence_equal(self): dc = DistanceExecution() d, align = dc.distance_sequence(s1, s2) self.assertEqual(d, 0) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) def test_distance_sequence_diff(self): s1 = [ @@ -323,7 +339,7 @@ def test_distance_sequence_diff(self): dc = DistanceExecution() d, align = dc.distance_sequence(s1, s2) self.assertEqual(d, 1) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) def test_distance_sequence_diff2(self): s1 = [ @@ -360,7 +376,7 @@ def test_distance_sequence_diff2(self): dc = DistanceExecution() d, align = 
dc.distance_sequence(s1, s2) self.assertEqual(d, 5) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)]) + self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5)]) def test_distance_sequence_str(self): s1 = [ @@ -400,8 +416,11 @@ def test_distance_sequence_str(self): dc = DistanceExecution() d, align = dc.distance_sequence(s1, s2) self.assertEqual(d, 1008) - self.assertEqual(align, [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4)]) + self.assertEqual( + align, [(0, 0), (1, 1), (2, 2), (3, 2), (4, 3), (5, 4), (6, 5)] + ) text = dc.to_str(s1, s2, align) + self.assertIn("OUTPUT", text) expected = """ =|INPUTfloat322x2ABCDA|INPUTfloat322x2ABCDA =|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB @@ -409,6 +428,7 @@ def test_distance_sequence_str(self): -|RESULTfloat322x2CEIOExpH| =|RESULTfloat322x2CEIOLinearReY1|RESULTfloat322x2CEIOLinearReY1 ~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ + ~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY """.replace( " ", "" ).strip( @@ -437,7 +457,7 @@ def test_compare_execution(self): res1, res2, align, dc = compare_onnx_execution(m1, m2) text = dc.to_str(res1, res2, align) self.assertIn("CAAA Constant", text) - self.assertEqual(len(align), 4) + self.assertEqual(len(align), 5) if __name__ == "__main__": diff --git a/_unittests/ut_xrun_doc/test_command_lines1.py b/_unittests/ut_xrun_doc/test_command_lines1.py index 8aa17ee..1cd16bb 100644 --- a/_unittests/ut_xrun_doc/test_command_lines1.py +++ b/_unittests/ut_xrun_doc/test_command_lines1.py @@ -14,6 +14,7 @@ from onnx_array_api.ext_test_case import ExtTestCase from onnx_array_api._command_lines_parser import ( get_main_parser, + get_parser_compare, get_parser_translate, main, ) @@ -70,6 +71,42 @@ def test_command_translate(self): code = st.getvalue() self.assertIn("start(opset=", code) + def test_parser_compare(self): + st = StringIO() + with redirect_stdout(st): + get_parser_compare().print_help() + text = st.getvalue() + self.assertIn("model1", text) + + 
def test_command_compare(self): + X = make_tensor_value_info("X", TensorProto.FLOAT, [5, 6]) + Y = make_tensor_value_info("Y", TensorProto.FLOAT, [5, 6]) + Z = make_tensor_value_info("Z", TensorProto.FLOAT, [5, 6]) + graph = make_graph( + [ + make_node("Add", ["X", "Y"], ["res"]), + make_node("Cos", ["res"], ["Z"]), + ], + "g", + [X, Y], + [Z], + ) + onnx_model = make_model(graph, opset_imports=[make_opsetid("", 18)]) + + with tempfile.TemporaryDirectory() as root: + model_file = os.path.join(root, "model.onnx") + with open(model_file, "wb") as f: + f.write(onnx_model.SerializeToString()) + + args = ["compare", "-m1", model_file, "-m2", model_file, "-v", "1"] + st = StringIO() + with redirect_stdout(st): + main(args) + + code = st.getvalue() + self.assertIn("[compare_onnx_execution]", code) + self.assertIn("AAAA", code) + if __name__ == "__main__": unittest.main(verbosity=2) diff --git a/onnx_array_api/_command_lines_parser.py b/onnx_array_api/_command_lines_parser.py index 71f5a35..a180deb 100644 --- a/onnx_array_api/_command_lines_parser.py +++ b/onnx_array_api/_command_lines_parser.py @@ -14,12 +14,13 @@ def get_main_parser() -> ArgumentParser: ) parser.add_argument( "cmd", - choices=["translate"], + choices=["translate", "compare"], help=dedent( """ Selects a command. - 'translate' exports an onnx graph into a piece of code replicating it. + 'translate' exports an onnx graph into a piece of code replicating it, + 'compares' compares the execution of two onnx models """ ), ) @@ -65,8 +66,59 @@ def _cmd_translate(argv: List[Any]): print(code) +def get_parser_compare() -> ArgumentParser: + parser = ArgumentParser( + prog="compare", + description=dedent( + """ + Compares the execution of two onnx models. 
+ """ + ), + epilog="This is used when two models are different but should produce the same results.", + ) + parser.add_argument( + "-m1", + "--model1", + type=str, + required=True, + help="first onnx model", + ) + parser.add_argument( + "-m2", + "--model2", + type=str, + required=True, + help="second onnx model", + ) + parser.add_argument( + "-v", + "--verbose", + default=0, + help="verbosity", + ) + parser.add_argument( + "-c", + "--column-size", + default=50, + help="column size when displaying the results", + ) + return parser + + +def _cmd_compare(argv: List[Any]): + from .reference import compare_onnx_execution + + parser = get_parser_compare() + args = parser.parse_args(argv[1:]) + onx1 = onnx.load(args.model1) + onx2 = onnx.load(args.model2) + res1, res2, align, dc = compare_onnx_execution(onx1, onx2, verbose=args.verbose) + text = dc.to_str(res1, res2, align, column_size=args.column_size) + print(text) + + def main(argv: Optional[List[Any]] = None): - fcts = dict(translate=_cmd_translate) + fcts = dict(translate=_cmd_translate, compare=_cmd_compare) if argv is None: argv = sys.argv[1:] diff --git a/onnx_array_api/reference/evaluator.py b/onnx_array_api/reference/evaluator.py index e20be76..54f0c26 100644 --- a/onnx_array_api/reference/evaluator.py +++ b/onnx_array_api/reference/evaluator.py @@ -7,6 +7,7 @@ from .ops.op_cast_like import CastLike_15, CastLike_19 from .ops.op_concat import Concat from .ops.op_constant_of_shape import ConstantOfShape +from .ops.op_fused_matmul import FusedMatMul logger = getLogger("onnx-array-api-eval") @@ -32,6 +33,7 @@ class ExtendedReferenceEvaluator(ReferenceEvaluator): CastLike_15, CastLike_19, ConstantOfShape, + FusedMatMul, ] @staticmethod diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index cb9e5fb..9c77deb 100644 --- a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -68,6 +68,32 @@ def __str__(self): return " 
".join(els) +def make_summary(value: Any, length: int = 4, modulo: int = 26) -> str: + """ + Create a short string summarizing the value (discretization). + + :param value: array + :param length: number of value to produce + :param module: discretization parameter + :return: short string + """ + value4 = np.zeros(length, dtype=np.float64) + if value.size <= length: + value4[: value.size] = value.flatten().astype(np.float64) + else: + if value.size % length != 0: + value2 = np.zeros( + value.size + length - value.size % length, dtype=np.float64 + ) + value2[: value.size] = value.flatten().astype(np.float64) + else: + value2 = value.flatten().astype(np.float64) + value4 = value2.reshape((4, -1)).sum(axis=1) + value4i = value4.astype(np.int64) % modulo + s = "".join([chr(65 + i) for i in value4i]) + return s + + class YieldEvaluator: """ This class implements method `enumerate_results` which iterates on @@ -150,31 +176,6 @@ def enumerate_results( ) yield ResultType.OUTPUT, name, results[name], None - def make_summary(self, value: Any, length: int = 4, modulo: int = 26) -> str: - """ - Create a short string summarizing the value (discretization). 
- - :param value: array - :param length: number of value to produce - :param module: discretization parameter - :return: short string - """ - value4 = np.zeros(4, dtype=np.float64) - if value.size <= length: - value4[: value.size] = value.flatten().astype(np.float64) - else: - if value.size % length != 2: - value2 = np.zeros( - value.size + length - value.size % length, dtype=np.float64 - ) - value2[: value.size] = value.flatten().astype(np.float64) - else: - value2 = value.flatten().astype(np.float64) - value4 = value2.reshape((4, -1)).mean(axis=1) - value4i = value4.astype(np.int64) % modulo - s = "".join([chr(65 + i) for i in value4i]) - return s - def enumerate_summarized( self, output_names: Optional[List[str]] = None, @@ -193,7 +194,7 @@ def enumerate_summarized( for kind, name, value, op_type in self.enumerate_results( output_names, feed_inputs ): - summary = self.make_summary(value) + summary = make_summary(value) yield ResultExecution( kind, value.dtype, value.shape, summary, op_type, name ) @@ -304,7 +305,7 @@ def distance_sequence( # reverse way = [] - last = predecessor[len(s1) - 1, len(s2) - 1] + last = len(s1) - 1, len(s2) - 1 while last is not None: way.append(last) last = predecessor[last] @@ -405,7 +406,10 @@ def generate_inputs(model: ModelProto) -> List[np.ndarray]: def compare_onnx_execution( - model1: ModelProto, model2: ModelProto, verbose: int = 0 + model1: ModelProto, + model2: ModelProto, + inputs: Optional[List[Any]] = None, + verbose: int = 0, ) -> Tuple[List[ResultExecution], List[ResultExecution], List[Tuple[int, int]]]: """ Compares the execution of two onnx models. 
@@ -413,13 +417,15 @@ def compare_onnx_execution( :param model1: first model :param model2: second model + :param inputs: inputs to use :param verbose: verbosity :return: four results, a sequence of results for the first model and the second model, the alignment between the two, DistanceExecution """ if verbose: print("[compare_onnx_execution] generate inputs") - inputs = generate_inputs(model1) + if inputs is None: + inputs = generate_inputs(model1) feeds1 = {i.name: v for i, v in zip(model1.graph.input, inputs)} feeds2 = {i.name: v for i, v in zip(model2.graph.input, inputs)} if verbose: diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py new file mode 100644 index 0000000..0f5b495 --- /dev/null +++ b/onnx_array_api/reference/ops/op_fused_matmul.py @@ -0,0 +1,29 @@ +from onnx.reference.op_run import OpRun + + +class FusedMatMul(OpRun): + op_domain = "com.microsoft" + + def _run( + self, + A, + B, + alpha: float = 0, + transA: int = 0, + transB: int = 0, + transBatchA: int = 0, + transBatchB: int = 0, + ): + assert ( + transBatchA == 0 + ), f"Not implemented for transBatchA==1 and {A.shape}x{B.shape}" + assert ( + transBatchB == 0 + ), f"Not implemented for transBatchB==1 and {A.shape}x{B.shape}" + if transA: + dim = len(A.shape) + A = A.transpose(axes=(dim - 2, dim - 1)) + if transB: + dim = len(B.shape) + B = B.transpose(axes=(dim - 2, dim - 1)) + return (A @ B,) From 09863991172c330ff41c95d790161c4bac0d2c63 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Mon, 5 Feb 2024 23:43:58 +0100 Subject: [PATCH 09/13] fix alpha --- onnx_array_api/reference/ops/op_fused_matmul.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py index 0f5b495..9bafd96 100644 --- a/onnx_array_api/reference/ops/op_fused_matmul.py +++ b/onnx_array_api/reference/ops/op_fused_matmul.py @@ -1,3 +1,4 @@ +import numpy as 
np from onnx.reference.op_run import OpRun @@ -26,4 +27,5 @@ def _run( if transB: dim = len(B.shape) B = B.transpose(axes=(dim - 2, dim - 1)) - return (A @ B,) + a = np.array(alpha, dtype=A.dtype) + return (A @ B * a,) From d1f276b67608e7d9ec413c01f8f0dcf621c37e65 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Tue, 6 Feb 2024 00:46:01 +0100 Subject: [PATCH 10/13] example --- _doc/examples/plot_onnx_diff.py | 68 +++++++++++++++++++ _unittests/ut_xrun_doc/test_command_lines1.py | 2 +- onnx_array_api/reference/evaluator_yield.py | 8 ++- .../reference/ops/op_fused_matmul.py | 2 +- 4 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 _doc/examples/plot_onnx_diff.py diff --git a/_doc/examples/plot_onnx_diff.py b/_doc/examples/plot_onnx_diff.py new file mode 100644 index 0000000..7a5f1d3 --- /dev/null +++ b/_doc/examples/plot_onnx_diff.py @@ -0,0 +1,68 @@ +""" + +.. _l-onnx-diff-example: + +Compares the conversions of the same model with different options +================================================================= + +The script compares two onnx models obtained with the same trained +scikit-learn models but converted with different options. 
+ +A model ++++++++ +""" + +from sklearn.mixture import GaussianMixture +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from skl2onnx import to_onnx +from onnx_array_api.reference import compare_onnx_execution +from onnx_array_api.plotting.text_plot import onnx_simple_text_plot + + +data = load_iris() +X_train, X_test = train_test_split(data.data) +model = GaussianMixture() +model.fit(X_train) + +################################# +# Conversion to onnx +# ++++++++++++++++++ + +onx = to_onnx( + model, X_train[:1], options={id(model): {"score_samples": True}}, target_opset=12 +) + +print(onnx_simple_text_plot(onx)) + +################################## +# Conversion to onnx without ReduceLogSumExp +# ++++++++++++++++++++++++++++++++++++++++++ + +onx2 = to_onnx( + model, + X_train[:1], + options={id(model): {"score_samples": True}}, + black_op={"ReduceLogSumExp"}, + target_opset=12, +) + +print(onnx_simple_text_plot(onx2)) + + +############################################# +# Differences +# +++++++++++ +# +# Function :func:`onnx_array_api.reference.compare_onnx_execution` +# compares the intermediate results of two onnx models. Then it finds +# the best alignment between the two models using an edit distance. + +res1, res2, align, dc = compare_onnx_execution(onx, onx2, verbose=1) +print("------------") +text = dc.to_str(res1, res2, align) +print(text) + +############################### +# The display shows that ReduceSumSquare was replaced by Mul + ReduceSum, +# and ReduceLogSumExp by ReduceMax + Sub + Exp + Log + Add. 
diff --git a/_unittests/ut_xrun_doc/test_command_lines1.py b/_unittests/ut_xrun_doc/test_command_lines1.py index 1cd16bb..02f84bd 100644 --- a/_unittests/ut_xrun_doc/test_command_lines1.py +++ b/_unittests/ut_xrun_doc/test_command_lines1.py @@ -105,7 +105,7 @@ def test_command_compare(self): code = st.getvalue() self.assertIn("[compare_onnx_execution]", code) - self.assertIn("AAAA", code) + self.assertIn("ADFF", code) if __name__ == "__main__": diff --git a/onnx_array_api/reference/evaluator_yield.py b/onnx_array_api/reference/evaluator_yield.py index 9c77deb..3935913 100644 --- a/onnx_array_api/reference/evaluator_yield.py +++ b/onnx_array_api/reference/evaluator_yield.py @@ -62,7 +62,7 @@ def __str__(self): _align(str(self.dtype).replace("dtype(", "").replace(")", ""), 8), _align("x".join(map(str, self.shape)), 15), self.summary, - _align(self.op_type or "", 8), + _align(self.op_type or "", 10), self.name or "", ] return " ".join(els) @@ -316,7 +316,7 @@ def to_str( s1: List[ResultExecution], s2: List[ResultExecution], alignment: List[Tuple[int, int]], - column_size: int = 50, + column_size: int = 60, ) -> str: """ Prints out the alignment between two sequences into a string. @@ -384,7 +384,7 @@ def generate_input(info: ValueInfoProto) -> np.ndarray: return (value.astype(np.float32) / p).astype(np.float32).reshape(new_shape) if elem_type == TensorProto.FLOAT16: return (value.astype(np.float16) / p).astype(np.float16).reshape(new_shape) - if elem_type == TensorProto.FLOAT64: + if elem_type == TensorProto.DOUBLE: return (value.astype(np.float64) / p).astype(np.float64).reshape(new_shape) raise RuntimeError(f"Unexpected element_type {elem_type} for info={info}") @@ -414,6 +414,8 @@ def compare_onnx_execution( """ Compares the execution of two onnx models. The function assumes both models takes the same inputs. + See :ref:`l-onnx-diff-example` to see a full example using + this function. 
:param model1: first model :param model2: second model diff --git a/onnx_array_api/reference/ops/op_fused_matmul.py b/onnx_array_api/reference/ops/op_fused_matmul.py index 9bafd96..0f738c7 100644 --- a/onnx_array_api/reference/ops/op_fused_matmul.py +++ b/onnx_array_api/reference/ops/op_fused_matmul.py @@ -9,7 +9,7 @@ def _run( self, A, B, - alpha: float = 0, + alpha: float = 1, transA: int = 0, transB: int = 0, transBatchA: int = 0, From 42c5e695a9f992af53154caaf192b0f3c10c4442 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Tue, 6 Feb 2024 00:51:08 +0100 Subject: [PATCH 11/13] documentation --- _doc/tutorial/index.rst | 1 + _doc/tutorial/tools.rst | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 _doc/tutorial/tools.rst diff --git a/_doc/tutorial/index.rst b/_doc/tutorial/index.rst index f4cce00..9fcc557 100644 --- a/_doc/tutorial/index.rst +++ b/_doc/tutorial/index.rst @@ -10,4 +10,5 @@ Tutorial graph_api light_api numpy_api + tools benchmarks diff --git a/_doc/tutorial/tools.rst b/_doc/tutorial/tools.rst new file mode 100644 index 0000000..fe673f7 --- /dev/null +++ b/_doc/tutorial/tools.rst @@ -0,0 +1,20 @@ +===== +Tools +===== + +Some useful tools. + +Text representation +=================== + +Plotting a graph is great but difficult to read when +the graph is big and it is slow. +:func:`onnx_array_api.plotting.text_plot.onnx_simple_text_plot` +prints out a text representation. + +Differences between two models +============================== + +How to understand the differences between two models +assuming they are producing the same outputs? +Example :ref:`l-onnx-diff-example` shows how to do it. 
From e2224414336de7b86c2ec517c773127378717ea0 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Tue, 6 Feb 2024 00:52:11 +0100 Subject: [PATCH 12/13] fix length --- _unittests/ut_reference/test_evaluator_yield.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_unittests/ut_reference/test_evaluator_yield.py b/_unittests/ut_reference/test_evaluator_yield.py index 8565628..7181456 100644 --- a/_unittests/ut_reference/test_evaluator_yield.py +++ b/_unittests/ut_reference/test_evaluator_yield.py @@ -426,7 +426,7 @@ def test_distance_sequence_str(self): =|INPUTfloat322x2ABCDB|INPUTfloat322x2ABCDB ~|INPUTfloat322x3ABCDX|INPUTfloat322x2ABCDX -|RESULTfloat322x2CEIOExpH| - =|RESULTfloat322x2CEIOLinearReY1|RESULTfloat322x2CEIOLinearReY1 + =|RESULTfloat322x2CEIOLinearRegrY1|RESULTfloat322x2CEIOLinearRegrY1 ~|RESULTfloat322x2CEIOAbsY|RESULTfloat322x3CEIPAbsZ ~|OUTPUTfloat322x2CEIOY|OUTPUTfloat322x2CEIPY """.replace( From 04418d4b584139c08788735b0da4ded4abd24c49 Mon Sep 17 00:00:00 2001 From: Xavier Dupre Date: Tue, 6 Feb 2024 01:25:44 +0100 Subject: [PATCH 13/13] doc --- _doc/api/reference.rst | 6 +++--- _doc/command_lines.rst | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/_doc/api/reference.rst b/_doc/api/reference.rst index 7752ce6..3b4ae7d 100644 --- a/_doc/api/reference.rst +++ b/_doc/api/reference.rst @@ -31,7 +31,7 @@ DistanceExecution .. autoclass:: onnx_array_api.reference.DistanceExecution :members: -compare_execution -+++++++++++++++++ +compare_onnx_execution +++++++++++++++++++++++ -.. autofunction:: onnx_array_api.reference.compare_execution +.. autofunction:: onnx_array_api.reference.compare_onnx_execution diff --git a/_doc/command_lines.rst b/_doc/command_lines.rst index b7ff104..38ca5f2 100644 --- a/_doc/command_lines.rst +++ b/_doc/command_lines.rst @@ -28,7 +28,7 @@ Output example:: .. 
runpython:: - from onnx_extended._command_lines_parser import get_parser_compare + from onnx_array_api._command_lines_parser import get_parser_compare get_parser_compare().print_help() See function :func:`onnx_array_api.reference.compare_onnx_execution`. @@ -48,5 +48,5 @@ Output example:: .. runpython:: - from onnx_extended._command_lines_parser import get_parser_translate + from onnx_array_api._command_lines_parser import get_parser_translate get_parser_translate().print_help()