[DRAFT] s-quark support #1154

Open: wants to merge 69 commits into base: main

Commits (69)
ef20647  import converter dependencies lazily (calad0i, Oct 26, 2024)
028b4d0  make tf and qkeras optionl, stop assuming keras is tf.keras (calad0i, Oct 26, 2024)
72eb053  less mandatory dependency (calad0i, Oct 26, 2024)
63af2ac  fix dsp_aware_pruning test import path (calad0i, Oct 26, 2024)
c11dddb  fix broken setup.cfg after rebase, rm pyparsing (calad0i, Dec 15, 2024)
d9aaa1a  purge qkeras workaround (calad0i, Dec 15, 2024)
4854423  switch to pyproject.toml (calad0i, Dec 15, 2024)
06f9cda  format (calad0i, Dec 15, 2024)
014c1db  rm useless flake8 config in pyprject.toml (calad0i, Dec 15, 2024)
d3c8881  Add hint on import failure (calad0i, Dec 16, 2024)
738e5b0  leftover (calad0i, Dec 16, 2024)
bc7778b  rm setup.py from manifest (calad0i, Dec 16, 2024)
b76b5cb  manifest fix 2 (calad0i, Dec 16, 2024)
b7f60f5  keras v3 object based parser (calad0i, Nov 7, 2024)
a7206b4  sequential and i/o tensor name parsing fix (calad0i, Nov 8, 2024)
1605f96  support activation layers (calad0i, Nov 8, 2024)
a8aa489  consistent v2 weight reader behavior (calad0i, Nov 8, 2024)
eafe8b9  add v3 conv handlers (calad0i, Nov 8, 2024)
6b8a44c  add test (calad0i, Nov 8, 2024)
3f8acb5  pre-commit fix (calad0i, Dec 17, 2024)
d2ccfb4  revert keras v2 converter (calad0i, Dec 6, 2024)
0334960  make reshape handler compatiable with keras v3 (calad0i, Nov 13, 2024)
074b4b6  add general transpose for vivado/vitis (calad0i, Nov 13, 2024)
29674db  general einsum support for io_parallel and latency (calad0i, Nov 15, 2024)
1fb23b9  add tests for einsumdense (calad0i, Nov 15, 2024)
5489803  keras v3 converter clean-up (calad0i, Nov 19, 2024)
5e18781  add symbolic quantized interval (calad0i, Dec 2, 2024)
02ff0c3  preliminary bit-exact precision derivation opt pass (calad0i, Dec 4, 2024)
7c47be9  squark layer support start (calad0i, Dec 4, 2024)
43847c4  fix einsum_dense precision computation (calad0i, Dec 4, 2024)
afdaf21  add leftover (calad0i, Dec 4, 2024)
0da5cd0  qdense fix (calad0i, Dec 4, 2024)
6b73774  support batch_norm (calad0i, Dec 4, 2024)
93043de  support merge layers (calad0i, Dec 4, 2024)
d8708f5  support bit-exact q_einsum and fix precision trace for multi inp layers (calad0i, Dec 5, 2024)
cba1411  add einsum test (calad0i, Dec 5, 2024)
f8ae929  declare all softmax attrs in layer class (calad0i, Dec 6, 2024)
9326ad5  fix lazy import in handler (calad0i, Dec 6, 2024)
0cde312  cleanup einsum handler (calad0i, Dec 6, 2024)
b97d01e  cleanup einsum handler (calad0i, Dec 6, 2024)
c34abbe  more granular control over softmax for vivado (calad0i, Dec 6, 2024)
7ea6310  properly propagate inv/exp_table_size (calad0i, Dec 7, 2024)
0ecd12e  support bit-exact softmax for stable impl (calad0i, Dec 7, 2024)
fdfaac5  bit-exact softmax fix and leftovers (calad0i, Dec 7, 2024)
3f4c642  softmax table fixer update (calad0i, Dec 7, 2024)
bf99e83  support input scaler in softmax (calad0i, Dec 8, 2024)
b925bc8  support multidim parallel softmax (calad0i, Dec 8, 2024)
c611c77  fuse quantizer when possible (calad0i, Dec 8, 2024)
b7975fa  partial activation, fix input precision in SAT mode (calad0i, Dec 9, 2024)
3d1431e  fix padded convXd precition derivation rule (calad0i, Dec 9, 2024)
f97d4d8  add unary lut support (calad0i, Dec 9, 2024)
61e76a2  fix bit-exact corner case introduced by reverse flow (calad0i, Dec 10, 2024)
e50e731  general data_t inference (calad0i, Dec 10, 2024)
4a6b0b5  softmax compatbility (calad0i, Dec 11, 2024)
a6128ae  fix typo in einsum handler (calad0i, Dec 11, 2024)
5190c33  fix more typos (calad0i, Dec 11, 2024)
9cdb67c  MHA :tada: (calad0i, Dec 11, 2024)
5bcae96  fix einsum and softmax template typos (calad0i, Dec 11, 2024)
d780de2  assert einsum ops doesnot include direct sum operation (calad0i, Dec 12, 2024)
e3cef20  style (calad0i, Dec 13, 2024)
2bcf9e7  fix mha layer indexing (calad0i, Dec 13, 2024)
c426ddc  switch to model opt (calad0i, Dec 14, 2024)
a749c27  pooling layers (calad0i, Dec 15, 2024)
0317b5b  handle stray inputs (calad0i, Dec 15, 2024)
b38420d  fix pooling layer accum_t (calad0i, Dec 15, 2024)
a2d6e1a  bit-exact concatenate (calad0i, Dec 15, 2024)
af5c798  rm np.float_ in favor of numpy >=2.0 (calad0i, Jan 17, 2025)
c32df4b  add comments (calad0i, Jan 18, 2025)
fe0ff2f  skip non-bit-exact compatiable softmax in bit-exact pass (calad0i, Jan 18, 2025)
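
Taken together, the commits add a Keras v3 object-based front end, s-quark layer support, a bit-exact precision derivation pass, and new einsum/softmax kernels. A hedged sketch of how such a model might be run through hls4ml, assuming the existing config_from_keras_model and convert_from_keras_model entry points are reused by this draft (exact option names may differ):

import keras  # Keras v3; per the commits, keras is no longer assumed to be tf.keras

import hls4ml

# Assumption: a quantized (s-quark) Keras v3 model saved elsewhere.
model = keras.models.load_model('squark_model.keras')

config = hls4ml.utils.config_from_keras_model(model, granularity='name')
hls_model = hls4ml.converters.convert_from_keras_model(
    model,
    hls_config=config,
    backend='Vitis',
    io_type='io_parallel',  # the einsum/softmax additions below target io_parallel
    output_dir='hls4ml_prj',
)
hls_model.compile()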
Files changed
18 changes: 11 additions & 7 deletions .pre-commit-config.yaml
@@ -9,13 +9,19 @@ repos:
args: ['--line-length=125',
'--skip-string-normalization']

- repo: https://github.com/tox-dev/pyproject-fmt
rev: v2.5.0
hooks:
- id: pyproject-fmt

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: check-added-large-files
- id: check-case-conflict
- id: check-merge-conflict
- id: check-symlinks
- id: check-toml
- id: check-yaml
- id: debug-statements
- id: end-of-file-fixer
@@ -27,27 +33,25 @@ repos:
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black", --line-length=125]

- repo: https://github.com/asottile/pyupgrade
rev: v3.19.0
hooks:
- id: pyupgrade
args: ["--py36-plus"]

- repo: https://github.com/asottile/setup-cfg-fmt
rev: v2.7.0
hooks:
- id: setup-cfg-fmt

- repo: https://github.com/pycqa/flake8
rev: 7.1.1
hooks:
- id: flake8
exclude: docs/conf.py
additional_dependencies: [flake8-bugbear, flake8-print]
args: ['--max-line-length=125', # github viewer width
'--extend-ignore=E203,T201'] # E203 is not PEP8 compliant
'--extend-ignore=E203,T201', # E203 is not PEP8 compliant
'--per-file-ignores=hls4ml/model/optimizer/passes/bit_exact.py:E741,hls4ml/converters/keras_v3/squark/_base.py:E741,__init__.py:F401',
# i for #int w/o sign, I for #int w/ sign when massively processing bw conversions ......
# ignore unused imports in __init__.py .....
]

- repo: https://github.com/mgedmin/check-manifest
rev: "0.50"
5 changes: 3 additions & 2 deletions MANIFEST.in
@@ -1,7 +1,8 @@
include LICENSE README.md CONTRIBUTING.md CITATION.cff pyproject.toml setup.py setup.cfg .clang-format
include LICENSE README.md CONTRIBUTING.md CITATION.cff pyproject.toml .clang-format
graft example-models
graft test
graft contrib
recursive-include hls4ml/templates *
global-exclude .git .gitmodules .gitlab-ci.yml
recursive-include hls4ml *.py
global-exclude .git .gitmodules .gitlab-ci.yml *.pyc
include hls4ml/backends/vivado_accelerator/supported_boards.json
30 changes: 0 additions & 30 deletions hls4ml/__init__.py
@@ -1,33 +1,3 @@
# Temporary workaround for QKeras installation requirement, will be removed after 1.0.0
def maybe_install_qkeras():
import subprocess
import sys

QKERAS_PKG_NAME = 'QKeras'
# QKERAS_PKG_SOURCE = QKERAS_PKG_NAME
QKERAS_PKG_SOURCE = 'qkeras@git+https://github.com/fastmachinelearning/qkeras.git'

def pip_list():
p = subprocess.run([sys.executable, '-m', 'pip', 'list'], check=True, capture_output=True)
return p.stdout.decode()

def pip_install(package):
subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])

all_pkgs = pip_list()
if QKERAS_PKG_NAME not in all_pkgs:
print('QKeras installation not found, installing one...')
pip_install(QKERAS_PKG_SOURCE)
print('QKeras installed.')


try:
maybe_install_qkeras()
except Exception:
print('Could not find QKeras installation, make sure you have QKeras installed.')

# End of workaround

from hls4ml import converters, report, utils # noqa: F401, E402

try:
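
The auto-install shim removed here is superseded elsewhere in the PR by lazy imports in the converter front ends plus a hint on import failure (commits ef20647 and d3c8881). A minimal sketch of that pattern, with a hypothetical function name rather than the PR's exact code:

def parse_qkeras_model(model):
    # Heavy optional dependencies are imported only when the converter is used.
    try:
        import qkeras  # noqa: F401
    except ImportError as e:
        # Hint the user instead of auto-installing at import time.
        raise ImportError(
            'QKeras is required to convert this model; install it with `pip install qkeras`.'
        ) from e
    # ... conversion logic goes here ...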
33 changes: 1 addition & 32 deletions hls4ml/backends/fpga/fpga_backend.py
@@ -7,7 +7,7 @@
import numpy as np

from hls4ml.backends.backend import Backend
from hls4ml.model.attributes import ChoiceAttribute, ConfigurableAttribute, TypeAttribute
from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute
from hls4ml.model.layers import (
GRU,
LSTM,
@@ -32,16 +32,13 @@
SeparableConv1D,
SeparableConv2D,
SimpleRNN,
Softmax,
)
from hls4ml.model.optimizer import model_optimizer
from hls4ml.model.types import (
ExponentPrecisionType,
FixedPrecisionType,
IntegerPrecisionType,
PrecisionType,
RoundingMode,
SaturationMode,
UnspecifiedPrecisionType,
XnorPrecisionType,
)
@@ -109,34 +106,6 @@ def __init__(self, name):
act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
self.attribute_map[Activation] = act_attrs

softmax_attrs = self.attribute_map.get(Softmax, [])
softmax_attrs.append(
ChoiceAttribute(
'implementation',
['latency', 'stable', 'argmax', 'legacy'],
default='stable',
description=descriptions.softmax_implementation,
)
)
softmax_attrs.append(
ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip)
)
softmax_attrs.append(
TypeAttribute(
'exp_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
description=descriptions.table_type,
)
)
softmax_attrs.append(
TypeAttribute(
'inv_table',
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
description=descriptions.table_type,
)
)
self.attribute_map[Softmax] = softmax_attrs

def create_layer_class(self, layer_class):
new_attrubutes = []
for cls, attributes in self.attribute_map.items():
6 changes: 5 additions & 1 deletion hls4ml/backends/fpga/passes/fix_softmax_table_size.py
@@ -6,7 +6,11 @@

class FixSoftmaxTableSize(OptimizerPass):
def match(self, node):
return isinstance(node, Softmax)
if not isinstance(node, Softmax):
return False
if 'inv_table_size' in node.attributes:
return False # handler generating inv_table_size sets it properly
return True

def transform(self, model, node: Layer):
inp_layer = node.get_input_node() # type: ignore
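
With this guard, the legacy table-size fixup only runs for Softmax nodes whose handler did not already choose table sizes. A hedged illustration using a stand-in node object (not the real Layer API):

class FakeSoftmaxNode:
    """Hypothetical stand-in: a node exposing only its attribute dict."""

    def __init__(self, attributes):
        self.attributes = attributes

legacy = FakeSoftmaxNode({'table_size': 1024})  # old front end: no inv_table_size
squark = FakeSoftmaxNode({'exp_table_size': 256, 'inv_table_size': 256})  # new handler set it

for name, node in (('legacy', legacy), ('squark', squark)):
    fires = 'inv_table_size' not in node.attributes
    print(f'{name}: FixSoftmaxTableSize applies -> {fires}')  # legacy: True, squark: False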
5 changes: 0 additions & 5 deletions hls4ml/backends/fpga/passes/hgq_proxy_model.py
@@ -52,10 +52,6 @@ def match(self, node: Layer):
return isinstance(node, FixedPointQuantizer)

def transform(self, model, node: FixedPointQuantizer):
if node.fusible:
model.remove_node(node, rewire=True)
return True

if model.config.config['IOType'] != 'io_parallel':
raise NotImplementedError('Heterogenous quantization for activations is only supported with IOType=io_parallel')

@@ -94,7 +90,6 @@ def __init__(self):

def format(self, node):
params = self._default_function_params(node)
node.attributes['result_t'].precision = node.attributes['table_t'].precision
params['config'] = f'unary_lut_config{node.index}'
params['table'] = node.get_weights('table').name

46 changes: 44 additions & 2 deletions hls4ml/backends/vivado/passes/core_templates.py
@@ -150,13 +150,21 @@ def format(self, node):

softmax_config_template = """struct {type}_config{index} : nnet::activ_config {{
static const unsigned n_in = {n_in};
static const unsigned table_size = {table_size};
static const unsigned n_outer = {n_outer};
static const unsigned n_inner = {n_inner};
static const unsigned parallelization_factor = {parallelization_factor};
static const unsigned exp_table_size = {exp_table_size};
static const unsigned inv_table_size = {inv_table_size};
static const unsigned io_type = nnet::{iotype};
static const unsigned reuse_factor = {reuse};
static const unsigned axis = {axis};
static const nnet::softmax_implementation implementation = nnet::softmax_implementation::{implementation};
static constexpr float exp_scale = {exp_scale};
typedef {exp_table_t.name} exp_table_t;
typedef {inv_table_t.name} inv_table_t;
typedef {accum_t.name} accum_t;
typedef {inv_inp_t.name} inv_inp_t;
typedef {inp_norm_t_str} inp_norm_t;
}};\n"""

activ_function_template = 'nnet::{activation}<{input_t}, {output_t}, {config}>({input}, {output});'
@@ -208,10 +216,44 @@ def __init__(self):
super(ActivationConfigTemplate, self).__init__(Softmax) # Skip ActivationConfigTemplate's __init__
self.template = softmax_config_template

def format(self, node):
params = self._default_config_params(node)
params['type'] = node.get_attr('activation')
params.setdefault('exp_table_size', params['table_size'])
params.setdefault('inv_table_size', params['table_size'])
params.setdefault('n_inner', 1)
params.setdefault('n_outer', 1)
params.setdefault('exp_scale', 1.0)
params.setdefault('parallelization_factor', -1)

if 'inp_norm_t' not in params:
input_t = node.get_input_variable().type.precision
width, iwidth = input_t.width, input_t.integer
params['inp_norm_t_str'] = f'ap_fixed<{width}, {iwidth}, AP_RND, AP_SAT>'
else:
params['inp_norm_t_str'] = params['inp_norm_t'].name # type: ignore

return self.template.format(**params)


class SoftmaxFunctionTemplate(FunctionCallTemplate):
def __init__(self):
super().__init__(Softmax, include_header=activ_include_list)
self.template = activ_function_template

def format(self, node):
params = self._default_function_params(node)
use_multidim = node.get_attr('n_inner', 1) > 1 or node.get_attr('n_outer', 1) > 1
use_multidim = use_multidim and node.model.config.get_config_value('IOType') == 'io_parallel'
params['activation'] = 'softmax' if not use_multidim else 'softmax_multidim'
params['config'] = f'softmax_config{node.index}'

return self.template.format(**params)


class ActivationFunctionTemplate(FunctionCallTemplate):
def __init__(self):
super().__init__((Activation, HardActivation, Softmax), include_header=activ_include_list)
super().__init__((Activation, HardActivation), include_header=activ_include_list)
self.template = activ_function_template

def format(self, node):
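
The extended softmax config stays backward compatible with front ends that only provide the legacy table_size: SoftmaxConfigTemplate.format() falls back via setdefault and derives inp_norm_t from the input precision when a handler did not supply one. A small sketch of those fallbacks with made-up numbers (not the real node object):

params = {'n_in': 10, 'table_size': 1024}  # what an older handler might supply
params.setdefault('exp_table_size', params['table_size'])  # -> 1024
params.setdefault('inv_table_size', params['table_size'])  # -> 1024
params.setdefault('n_outer', 1)  # plain 1D softmax
params.setdefault('n_inner', 1)
params.setdefault('exp_scale', 1.0)
params.setdefault('parallelization_factor', -1)

# Fallback inp_norm_t for an assumed ap_fixed<16, 6> input precision:
width, iwidth = 16, 6
inp_norm_t_str = f'ap_fixed<{width}, {iwidth}, AP_RND, AP_SAT>'
print(inp_norm_t_str)  # ap_fixed<16, 6, AP_RND, AP_SAT>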
105 changes: 105 additions & 0 deletions hls4ml/backends/vivado/passes/einsum.py
@@ -0,0 +1,105 @@
from math import ceil

from hls4ml.backends.backend import get_backend
from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate
from hls4ml.model.layers import Einsum

from .reshaping_templates import transpose_config_gen

# Shared Dense template
# Einsum template

einsum_config_template = '''
struct config{index} {{
typedef config{index}_tpose_inp0 tpose_inp0_conf;
typedef config{index}_tpose_inp1 tpose_inp1_conf;
typedef config{index}_tpose_out tpose_out_conf;

typedef {accum_t.name} accum_t;

// Layer Sizes
static const unsigned n_free0 = {n_free0};
static const unsigned n_free1 = {n_free1};
static const unsigned n_contract = {n_contract};
static const unsigned n_inplace = {n_inplace};

// Resource reuse info
static const unsigned io_type = nnet::{iotype};
static const unsigned strategy = nnet::{strategy};
static const unsigned reuse_factor = {reuse_factor};
static const unsigned multiplier_limit = {multiplier_limit};
static const bool store_weights_in_bram = false; // NOT USED

template <class x_T, class y_T>
using product = nnet::product::{product_type}<x_T, y_T>;
}};
'''

einsum_function_template = 'nnet::einsum<{input0_t}, {input1_t}, {output_t}, {config}>({input0}, {input1}, {output});'

einsum_include_list = ['nnet_utils/nnet_einsum.h']


class EinsumConfigTemplate(LayerConfigTemplate):
def __init__(self):
super().__init__(Einsum)
self.template = einsum_config_template

def format(self, node: Einsum):
default_params = self._default_config_params(node)

strategy = node.model.config.get_strategy(node)
io_type = node.model.config.get_config_value('IOType')

assert io_type == 'io_parallel', 'EinsumDense layer only supports io_parallel for now'
assert strategy.lower() == 'latency', 'EinsumDense layer only supports Latency strategy for now'

# EinsumDense config
params = default_params.copy()
params['strategy'] = strategy
params['n_free0'] = node.attributes.attributes['n_free0']
params['n_free1'] = node.attributes.attributes['n_free1']
params['n_contract'] = node.attributes.attributes['n_contract']
params['n_inplace'] = node.attributes.attributes['n_inplace']
inp0_t = node.get_input_variable(node.inputs[0]).type.precision
inp1_t = node.get_input_variable(node.inputs[1]).type.precision
params['product_type'] = get_backend('vivado').product_type(inp0_t, inp1_t)

total_mults = params['n_free0'] * params['n_free1'] * params['n_contract'] * params['n_inplace']
params['multiplier_limit'] = ceil(total_mults / params['reuse_factor'])

einsum_conf = self.template.format(**params)

# inp/out transpose config
inp0_shape = node.attributes.attributes['inp0_shape']
inp1_shape = node.attributes.attributes['inp1_shape']
out_interpert_shape = node.attributes.attributes['out_interpert_shape']
inp0_tpose_idxs = node.attributes.attributes['inp0_tpose_idxs']
inp1_tpose_idxs = node.attributes.attributes['inp1_tpose_idxs']
out_tpose_idxs = node.attributes.attributes['out_tpose_idxs']
tpose_inp0_conf_name = f'config{node.index}_tpose_inp0'
tpose_inp1_conf_name = f'config{node.index}_tpose_inp1'
tpose_out_conf_name = f'config{node.index}_tpose_out'

inp0_tpose_conf = transpose_config_gen(tpose_inp0_conf_name, inp0_shape, inp0_tpose_idxs)
inp1_tpose_conf = transpose_config_gen(tpose_inp1_conf_name, inp1_shape, inp1_tpose_idxs)
out_tpose_conf = transpose_config_gen(tpose_out_conf_name, out_interpert_shape, out_tpose_idxs)

return '\n\n'.join((inp0_tpose_conf, inp1_tpose_conf, out_tpose_conf, einsum_conf))


class EinsumFunctionTemplate(FunctionCallTemplate):
def __init__(self):
super().__init__(Einsum, include_header=einsum_include_list)
self.template = einsum_function_template

def format(self, node: Einsum):
params = {}
params['config'] = f'config{node.index}'
params['input0_t'] = node.get_input_variable(node.inputs[0]).type.name
params['input1_t'] = node.get_input_variable(node.inputs[1]).type.name
params['output_t'] = node.get_output_variable().type.name
params['input0'] = node.get_input_variable(node.inputs[0]).name
params['input1'] = node.get_input_variable(node.inputs[1]).name
params['output'] = node.get_output_variable().name
return self.template.format(**params)
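
For reference, the multiplier_limit computed above is simply the total number of multiplications divided by the reuse factor, rounded up. A worked example with made-up dimensions:

from math import ceil

# Assumed einsum dimensions (illustrative only, not from a real model).
n_free0, n_free1, n_contract, n_inplace = 4, 8, 16, 2
reuse_factor = 4

total_mults = n_free0 * n_free1 * n_contract * n_inplace  # 4 * 8 * 16 * 2 = 1024 multiplications
multiplier_limit = ceil(total_mults / reuse_factor)  # ceil(1024 / 4) = 256 parallel multipliers
print(multiplier_limit)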