From 5ed7dd28327aef0888434ae83cfbf57de85b7254 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Thu, 11 Jul 2024 17:11:38 +0200
Subject: [PATCH 01/47] Init depthwise resource implementation for streaming
 interface

---
 .../vivado/nnet_utils/nnet_sepconv_stream.h   | 246 +++++++++++++++++-
 1 file changed, 238 insertions(+), 8 deletions(-)
diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
index 9c16de1908..77a95ebb67 100644
--- a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
+++ b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
@@ -4,11 +4,223 @@
 #include "hls_stream.h"
 #include "nnet_common.h"
 #include "nnet_conv_stream.h"
+#include <iostream>
 
 namespace nnet {
 
 template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+void depthwise_product_resource_rf_lt_nchan(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
+                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
+                    
+    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
+    const int nout = CONFIG_T::n_chan;
+
+    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
+    const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
+    const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
+    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
+    // const int multscale = multiplier_limit;
+
+    assert((multiplier_limit % nout == 0 || rufactor > CONFIG_T::n_chan) && "The current Reuse Factor is not allowed");
+    assert((multiplier_limit == block_factor) && "This function is correct only for RF <= N_IN");
+
+    #pragma HLS function_instantiate variable=weights,biases
+    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
+    #pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
+    #pragma HLS ARRAY_RESHAPE   variable=data block factor=block_factor
+
+    #pragma HLS ARRAY_PARTITION variable=biases complete
+
+    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
+    #pragma HLS ARRAY_PARTITION variable=acc type=complete
+    
+InitAccum:  
+    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
+        #pragma HLS UNROLL
+        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
+    }
+
+ReuseLoop:
+    for (int ir = 0; ir < rufactor; ir++) {
+        #pragma HLS PIPELINE II=1 rewind
+
+        int in_index = ir;
+        int out_index = ir;
+
+    MultLoop:
+        for (int im = 0; im < block_factor; im++) {
+            #pragma HLS UNROLL
+            
+            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
+
+            in_index+=rufactor;
+            out_index+=rufactor;
+
+            if (out_index >= CONFIG_T::n_chan) {
+                out_index -= CONFIG_T::n_chan;
+            }
+            
+        }
+    }
+
+// Cast to "res_t" type
+Result:
+    for (int ires = 0; ires < nout; ires++) {
+        #pragma HLS UNROLL
+        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
+    }
+}
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_product_resource_rf_geq_nchan_rem0(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
+                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
+
+    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
+    const int nout = CONFIG_T::n_chan;
+
+    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
+    const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
+    const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
+    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
+    // const int multscale = multiplier_limit;
+
+    assert((multiplier_limit % nout == 0 || rufactor >= CONFIG_T::n_chan) && "The current Reuse Factor is not allowed");
+    assert((rufactor >= CONFIG_T::n_chan && rufactor % CONFIG_T::n_chan == 0) && "This function is correct only for RF >= N_IN && RF % N_IN == 0");
+
+    #pragma HLS function_instantiate variable=weights,biases
+    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
+    #pragma HLS ARRAY_RESHAPE   variable=weights type=block factor=block_factor
+    #pragma HLS ARRAY_RESHAPE   variable=data type=block factor=block_factor
+
+    #pragma HLS ARRAY_PARTITION variable=biases complete
+
+    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
+    #pragma HLS ARRAY_PARTITION variable=acc type=complete
+
+InitAccum:  
+    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
+        #pragma HLS UNROLL
+        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
+    }
+
+int outidx[rufactor];
+int outstep = 0;
+IndexLoop:
+    for (int ir = 0; ir < rufactor; ir++) {
+        outidx[ir] = outstep;
+        outstep++;
+        if (outstep == CONFIG_T::n_chan) {
+            outstep = 0;
+        }
+    }
+int out_index = -1;
+
+ReuseLoop:
+    for (int ir = 0; ir < rufactor; ir++) {
+        #pragma HLS PIPELINE II=1 rewind
+
+        int in_index = ir;
+
+        out_index = outidx[ir];
+
+    MultLoop:
+        for (int im = 0; im < block_factor; im++) {
+            #pragma HLS UNROLL
+            
+            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
+
+            in_index+=rufactor;         
+        }
+    }
+
+// Cast to "res_t" type
+Result:
+    for (int ires = 0; ires < nout; ires++) {
+        #pragma HLS UNROLL
+        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
+    }
+}
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_product_resource_rf_gt_nchan(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
+                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
+
+    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
+    const int nout = CONFIG_T::n_chan;
+
+    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
+    // const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
+    // const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
+    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
+    // const int multscale = multiplier_limit;
+
+    // assert((multiplier_limit % nout == 0 || rufactor >= nin) && "The current Reuse Factor is not allowed");
+    assert((rufactor > CONFIG_T::n_chan) && "This function is correct only for RF > N_IN");
+
+    #pragma HLS function_instantiate variable=weights,biases
+    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
+    #pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
+    #pragma HLS ARRAY_RESHAPE   variable=data block factor=block_factor
+
+    #pragma HLS ARRAY_PARTITION variable=biases complete
+
+    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
+    #pragma HLS ARRAY_PARTITION variable=acc type=complete
+    
+InitAccum:  
+    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
+        #pragma HLS UNROLL
+        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
+    }
+
+const int remainder = CONFIG_T::reuse_factor % CONFIG_T::n_chan;
+
+int outidx[rufactor];
+int outstep = 0;
+IndexLoop:
+    for (int ir = 0; ir < rufactor; ir++) {
+        outidx[ir] = outstep;
+        outstep++;
+        if (outstep == CONFIG_T::n_chan) {
+            outstep = 0;
+        }
+    }
+
+ReuseLoop:
+    for (int ir = 0; ir < rufactor; ir++) {
+        #pragma HLS PIPELINE II=1 rewind
+
+        int in_index = ir;
+        int out_index = outidx[ir];
+        
+    MultLoop:
+        for (int im = 0; im < block_factor; im++) {
+            #pragma HLS UNROLL
+
+            // out_index = in_index % CONFIG_T::n_chan;
+            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
+
+            in_index += rufactor;
+            out_index += remainder;
+            if (out_index >= CONFIG_T::n_chan) {
+                out_index -= CONFIG_T::n_chan;
+            }
+        }
+    }
+
+// Cast to "res_t" type
+Result:
+    for (int ires = 0; ires < nout; ires++) {
+        #pragma HLS UNROLL
+        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
+    }
+}
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_product_latency(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
                        typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
                        typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
     #pragma HLS INLINE
@@ -58,6 +270,22 @@ void depthwise_product(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], re
     }
 }
 
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_product_resource(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
+                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
+
+    #pragma HLS INLINE recursive
+
+    if (CONFIG_T::reuse_factor < CONFIG_T::n_chan) {
+        depthwise_product_resource_rf_lt_nchan<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    } else if (CONFIG_T::reuse_factor % CONFIG_T::n_chan == 0) {
+        depthwise_product_resource_rf_geq_nchan_rem0<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    } else {
+        depthwise_product_resource_rf_gt_nchan<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    }
+}
+
 template <class data_T, class res_T, typename CONFIG_T>
 void depthwise_mult_buffer(hls::stream<typename data_T::value_type> data_window[CONFIG_T::kernel_size * CONFIG_T::n_chan],
                            res_T &res_pack, hls::stream<res_T> &res_stream, unsigned &outputs_ready,
@@ -78,9 +306,9 @@ void depthwise_mult_buffer(hls::stream<typename data_T::value_type> data_window[
 
     #pragma HLS INLINE recursive
     if (CONFIG_T::strategy == nnet::latency) {
-        depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
+        depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
     } else {
-        assert("Resource strategy for DepthwiseConv2D is not supported." && false);
+        depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
     }
 
 CastLoop:
@@ -202,10 +430,11 @@ void compute_depthwise_output_buffer_1d(const data_T &in_elem, hls::stream<res_T
         // Dense multiply
         #pragma HLS INLINE recursive
         if (CONFIG_T::strategy == nnet::latency) {
-            depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+            depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
                                                                                                  weights, biases);
         } else {
-            assert("Resource strategy for DepthwiseConv1D is not supported." && false);
+            depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+                                                                                                 weights, biases);
         }
 
     // Pack output
@@ -267,10 +496,11 @@ void compute_depthwise_output_buffer_2d(const data_T &in_elem,
         // Dense multiply
         #pragma HLS INLINE recursive
         if (CONFIG_T::strategy == nnet::latency) {
-            depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+            depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
                                                                                                  weights, biases);
         } else {
-            assert("Resource strategy for DepthwiseConv2D is not supported." && false);
+            depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+                                                                                                 weights, biases);
         }
 
     // Pack output
@@ -303,4 +533,4 @@ void compute_depthwise_output_buffer_2d(const data_T &in_elem,
 }
 
 } // namespace nnet
-#endif
+#endif
\ No newline at end of file

From 441c1b8621f323757ba2d7d09bdc2a4c3575d0fb Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Mon, 15 Jul 2024 14:13:52 +0200
Subject: [PATCH 02/47] Init fifo optimization file for vitis backend

---
 .../vitis/passes/fifo_depth_optimization.py   | 102 ++++++++++++++++++
 hls4ml/backends/vitis/vitis_backend.py        |   4 +-
 2 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 hls4ml/backends/vitis/passes/fifo_depth_optimization.py

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
new file mode 100644
index 0000000000..8cdad1eedd
--- /dev/null
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -0,0 +1,102 @@
+import json
+
+from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass
+
+
+def populate_values(values, name, data, depth):
+    def get_values(x):
+        return int(x[1][1:], 2)
+
+    values.append({'name': name, 'data': [], 'max': 0, 'depth': 0})
+    values[-1]['data'] = [get_values(x) for x in data]
+    values[-1]['max'] = max(values[-1]['data'])
+    values[-1]['depth'] = int(depth[0][1][1:], 2)
+    return values
+
+
+def set_big_fifos(vars_to_profile, profiling_fifo_depth):
+    for v in vars_to_profile.values():
+        if v.pragma:
+            v.pragma = (v.pragma[0], profiling_fifo_depth)
+
+
+def get_vcd_data(model):
+    model.write()
+    model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False, fifo_opt=True)
+
+    with open(
+        model.config.get_output_dir()
+        + '/'
+        + model.config.get_project_name()
+        + '_prj'
+        + '/solution1/sim/verilog/fifo_opt.vcd'
+    ) as vcd_file:
+        vcd = VcdParser()
+        vcd.parse(vcd_file)
+        data = vcd.scope.toJson()
+    return data
+
+
+def generate_max_depth_file(model, maxs):
+    with open(model.config.get_output_dir() + '/max_depth.json', 'w') as f:
+        json.dump(maxs, f, indent=4)
+
+
+def set_fifo_depth(model, maxs):
+    for v in model.output_vars.values():
+        if v.pragma:
+            filtered_max = [x['max'] for x in maxs if v.name in x['name']]
+            if len(filtered_max) == 0:
+                continue
+            if len(filtered_max) > 1:
+                print('WARNING! Check names of FIFOs')
+            v.pragma = (v.pragma[0], filtered_max[0] + 1)
+
+
+class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
+    def __init__(self):
+        self.values = []
+
+    def transform(self, model):
+        # use `large_fifo_depth = 0` to keep the default fifo depth
+        profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000) # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 
+
+        # check axi-stream or io-stream, if not one the 2 exit
+        if not (model.config.get_config_value('IOType') == 'io_stream'):
+            raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config')
+
+        # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs
+        # and so they will be profiled
+        if profiling_fifo_depth:
+            vars_to_profile = {
+                k: v
+                for k, v in model.output_vars.items()
+                if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
+            }
+
+            set_big_fifos(vars_to_profile, profiling_fifo_depth)
+
+        data = get_vcd_data(model)
+
+        if len(data['children']) == 0:
+            print(
+                "FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible."
+            )
+            print("Consider increasing profiling_fifo_depth.")
+            return False
+
+        n_elem = len(data['children'][0]['children'][0]['children'])
+        for i in range(n_elem):
+            name = data['children'][0]['children'][0]['children'][i]['name']
+            data_p = data['children'][0]['children'][0]['children'][i]['children'][0]['data']
+            depth = data['children'][0]['children'][0]['children'][i]['children'][1]['data']
+            populate_values(self.values, name, data_p, depth)
+
+        maxs = [{'name': i['name'], 'max': i['max'], 'depth': i['depth']} for i in self.values]
+
+        generate_max_depth_file(model, maxs)
+
+        set_fifo_depth(model, maxs)
+
+        print('[hls4ml] - FIFO optimization completed')
+        return False
diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py
index 0110f78313..76dfffdb6c 100644
--- a/hls4ml/backends/vitis/vitis_backend.py
+++ b/hls4ml/backends/vitis/vitis_backend.py
@@ -76,7 +76,7 @@ def create_initial_config(
 
         return config
 
-    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False):
+    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, fifo_opt=False,):
         if 'linux' in sys.platform:
             found = os.system('command -v vitis_hls > /dev/null')
             if found != 0:
@@ -87,7 +87,7 @@ def build(self, model, reset=False, csim=True, synth=True, cosim=False, validati
         os.system(
             (
                 'vitis_hls -f build_prj.tcl "reset={reset} csim={csim} synth={synth} cosim={cosim} '
-                'validation={validation} export={export} vsynth={vsynth}"'
+                'validation={validation} export={export} vsynth={vsynth} fifo_opt={fifo_opt}"'
             ).format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth)
         )
         os.chdir(curr_dir)

From 8ba0211a3c1e94b76da80d247cd9766a3f11ebf5 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Mon, 15 Jul 2024 15:15:54 +0200
Subject: [PATCH 03/47] Register fifo opt flow in vitis backend

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 4 ++--
 hls4ml/backends/vitis/vitis_backend.py                  | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 8cdad1eedd..81a61f0076 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -59,8 +59,8 @@ def __init__(self):
 
     def transform(self, model):
         # use `large_fifo_depth = 0` to keep the default fifo depth
-        profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000) # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 
-
+        profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000) # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation 
+        return
         # check axi-stream or io-stream, if not one the 2 exit
         if not (model.config.get_config_value('IOType') == 'io_stream'):
             raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config')
diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py
index 76dfffdb6c..f48f2408b5 100644
--- a/hls4ml/backends/vitis/vitis_backend.py
+++ b/hls4ml/backends/vitis/vitis_backend.py
@@ -33,6 +33,11 @@ def _register_flows(self):
         ip_flow_requirements.insert(ip_flow_requirements.index('vivado:apply_templates'), template_flow)
 
         self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)
+        
+        # Register the fifo depth optimization flow which is different from the one for vivado
+        fifo_depth_opt_passes = ['vitis:fifo_depth_optimization'] + writer_passes # After optimization, a new project will be written
+
+        register_flow('fifo_depth_optimization', fifo_depth_opt_passes, requires=['vitis:ip'], backend=self.name)
 
     def create_initial_config(
         self,
@@ -76,7 +81,7 @@ def create_initial_config(
 
         return config
 
-    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, fifo_opt=False,):
+    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, fifo_opt=False):
         if 'linux' in sys.platform:
             found = os.system('command -v vitis_hls > /dev/null')
             if found != 0:
@@ -88,7 +93,7 @@ def build(self, model, reset=False, csim=True, synth=True, cosim=False, validati
             (
                 'vitis_hls -f build_prj.tcl "reset={reset} csim={csim} synth={synth} cosim={cosim} '
                 'validation={validation} export={export} vsynth={vsynth} fifo_opt={fifo_opt}"'
-            ).format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth)
+            ).format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth, fifo_opt=fifo_opt)
         )
         os.chdir(curr_dir)
 

From bd59846bb642633e6f5a012cbd4097b35b13fe9f Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Mon, 15 Jul 2024 16:35:13 +0200
Subject: [PATCH 04/47] Init changes in build_prj.tcl and modification files in
 vitis writer

---
 .../vitis/passes/fifo_depth_optimization.py   | 44 +++++++++----------
 hls4ml/templates/vivado/build_prj.tcl         | 14 +++++-
 hls4ml/writer/vitis_writer.py                 | 10 +++++
 3 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 81a61f0076..cb955d4f83 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -20,21 +20,21 @@ def set_big_fifos(vars_to_profile, profiling_fifo_depth):
             v.pragma = (v.pragma[0], profiling_fifo_depth)
 
 
-def get_vcd_data(model):
+def get_fifo_data(model):
     model.write()
     model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False, fifo_opt=True)
 
-    with open(
-        model.config.get_output_dir()
-        + '/'
-        + model.config.get_project_name()
-        + '_prj'
-        + '/solution1/sim/verilog/fifo_opt.vcd'
-    ) as vcd_file:
-        vcd = VcdParser()
-        vcd.parse(vcd_file)
-        data = vcd.scope.toJson()
-    return data
+    # with open(
+    #     model.config.get_output_dir()
+    #     + '/'
+    #     + model.config.get_project_name()
+    #     + '_prj'
+    #     + '/solution1/sim/verilog/fifo_opt.vcd'
+    # ) as vcd_file:
+    #     vcd = VcdParser()
+    #     vcd.parse(vcd_file)
+    #     data = vcd.scope.toJson()
+    # return data
 
 
 def generate_max_depth_file(model, maxs):
@@ -60,23 +60,23 @@ def __init__(self):
     def transform(self, model):
         # use `large_fifo_depth = 0` to keep the default fifo depth
         profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000) # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation 
-        return
-        # check axi-stream or io-stream, if not one the 2 exit
+
+        # check axi-stream or io-stream, if not one the 2nd, exit
         if not (model.config.get_config_value('IOType') == 'io_stream'):
             raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config')
 
         # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs
         # and so they will be profiled
-        if profiling_fifo_depth:
-            vars_to_profile = {
-                k: v
-                for k, v in model.output_vars.items()
-                if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-            }
+        # if profiling_fifo_depth:
+        #     vars_to_profile = {
+        #         k: v
+        #         for k, v in model.output_vars.items()
+        #         if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
+        #     }
 
-            set_big_fifos(vars_to_profile, profiling_fifo_depth)
+        #     set_big_fifos(vars_to_profile, profiling_fifo_depth)
 
-        data = get_vcd_data(model)
+        data = get_fifo_data(model)
 
         if len(data['children']) == 0:
             print(
diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index 7d0420611a..1b14cec31d 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -43,6 +43,10 @@ proc remove_recursive_log_wave {} {
     file rename -force $temp $filename
 }
 
+proc add_vitis_profiling_instructions_tcl {} {
+    config_dataflow -override_user_fifo_depth 100000
+}
+
 proc add_vcd_instructions_tcl {} {
     set tcldir [file dirname [info script]]
     source [file join $tcldir project.tcl]
@@ -52,7 +56,7 @@ proc add_vcd_instructions_tcl {} {
     set temp     $filename.new.$timestamp
     # set backup   $filename.bak.$timestamp
 
-    set in  [open $filename r]
+    set in  [open $filename r]  
     set out [open $temp     w]
 
     # line-by-line, read the original file
@@ -195,7 +199,13 @@ if {$opt(cosim)} {
 
     if {$opt(fifo_opt)} {
         puts "\[hls4ml\] - FIFO optimization started"
-        add_vcd_instructions_tcl
+
+        if {[string equal "$backend" "vitis"]} {
+             puts "***** AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA *****"
+            add_vitis_profiling_instructions_tcl
+        } else {
+            add_vcd_instructions_tcl
+        }
     }
 
     remove_recursive_log_wave
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index a2cca7f414..a6717edad3 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -23,6 +23,12 @@ def write_nnet_utils_overrides(self, model):
 
         for h in headers:
             copy(srcpath + h, dstpath + h)
+            
+    def write_board_script(model):
+        return
+
+    def modify_build_script(model):
+        return        
 
     def write_hls(self, model):
         """
@@ -30,4 +36,8 @@ def write_hls(self, model):
         """
         super().write_hls(model)
         self.write_nnet_utils_overrides(model)
+        self.write_board_script(model)
+        self.modify_build_script(model)
+        os.remove(model.config.get_output_dir() + '.tar.gz')
         self.write_tar(model)
+        

From ab4c232c2ecf0147f704364f3d4d1fe016638683 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Mon, 15 Jul 2024 17:11:11 +0200
Subject: [PATCH 05/47] Fix vitis writer by adding project.tcl modifier

---
 hls4ml/writer/vitis_writer.py | 39 ++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index a6717edad3..7eef002b15 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -1,10 +1,11 @@
 import glob
 import os
 from shutil import copy
+from distutils.dir_util import copy_tree
+from shutil import copyfile
 
 from hls4ml.writer.vivado_writer import VivadoWriter
 
-
 class VitisWriter(VivadoWriter):
     def __init__(self):
         super().__init__()
@@ -24,10 +25,42 @@ def write_nnet_utils_overrides(self, model):
         for h in headers:
             copy(srcpath + h, dstpath + h)
             
-    def write_board_script(model):
+    def write_board_script(self, model):
+        '''
+        Write the tcl scripts and kernel sources to create a Vivado IPI project for the VivadoAccelerator
+        '''
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        # copyfile(
+        #     os.path.join(filedir, self.config.get_tcl_file_path()),
+        #     f'{model.config.get_output_dir()}/design.tcl',
+        # )
+        # # Generic alveo board
+        # if self.vivado_accelerator_config.get_board().startswith('alveo'):
+        #     src_dir = os.path.join(filedir, self.vivado_accelerator_config.get_krnl_rtl_src_dir())
+        #     dst_dir = os.path.abspath(model.config.get_output_dir()) + '/src'
+        #     copy_tree(src_dir, dst_dir)
+
+        ###################
+        # project.tcl
+        ###################
+        # project.tcl
+        f = open(f'{model.config.get_output_dir()}/project.tcl', 'w')
+        f.write('variable project_name\n')
+        f.write(f'set project_name "{model.config.get_project_name()}"\n')
+        f.write('variable backend\n')
+        f.write('set backend "vitis"\n')
+        f.write('variable part\n')
+        f.write('set part "{}"\n'.format(model.config.get_config_value('Part')))
+        f.write('variable clock_period\n')
+        f.write('set clock_period {}\n'.format(model.config.get_config_value('ClockPeriod')))
+        f.write('variable clock_uncertainty\n')
+        f.write('set clock_uncertainty {}\n'.format(model.config.get_config_value('ClockUncertainty', '12.5%')))
+        f.write('variable version\n')
+        f.write('set version "{}"\n'.format(model.config.get_config_value('Version', '1.0.0')))
+        f.close()
         return
 
-    def modify_build_script(model):
+    def modify_build_script(self, model):
         return        
 
     def write_hls(self, model):

From 1e583c5152eb816e43c7bf6f3b0cad3afda74cda Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Mon, 15 Jul 2024 17:56:27 +0200
Subject: [PATCH 06/47] Fix build_prj.tcl to synthesize with the large FIFOs

---
 hls4ml/templates/vivado/build_prj.tcl | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index 1b14cec31d..d844290bd9 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -183,6 +183,12 @@ if {$opt(csim)} {
 
 if {$opt(synth)} {
     puts "***** C/RTL SYNTHESIS *****"
+
+    if {$opt(fifo_opt) && [string equal $backend "vitis"]} {
+        puts "Synthesize with large FIFOs"
+        add_vitis_profiling_instructions_tcl
+    }
+
     set time_start [clock clicks -milliseconds]
     csynth_design
     set time_end [clock clicks -milliseconds]
@@ -200,10 +206,7 @@ if {$opt(cosim)} {
     if {$opt(fifo_opt)} {
         puts "\[hls4ml\] - FIFO optimization started"
 
-        if {[string equal "$backend" "vitis"]} {
-             puts "***** AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA *****"
-            add_vitis_profiling_instructions_tcl
-        } else {
+        if {[string equal "$backend" "vivado"] && [string equal $backend "vivadoaccelerator"]} {
             add_vcd_instructions_tcl
         }
     }

From 127da7c4fa228d52a04b5c1bc0f60ea466cbdfed Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 14:07:02 +0200
Subject: [PATCH 07/47] Fix if statement in cosim tcl script

---
 .../vitis/passes/fifo_depth_optimization.py   | 20 +++++++++----------
 hls4ml/templates/vivado/build_prj.tcl         |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index cb955d4f83..2a90208b30 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -14,10 +14,10 @@ def get_values(x):
     return values
 
 
-def set_big_fifos(vars_to_profile, profiling_fifo_depth):
-    for v in vars_to_profile.values():
-        if v.pragma:
-            v.pragma = (v.pragma[0], profiling_fifo_depth)
+# def set_big_fifos(vars_to_profile, profiling_fifo_depth):
+#     for v in vars_to_profile.values():
+#         if v.pragma:
+#             v.pragma = (v.pragma[0], profiling_fifo_depth)
 
 
 def get_fifo_data(model):
@@ -78,12 +78,12 @@ def transform(self, model):
 
         data = get_fifo_data(model)
 
-        if len(data['children']) == 0:
-            print(
-                "FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible."
-            )
-            print("Consider increasing profiling_fifo_depth.")
-            return False
+        # if len(data['children']) == 0:
+        #     print(
+        #         "FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible."
+        #     )
+        #     print("Consider increasing profiling_fifo_depth.")
+        #     return False
 
         n_elem = len(data['children'][0]['children'][0]['children'])
         for i in range(n_elem):
diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index d844290bd9..8659030bfd 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -206,7 +206,7 @@ if {$opt(cosim)} {
     if {$opt(fifo_opt)} {
         puts "\[hls4ml\] - FIFO optimization started"
 
-        if {[string equal "$backend" "vivado"] && [string equal $backend "vivadoaccelerator"]} {
+        if {[string equal "$backend" "vivado"] || [string equal $backend "vivadoaccelerator"]} {
             add_vcd_instructions_tcl
         }
     }

From a88fb0d9af97b1902585ac3be201568c1a2e2668 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 17:10:38 +0200
Subject: [PATCH 08/47] Clean the optimizer file

---
 .../vitis/passes/fifo_depth_optimization.py   | 152 +++++++++---------
 1 file changed, 80 insertions(+), 72 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 2a90208b30..26c8ae0991 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -1,102 +1,110 @@
 import json
+import os
+from hls4ml.model.optimizer.optimizer import (
+    ConfigurableOptimizerPass,
+    ModelOptimizerPass,
+)
+
+def set_big_fifos(vars_to_profile, profiling_fifo_depth):
+    for v in vars_to_profile.values():
+        if v.pragma:
+            v.pragma = (v.pragma[0], profiling_fifo_depth)
 
-from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass
-
-
-def populate_values(values, name, data, depth):
-    def get_values(x):
-        return int(x[1][1:], 2)
-
-    values.append({'name': name, 'data': [], 'max': 0, 'depth': 0})
-    values[-1]['data'] = [get_values(x) for x in data]
-    values[-1]['max'] = max(values[-1]['data'])
-    values[-1]['depth'] = int(depth[0][1][1:], 2)
-    return values
-
-
-# def set_big_fifos(vars_to_profile, profiling_fifo_depth):
-#     for v in vars_to_profile.values():
-#         if v.pragma:
-#             v.pragma = (v.pragma[0], profiling_fifo_depth)
-
-
-def get_fifo_data(model):
+def execute_cosim_to_profile_fifos(model):
     model.write()
-    model.build(reset=False, csim=True, synth=True, cosim=True, validation=False, export=False, vsynth=False, fifo_opt=True)
-
-    # with open(
-    #     model.config.get_output_dir()
-    #     + '/'
-    #     + model.config.get_project_name()
-    #     + '_prj'
-    #     + '/solution1/sim/verilog/fifo_opt.vcd'
-    # ) as vcd_file:
-    #     vcd = VcdParser()
-    #     vcd.parse(vcd_file)
-    #     data = vcd.scope.toJson()
-    # return data
-
+    model.build(
+        reset=False,
+        csim=True,
+        synth=True,
+        cosim=True,
+        validation=False,
+        export=False,
+        vsynth=False,
+        fifo_opt=True,
+    )
+
+def get_vitis_optimized_fifo_depths(model):
+    
+    # channel.zip is generated after the cosimulation and contains the chan_status*.csv files
+    # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last line
+    path_to_zip_file = (
+        model.config.get_output_dir()
+        + "/"
+        + model.config.get_project_name()
+        + "_prj"
+        + "/solution1/.autopilot/db/channel_depth_info/"
+    )
+    os.system(f"unzip -q {path_to_zip_file}channel.zip -d {path_to_zip_file}")
+    
+    names_file_path = (
+        model.config.get_output_dir()
+        + "/"
+        + model.config.get_project_name()
+        + "_prj"
+        + "/solution1/.autopilot/db/channel_info.csv"
+    )
+
+    # the channel_info.csv file contains the mapping of the fifo names with the respective chan_status*.csv file
+    csv_fifo_depth_files = {}
+    with open(names_file_path) as names_file:
+        for line in names_file:
+            # if "layer" in line:
+            layer_name = line.split(",")[1]
+            csv_file_name = line.split(",")[3]
+            csv_fifo_depth_files[layer_name] = csv_file_name
 
 def generate_max_depth_file(model, maxs):
-    with open(model.config.get_output_dir() + '/max_depth.json', 'w') as f:
+    with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(maxs, f, indent=4)
 
-
-def set_fifo_depth(model, maxs):
+def set_optimized_fifo_depths(model, maxs):
     for v in model.output_vars.values():
         if v.pragma:
-            filtered_max = [x['max'] for x in maxs if v.name in x['name']]
+            filtered_max = [x["max"] for x in maxs if v.name in x["name"]]
             if len(filtered_max) == 0:
                 continue
             if len(filtered_max) > 1:
-                print('WARNING! Check names of FIFOs')
+                print("WARNING! Check names of FIFOs")
             v.pragma = (v.pragma[0], filtered_max[0] + 1)
 
-
 class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
     def __init__(self):
-        self.values = []
+        self.profiled_fifo_data = []
 
     def transform(self, model):
         # use `large_fifo_depth = 0` to keep the default fifo depth
-        profiling_fifo_depth = getattr(self, 'profiling_fifo_depth', 100_000) # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation 
+        profiling_fifo_depth = getattr(
+            self, "profiling_fifo_depth", 100_000
+        )  # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation
 
-        # check axi-stream or io-stream, if not one the 2nd, exit
-        if not (model.config.get_config_value('IOType') == 'io_stream'):
-            raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config')
+        # check axi-stream or io-stream
+        if not (model.config.get_config_value("IOType") == "io_stream"):
+            raise RuntimeError(
+                "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config"
+            )
 
         # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs
         # and so they will be profiled
-        # if profiling_fifo_depth:
-        #     vars_to_profile = {
-        #         k: v
-        #         for k, v in model.output_vars.items()
-        #         if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-        #     }
-
-        #     set_big_fifos(vars_to_profile, profiling_fifo_depth)
-
-        data = get_fifo_data(model)
+        if profiling_fifo_depth:
+            vars_to_profile = {
+                k: v
+                for k, v in model.output_vars.items()
+                if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
+            }
 
-        # if len(data['children']) == 0:
-        #     print(
-        #         "FIFO depth optimization found no FIFOs implemented using BRAMs in the design, no optimization is possible."
-        #     )
-        #     print("Consider increasing profiling_fifo_depth.")
-        #     return False
+            set_big_fifos(vars_to_profile, profiling_fifo_depth)
 
-        n_elem = len(data['children'][0]['children'][0]['children'])
-        for i in range(n_elem):
-            name = data['children'][0]['children'][0]['children'][i]['name']
-            data_p = data['children'][0]['children'][0]['children'][i]['children'][0]['data']
-            depth = data['children'][0]['children'][0]['children'][i]['children'][1]['data']
-            populate_values(self.values, name, data_p, depth)
+        execute_cosim_to_profile_fifos(model)
+        optmized_fifo_depth_dict = get_vitis_optimized_fifo_depths(model)
 
-        maxs = [{'name': i['name'], 'max': i['max'], 'depth': i['depth']} for i in self.values]
+        # maxs = [
+        #     {"name": i["name"], "max": i["max"], "depth": i["depth"]}
+        #     for i in self.values
+        # ]
 
-        generate_max_depth_file(model, maxs)
+        generate_max_depth_file(model, optmized_fifo_depth_dict)
 
-        set_fifo_depth(model, maxs)
+        set_optimized_fifo_depths(model, optmized_fifo_depth_dict)
 
-        print('[hls4ml] - FIFO optimization completed')
+        print("[hls4ml] - FIFO optimization completed")
         return False

From 47281c4cbb9aaf7676a10f94eed15ae5d5a02dd5 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 17:33:18 +0200
Subject: [PATCH 09/47] Implement the optimized depths parsing

---
 .../vitis/passes/fifo_depth_optimization.py   | 41 +++++++++++--------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 26c8ae0991..4441790e5d 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -5,10 +5,18 @@
     ModelOptimizerPass,
 )
 
-def set_big_fifos(vars_to_profile, profiling_fifo_depth):
-    for v in vars_to_profile.values():
-        if v.pragma:
-            v.pragma = (v.pragma[0], profiling_fifo_depth)
+def set_big_fifos(model, profiling_fifo_depth):
+    # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
+    # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value 
+    if profiling_fifo_depth:
+        vars_to_profile = {
+            k: v
+            for k, v in model.output_vars.items()
+            if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
+        }
+        for v in vars_to_profile.values():
+            if v.pragma:
+                v.pragma = (v.pragma[0], profiling_fifo_depth)
 
 def execute_cosim_to_profile_fifos(model):
     model.write()
@@ -26,7 +34,7 @@ def execute_cosim_to_profile_fifos(model):
 def get_vitis_optimized_fifo_depths(model):
     
     # channel.zip is generated after the cosimulation and contains the chan_status*.csv files
-    # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last line
+    # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last (4th) line
     path_to_zip_file = (
         model.config.get_output_dir()
         + "/"
@@ -52,7 +60,15 @@ def get_vitis_optimized_fifo_depths(model):
             layer_name = line.split(",")[1]
             csv_file_name = line.split(",")[3]
             csv_fifo_depth_files[layer_name] = csv_file_name
-
+    
+    optmized_fifo_depths = {}
+    for layer_name, file_name in csv_fifo_depth_files.items():
+        with open(path_to_zip_file+file_name) as chan_status_file:
+            lines = chan_status_file.readlines()
+            optmized_fifo_depths[layer_name] = int(lines[-1])
+            
+    return optmized_fifo_depths
+        
 def generate_max_depth_file(model, maxs):
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(maxs, f, indent=4)
@@ -83,19 +99,10 @@ def transform(self, model):
                 "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config"
             )
 
-        # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs
-        # and so they will be profiled
-        if profiling_fifo_depth:
-            vars_to_profile = {
-                k: v
-                for k, v in model.output_vars.items()
-                if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-            }
-
-            set_big_fifos(vars_to_profile, profiling_fifo_depth)
+        set_big_fifos(model, profiling_fifo_depth)
 
         execute_cosim_to_profile_fifos(model)
-        optmized_fifo_depth_dict = get_vitis_optimized_fifo_depths(model)
+        optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
 
         # maxs = [
         #     {"name": i["name"], "max": i["max"], "depth": i["depth"]}

From 57e8ffe5641685074edd711a55a394bb61233f1d Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 17:46:05 +0200
Subject: [PATCH 10/47] Implement setter for new depths

---
 .../vitis/passes/fifo_depth_optimization.py   | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 4441790e5d..5628b5d5a1 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -73,15 +73,18 @@ def generate_max_depth_file(model, maxs):
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(maxs, f, indent=4)
 
-def set_optimized_fifo_depths(model, maxs):
+def set_optimized_fifo_depths(model, optmized_fifo_depths):
     for v in model.output_vars.values():
         if v.pragma:
-            filtered_max = [x["max"] for x in maxs if v.name in x["name"]]
-            if len(filtered_max) == 0:
+            if v.name in optmized_fifo_depths.keys():
+                filtered_depth = optmized_fifo_depths[v.name]
+            else:
                 continue
-            if len(filtered_max) > 1:
-                print("WARNING! Check names of FIFOs")
-            v.pragma = (v.pragma[0], filtered_max[0] + 1)
+            # if len(filtered_max) == 0:
+            #     continue
+            # if len(filtered_max) > 1:
+            #     print("WARNING! Check names of FIFOs")
+            v.pragma = (v.pragma[0], filtered_depth)
 
 class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
     def __init__(self):
@@ -109,9 +112,9 @@ def transform(self, model):
         #     for i in self.values
         # ]
 
-        generate_max_depth_file(model, optmized_fifo_depth_dict)
+        # generate_max_depth_file(model, optmized_fifo_depths)
 
-        set_optimized_fifo_depths(model, optmized_fifo_depth_dict)
+        set_optimized_fifo_depths(model, optmized_fifo_depths)
 
         print("[hls4ml] - FIFO optimization completed")
         return False

From 26f54c8048736065afabc9845d586f604af058c1 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 18:38:40 +0200
Subject: [PATCH 11/47] Fix csv file name parsing

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 2 +-
 hls4ml/templates/vivado/build_prj.tcl                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 5628b5d5a1..eff97ef245 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -58,7 +58,7 @@ def get_vitis_optimized_fifo_depths(model):
         for line in names_file:
             # if "layer" in line:
             layer_name = line.split(",")[1]
-            csv_file_name = line.split(",")[3]
+            csv_file_name = line.split(",")[3][:-1]
             csv_fifo_depth_files[layer_name] = csv_file_name
     
     optmized_fifo_depths = {}
diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index 8659030bfd..623772b08c 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -186,7 +186,7 @@ if {$opt(synth)} {
 
     if {$opt(fifo_opt) && [string equal $backend "vitis"]} {
         puts "Synthesize with large FIFOs"
-        add_vitis_profiling_instructions_tcl
+        # add_vitis_profiling_instructions_tcl
     }
 
     set time_start [clock clicks -milliseconds]

From 78a8933b4218bbd197f237509d57fad87ce23a64 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Tue, 16 Jul 2024 19:44:59 +0200
Subject: [PATCH 12/47] Fix name parsing, deeply hardcoded for now

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index eff97ef245..2640ebd82e 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -42,7 +42,7 @@ def get_vitis_optimized_fifo_depths(model):
         + "_prj"
         + "/solution1/.autopilot/db/channel_depth_info/"
     )
-    os.system(f"unzip -q {path_to_zip_file}channel.zip -d {path_to_zip_file}")
+    os.system(f"unzip -q -o {path_to_zip_file}channel.zip -d {path_to_zip_file}")
     
     names_file_path = (
         model.config.get_output_dir()
@@ -65,7 +65,7 @@ def get_vitis_optimized_fifo_depths(model):
     for layer_name, file_name in csv_fifo_depth_files.items():
         with open(path_to_zip_file+file_name) as chan_status_file:
             lines = chan_status_file.readlines()
-            optmized_fifo_depths[layer_name] = int(lines[-1])
+            optmized_fifo_depths[layer_name[:-2]] = int(lines[-1])
             
     return optmized_fifo_depths
         

From 0b7d4b3d1634f1747481792020012228ecfb5cc8 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 11:19:05 +0200
Subject: [PATCH 13/47] Clean documentation and files

---
 hls4ml/templates/vivado/build_prj.tcl |  9 ---------
 hls4ml/writer/vitis_writer.py         | 12 +-----------
 2 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index 623772b08c..582cc810d2 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -43,10 +43,6 @@ proc remove_recursive_log_wave {} {
     file rename -force $temp $filename
 }
 
-proc add_vitis_profiling_instructions_tcl {} {
-    config_dataflow -override_user_fifo_depth 100000
-}
-
 proc add_vcd_instructions_tcl {} {
     set tcldir [file dirname [info script]]
     source [file join $tcldir project.tcl]
@@ -184,11 +180,6 @@ if {$opt(csim)} {
 if {$opt(synth)} {
     puts "***** C/RTL SYNTHESIS *****"
 
-    if {$opt(fifo_opt) && [string equal $backend "vitis"]} {
-        puts "Synthesize with large FIFOs"
-        # add_vitis_profiling_instructions_tcl
-    }
-
     set time_start [clock clicks -milliseconds]
     csynth_design
     set time_end [clock clicks -milliseconds]
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index 7eef002b15..91abd513af 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -27,18 +27,8 @@ def write_nnet_utils_overrides(self, model):
             
     def write_board_script(self, model):
         '''
-        Write the tcl scripts and kernel sources to create a Vivado IPI project for the VivadoAccelerator
+        Write the tcl scripts and kernel sources to create a Vitis IPI
         '''
-        filedir = os.path.dirname(os.path.abspath(__file__))
-        # copyfile(
-        #     os.path.join(filedir, self.config.get_tcl_file_path()),
-        #     f'{model.config.get_output_dir()}/design.tcl',
-        # )
-        # # Generic alveo board
-        # if self.vivado_accelerator_config.get_board().startswith('alveo'):
-        #     src_dir = os.path.join(filedir, self.vivado_accelerator_config.get_krnl_rtl_src_dir())
-        #     dst_dir = os.path.abspath(model.config.get_output_dir()) + '/src'
-        #     copy_tree(src_dir, dst_dir)
 
         ###################
         # project.tcl

From 6bbd2a2bf552c6370581167ba1c73a90ea389366 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 11:21:57 +0200
Subject: [PATCH 14/47] Remove unused function

---
 hls4ml/writer/vitis_writer.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index 91abd513af..5bb0a478fa 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -48,10 +48,7 @@ def write_board_script(self, model):
         f.write('variable version\n')
         f.write('set version "{}"\n'.format(model.config.get_config_value('Version', '1.0.0')))
         f.close()
-        return
-
-    def modify_build_script(self, model):
-        return        
+        return     
 
     def write_hls(self, model):
         """
@@ -60,7 +57,6 @@ def write_hls(self, model):
         super().write_hls(model)
         self.write_nnet_utils_overrides(model)
         self.write_board_script(model)
-        self.modify_build_script(model)
         os.remove(model.config.get_output_dir() + '.tar.gz')
         self.write_tar(model)
         

From 67a00bf1e627637da6e2e337255e3b95bd8e4bc6 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 12:00:54 +0200
Subject: [PATCH 15/47] Add documentation and runtime checks

---
 .../vitis/passes/fifo_depth_optimization.py   | 104 ++++++++++++------
 1 file changed, 72 insertions(+), 32 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 2640ebd82e..975b456fdc 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -5,20 +5,34 @@
     ModelOptimizerPass,
 )
 
+
 def set_big_fifos(model, profiling_fifo_depth):
+    """_summary_
+
+    Args:
+       model (ModelGraph): _description_
+        profiling_fifo_depth (int): _description_
+    """
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
-    # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value 
-    if profiling_fifo_depth:
-        vars_to_profile = {
-            k: v
-            for k, v in model.output_vars.items()
-            if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-        }
-        for v in vars_to_profile.values():
-            if v.pragma:
-                v.pragma = (v.pragma[0], profiling_fifo_depth)
+    # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value
+    vars_to_profile = {
+        k: v
+        for k, v in model.output_vars.items()
+        if v != model.get_output_variables()[0]
+        and v != model.get_input_variables()[0]
+    }
+    for v in vars_to_profile.values():
+        if v.pragma:
+            v.pragma = (v.pragma[0], profiling_fifo_depth)
+    return
+
 
 def execute_cosim_to_profile_fifos(model):
+    """_summary_
+
+    Args:
+        model (ModelGraph): _description_
+    """
     model.write()
     model.build(
         reset=False,
@@ -31,8 +45,17 @@ def execute_cosim_to_profile_fifos(model):
         fifo_opt=True,
     )
 
+    return
+
 def get_vitis_optimized_fifo_depths(model):
-    
+    """_summary_
+
+    Args:
+        model (_type_): _description_
+
+    Returns:
+        Dict[str, int]: _description_
+    """
     # channel.zip is generated after the cosimulation and contains the chan_status*.csv files
     # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last (4th) line
     path_to_zip_file = (
@@ -43,7 +66,8 @@ def get_vitis_optimized_fifo_depths(model):
         + "/solution1/.autopilot/db/channel_depth_info/"
     )
     os.system(f"unzip -q -o {path_to_zip_file}channel.zip -d {path_to_zip_file}")
-    
+
+    # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective chan_status*.csv file
     names_file_path = (
         model.config.get_output_dir()
         + "/"
@@ -52,7 +76,6 @@ def get_vitis_optimized_fifo_depths(model):
         + "/solution1/.autopilot/db/channel_info.csv"
     )
 
-    # the channel_info.csv file contains the mapping of the fifo names with the respective chan_status*.csv file
     csv_fifo_depth_files = {}
     with open(names_file_path) as names_file:
         for line in names_file:
@@ -60,41 +83,65 @@ def get_vitis_optimized_fifo_depths(model):
             layer_name = line.split(",")[1]
             csv_file_name = line.split(",")[3][:-1]
             csv_fifo_depth_files[layer_name] = csv_file_name
-    
+
     optmized_fifo_depths = {}
     for layer_name, file_name in csv_fifo_depth_files.items():
-        with open(path_to_zip_file+file_name) as chan_status_file:
+        with open(path_to_zip_file + file_name) as chan_status_file:
             lines = chan_status_file.readlines()
-            optmized_fifo_depths[layer_name[:-2]] = int(lines[-1])
-            
+            optmized_fifo_depths[layer_name[:-2]] = int(lines[-1])  # remove "_U" from the layer name string and keep the last line of the file that contains the max depth
+
     return optmized_fifo_depths
-        
+
+
 def generate_max_depth_file(model, maxs):
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(maxs, f, indent=4)
 
+
 def set_optimized_fifo_depths(model, optmized_fifo_depths):
+    """_summary_
+
+    Args:
+        model (ModelGraph): _description_
+        optmized_fifo_depths (int): _description_
+    """
+    
+    # iterate through the layer output FIFOs
     for v in model.output_vars.values():
         if v.pragma:
-            if v.name in optmized_fifo_depths.keys():
-                filtered_depth = optmized_fifo_depths[v.name]
-            else:
+            if v.name not in optmized_fifo_depths.keys():
                 continue
-            # if len(filtered_max) == 0:
-            #     continue
-            # if len(filtered_max) > 1:
-            #     print("WARNING! Check names of FIFOs")
+
+            filtered_depth = optmized_fifo_depths[v.name]
             v.pragma = (v.pragma[0], filtered_depth)
+    return
+
 
 class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
     def __init__(self):
         self.profiled_fifo_data = []
 
     def transform(self, model):
+        """_summary_
+
+        Args:
+            model (ModelGraph): The model to apply FIFO depth optimization on
+
+        Raises:
+            ValueError: 
+            RuntimeError: If the IO type is not set to "io_stream"
+
+        Returns:
+            _type_: _description_
+        """        
+
         # use `large_fifo_depth = 0` to keep the default fifo depth
         profiling_fifo_depth = getattr(
             self, "profiling_fifo_depth", 100_000
         )  # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation
+        
+        if not isinstance(profiling_fifo_depth, int) or profiling_fifo_depth < 0:
+            raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer")
 
         # check axi-stream or io-stream
         if not (model.config.get_config_value("IOType") == "io_stream"):
@@ -107,13 +154,6 @@ def transform(self, model):
         execute_cosim_to_profile_fifos(model)
         optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
 
-        # maxs = [
-        #     {"name": i["name"], "max": i["max"], "depth": i["depth"]}
-        #     for i in self.values
-        # ]
-
-        # generate_max_depth_file(model, optmized_fifo_depths)
-
         set_optimized_fifo_depths(model, optmized_fifo_depths)
 
         print("[hls4ml] - FIFO optimization completed")

From b4923088cdfd0a21b91bc16e37915221b3646f14 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 17:00:25 +0200
Subject: [PATCH 16/47] Add documentation

---
 .../vitis/passes/fifo_depth_optimization.py   | 41 ++++++++++---------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 975b456fdc..594a3df15d 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -5,12 +5,11 @@
     ModelOptimizerPass,
 )
 
-
-def set_big_fifos(model, profiling_fifo_depth):
-    """_summary_
+def initialize_large_fifos(model, profiling_fifo_depth):
+    """Setting all FIFOs equal to a large value so that they can be profiled
 
     Args:
-       model (ModelGraph): _description_
+        model (ModelGraph): The model to which FIFO depth optimization is applied
         profiling_fifo_depth (int): _description_
     """
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
@@ -26,12 +25,11 @@ def set_big_fifos(model, profiling_fifo_depth):
             v.pragma = (v.pragma[0], profiling_fifo_depth)
     return
 
-
 def execute_cosim_to_profile_fifos(model):
-    """_summary_
+    """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths
 
     Args:
-        model (ModelGraph): _description_
+        model (ModelGraph): The model to which FIFO depth optimization is applied
     """
     model.write()
     model.build(
@@ -48,13 +46,14 @@ def execute_cosim_to_profile_fifos(model):
     return
 
 def get_vitis_optimized_fifo_depths(model):
-    """_summary_
+    """Parse the files generated by the cosimulation to retrieve the optimized depths for the FIFOs.
+    Attention, only the FIFOs between the layers are profiled!
 
     Args:
-        model (_type_): _description_
+        model (ModelGraph): The model to which FIFO depth optimization is applied
 
     Returns:
-        Dict[str, int]: _description_
+        Dict[str, int]: A dictionary that contains the FIFO names as keys and the optimized depths as values
     """
     # channel.zip is generated after the cosimulation and contains the chan_status*.csv files
     # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last (4th) line
@@ -99,11 +98,11 @@ def generate_max_depth_file(model, maxs):
 
 
 def set_optimized_fifo_depths(model, optmized_fifo_depths):
-    """_summary_
+    """Set the new optimized FIFO depths 
 
     Args:
-        model (ModelGraph): _description_
-        optmized_fifo_depths (int): _description_
+        model (ModelGraph): The model to which FIFO depth optimization is applied
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values
     """
     
     # iterate through the layer output FIFOs
@@ -116,19 +115,20 @@ def set_optimized_fifo_depths(model, optmized_fifo_depths):
             v.pragma = (v.pragma[0], filtered_depth)
     return
 
-
 class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
     def __init__(self):
-        self.profiled_fifo_data = []
-
+        pass
+    
     def transform(self, model):
-        """_summary_
+        """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the initial FIFOs set by hls4ml might be larger than required.
+        At the end of the optimization the FIFOs will have the largest depths achieved during cosimulation without causing any deadlocks between the layers (producer-consumer)
+        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the tool in order to prevent deadlocks. 
 
         Args:
-            model (ModelGraph): The model to apply FIFO depth optimization on
+            model (ModelGraph): The model to which FIFO depth optimization is applied
 
         Raises:
-            ValueError: 
+            ValueError: If the FIFO depth for profiling provided by the user is not a non-negative integer
             RuntimeError: If the IO type is not set to "io_stream"
 
         Returns:
@@ -149,9 +149,10 @@ def transform(self, model):
                 "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config"
             )
 
-        set_big_fifos(model, profiling_fifo_depth)
+        initialize_large_fifos(model, profiling_fifo_depth)
 
         execute_cosim_to_profile_fifos(model)
+        
         optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
 
         set_optimized_fifo_depths(model, optmized_fifo_depths)

From d0918b513262f66452900c35dfae4aa33af0c841 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 17:16:53 +0200
Subject: [PATCH 17/47] Include extracting optimized depths

---
 .../vitis/passes/fifo_depth_optimization.py   | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 594a3df15d..e131b29ecc 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -6,11 +6,11 @@
 )
 
 def initialize_large_fifos(model, profiling_fifo_depth):
-    """Setting all FIFOs equal to a large value so that they can be profiled
+    """Setting all FIFO depths equal to a large value so that they can be profiled
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied
-        profiling_fifo_depth (int): _description_
+        profiling_fifo_depth (int): A large non-negative integer, must be larger than the max expected depth of the FIFOs
     """
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
     # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value
@@ -92,9 +92,16 @@ def get_vitis_optimized_fifo_depths(model):
     return optmized_fifo_depths
 
 
-def generate_max_depth_file(model, maxs):
+def generate_max_depth_file(model, optmized_fifo_depths):
+    """Generate a json file with the names of the FIFOs and their optimized depths for post-processing.
+    The json file is not used by the rest of the pipeline, it is only produced for the user
+
+    Args:
+        model (ModelGraph): The model to which FIFO depth optimization is applied
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values
+    """    
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
-        json.dump(maxs, f, indent=4)
+        json.dump(optmized_fifo_depths, f, indent=4)
 
 
 def set_optimized_fifo_depths(model, optmized_fifo_depths):
@@ -122,7 +129,7 @@ def __init__(self):
     def transform(self, model):
         """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the initial FIFOs set by hls4ml might be larger than required.
         At the end of the optimization the FIFOs will have the largest depths achieved during cosimulation without causing any deadlocks between the layers (producer-consumer)
-        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the tool in order to prevent deadlocks. 
+        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks. 
 
         Args:
             model (ModelGraph): The model to which FIFO depth optimization is applied
@@ -154,6 +161,8 @@ def transform(self, model):
         execute_cosim_to_profile_fifos(model)
         
         optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
+        
+        generate_max_depth_file(model, optmized_fifo_depths)
 
         set_optimized_fifo_depths(model, optmized_fifo_depths)
 

From baa81f54733024efa4688077f616239eb10410e6 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Wed, 17 Jul 2024 17:25:20 +0200
Subject: [PATCH 18/47] Fix documentation

---
 .../vitis/passes/fifo_depth_optimization.py   | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index e131b29ecc..7a2369a829 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -6,11 +6,11 @@
 )
 
 def initialize_large_fifos(model, profiling_fifo_depth):
-    """Setting all FIFO depths equal to a large value so that they can be profiled
+    """Setting all FIFO depths equal to a large value so that they can be profiled.
 
     Args:
-        model (ModelGraph): The model to which FIFO depth optimization is applied
-        profiling_fifo_depth (int): A large non-negative integer, must be larger than the max expected depth of the FIFOs
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
+        profiling_fifo_depth (int): A large non-negative integer, must be larger than the max expected depth of the FIFOs.
     """
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
     # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value
@@ -26,10 +26,10 @@ def initialize_large_fifos(model, profiling_fifo_depth):
     return
 
 def execute_cosim_to_profile_fifos(model):
-    """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths
+    """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths.
 
     Args:
-        model (ModelGraph): The model to which FIFO depth optimization is applied
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
     """
     model.write()
     model.build(
@@ -50,10 +50,10 @@ def get_vitis_optimized_fifo_depths(model):
     Attention, only the FIFOs between the layers are profiled!
 
     Args:
-        model (ModelGraph): The model to which FIFO depth optimization is applied
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
 
     Returns:
-        Dict[str, int]: A dictionary that contains the FIFO names as keys and the optimized depths as values
+        Dict[str, int]: A dictionary that contains the FIFO names as keys and the optimized depths as values.
     """
     # channel.zip is generated after the cosimulation and contains the chan_status*.csv files
     # in the chan_status*.csv files the max depth achieved during cosimulation can be found at the last (4th) line
@@ -94,22 +94,22 @@ def get_vitis_optimized_fifo_depths(model):
 
 def generate_max_depth_file(model, optmized_fifo_depths):
     """Generate a json file with the names of the FIFOs and their optimized depths for post-processing.
-    The json file is not used by the rest of the pipeline, it is only produced for the user
+    The json file is not used by the rest of the pipeline, it is only produced for the user.
 
     Args:
-        model (ModelGraph): The model to which FIFO depth optimization is applied
-        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
     """    
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(optmized_fifo_depths, f, indent=4)
 
 
 def set_optimized_fifo_depths(model, optmized_fifo_depths):
-    """Set the new optimized FIFO depths 
+    """Set the new optimized FIFO depths.
 
     Args:
-        model (ModelGraph): The model to which FIFO depth optimization is applied
-        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
     """
     
     # iterate through the layer output FIFOs
@@ -132,14 +132,14 @@ def transform(self, model):
         , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks. 
 
         Args:
-            model (ModelGraph): The model to which FIFO depth optimization is applied
+            model (ModelGraph): The model to which FIFO depth optimization is applied.
 
         Raises:
-            ValueError: If the FIFO depth for profiling provided by the user is not a non-negative integer
-            RuntimeError: If the IO type is not set to "io_stream"
+            ValueError: If the FIFO depth for profiling provided by the user is not a non-negative integer.
+            RuntimeError: If the IO type is not set to "io_stream".
 
         Returns:
-            _type_: _description_
+            bool: The execution state of the Optimzer Pass
         """        
 
         # use `large_fifo_depth = 0` to keep the default fifo depth

From 62a933fb3cb877c087985e5e617b9f2e40793106 Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Thu, 18 Jul 2024 10:10:03 +0200
Subject: [PATCH 19/47] Add function to override Vivado test bench

---
 .../vitis/passes/fifo_depth_optimization.py   | 43 ++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 7a2369a829..4265eaeb19 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -25,13 +25,54 @@ def initialize_large_fifos(model, profiling_fifo_depth):
             v.pragma = (v.pragma[0], profiling_fifo_depth)
     return
 
+def override_test_bench(model):
+    """In order for the FIFO depth profiling to produce correct results, it is necessary for the cosimulation to call the top function - Vitis IP at **least twice**.
+    The test bench produced by the Vivado Writer is overwritten by adding a for-loop over the top function.
+
+    Args:
+        model (ModelGraph): The model to which FIFO depth optimization is applied.
+    """    
+    indent = '    '
+    path_to_old_test_bench = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp'
+    path_to_new_test_bench = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_new_test.cpp'
+    
+    newline = ""
+    second_part_of_testbench = False
+    with open(path_to_old_test_bench, 'r') as old_test_bench:
+        file_iterator = iter(old_test_bench)
+        for line in file_iterator:
+
+            if '// hls-fpga-machine-learning insert zero' in line:
+                newline += indent + indent + 'const unsigned BATCH_SIZE = 2;\n'
+                newline += indent + indent + 'for(unsigned batch_iteration = 0; batch_iteration < BATCH_SIZE; ++batch_iteration) {\n'
+                newline += line
+                second_part_of_testbench = True
+            elif ('// hls-fpga-machine-learning insert tb-output' in line) and second_part_of_testbench:
+                newline += line
+                newline += next(file_iterator)
+                newline += indent + '}\n'
+            else:
+                newline += line
+                
+    with open(path_to_new_test_bench, 'w+') as new_test_bench:
+        new_test_bench.write(newline)
+    
+    # replace the old test bench with the new test bench that includes a for-loop
+    os.system(f"mv {path_to_new_test_bench} {path_to_old_test_bench}")
+    return
+
 def execute_cosim_to_profile_fifos(model):
     """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths.
-
+    The function will momentarily replace the initial test bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from 
+    the user-written script that utilized hls4ml will reinitilize the original test bench.
+    
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
     """
     model.write()
+    
+    override_test_bench(model)
+    
     model.build(
         reset=False,
         csim=True,

From eb85e41edd02668b0500268a1808d847eee2941e Mon Sep 17 00:00:00 2001
From: stzelepi <stylianos.tzelepis@cern.ch>
Date: Thu, 18 Jul 2024 10:16:47 +0200
Subject: [PATCH 20/47] Fix hls4ml docs

---
 docs/advanced/fifo_depth.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/advanced/fifo_depth.rst b/docs/advanced/fifo_depth.rst
index 44f87b014d..703bff0891 100644
--- a/docs/advanced/fifo_depth.rst
+++ b/docs/advanced/fifo_depth.rst
@@ -21,10 +21,10 @@ First, we can define a simple neural network in Keras
     from tensorflow.keras.models import Sequential
 
     model = Sequential()
-    model.add(Dense(64, input_shape=(16,), name='fc1', activation='relu')
+    model.add(Dense(64, input_shape=(16,), name='fc1', activation='relu'))
     model.add(Dense(32, name='fc2', activation='relu'))
     model.add(Dense(32, name='fc3', activation='relu'))
-    model.add(Dense(5, name='fc3', activation='softmax'))
+    model.add(Dense(5, name='fc4', activation='softmax'))
 
 Then, we can convert the model, including the flow
 

From ba47496fffd43968f50fbd378cb67bb51dd5a234 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 11:40:44 +0200
Subject: [PATCH 21/47] Undo changes in sepconv stream

---
 .../vivado/nnet_utils/nnet_sepconv_stream.h   | 246 +-----------------
 1 file changed, 8 insertions(+), 238 deletions(-)

diff --git a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
index 77a95ebb67..9c16de1908 100644
--- a/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
+++ b/hls4ml/templates/vivado/nnet_utils/nnet_sepconv_stream.h
@@ -4,223 +4,11 @@
 #include "hls_stream.h"
 #include "nnet_common.h"
 #include "nnet_conv_stream.h"
-#include <iostream>
 
 namespace nnet {
 
 template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product_resource_rf_lt_nchan(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
-                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
-                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
-                    
-    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
-    const int nout = CONFIG_T::n_chan;
-
-    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
-    const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
-    const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
-    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
-    // const int multscale = multiplier_limit;
-
-    assert((multiplier_limit % nout == 0 || rufactor > CONFIG_T::n_chan) && "The current Reuse Factor is not allowed");
-    assert((multiplier_limit == block_factor) && "This function is correct only for RF <= N_IN");
-
-    #pragma HLS function_instantiate variable=weights,biases
-    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
-    #pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
-    #pragma HLS ARRAY_RESHAPE   variable=data block factor=block_factor
-
-    #pragma HLS ARRAY_PARTITION variable=biases complete
-
-    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
-    #pragma HLS ARRAY_PARTITION variable=acc type=complete
-    
-InitAccum:  
-    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
-        #pragma HLS UNROLL
-        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
-    }
-
-ReuseLoop:
-    for (int ir = 0; ir < rufactor; ir++) {
-        #pragma HLS PIPELINE II=1 rewind
-
-        int in_index = ir;
-        int out_index = ir;
-
-    MultLoop:
-        for (int im = 0; im < block_factor; im++) {
-            #pragma HLS UNROLL
-            
-            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
-
-            in_index+=rufactor;
-            out_index+=rufactor;
-
-            if (out_index >= CONFIG_T::n_chan) {
-                out_index -= CONFIG_T::n_chan;
-            }
-            
-        }
-    }
-
-// Cast to "res_t" type
-Result:
-    for (int ires = 0; ires < nout; ires++) {
-        #pragma HLS UNROLL
-        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
-    }
-}
-
-template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product_resource_rf_geq_nchan_rem0(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
-                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
-                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
-
-    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
-    const int nout = CONFIG_T::n_chan;
-
-    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
-    const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
-    const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
-    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
-    // const int multscale = multiplier_limit;
-
-    assert((multiplier_limit % nout == 0 || rufactor >= CONFIG_T::n_chan) && "The current Reuse Factor is not allowed");
-    assert((rufactor >= CONFIG_T::n_chan && rufactor % CONFIG_T::n_chan == 0) && "This function is correct only for RF >= N_IN && RF % N_IN == 0");
-
-    #pragma HLS function_instantiate variable=weights,biases
-    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
-    #pragma HLS ARRAY_RESHAPE   variable=weights type=block factor=block_factor
-    #pragma HLS ARRAY_RESHAPE   variable=data type=block factor=block_factor
-
-    #pragma HLS ARRAY_PARTITION variable=biases complete
-
-    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
-    #pragma HLS ARRAY_PARTITION variable=acc type=complete
-
-InitAccum:  
-    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
-        #pragma HLS UNROLL
-        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
-    }
-
-int outidx[rufactor];
-int outstep = 0;
-IndexLoop:
-    for (int ir = 0; ir < rufactor; ir++) {
-        outidx[ir] = outstep;
-        outstep++;
-        if (outstep == CONFIG_T::n_chan) {
-            outstep = 0;
-        }
-    }
-int out_index = -1;
-
-ReuseLoop:
-    for (int ir = 0; ir < rufactor; ir++) {
-        #pragma HLS PIPELINE II=1 rewind
-
-        int in_index = ir;
-
-        out_index = outidx[ir];
-
-    MultLoop:
-        for (int im = 0; im < block_factor; im++) {
-            #pragma HLS UNROLL
-            
-            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
-
-            in_index+=rufactor;         
-        }
-    }
-
-// Cast to "res_t" type
-Result:
-    for (int ires = 0; ires < nout; ires++) {
-        #pragma HLS UNROLL
-        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
-    }
-}
-
-template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product_resource_rf_gt_nchan(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
-                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
-                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
-
-    const int nin = CONFIG_T::kernel_size * CONFIG_T::n_chan;
-    const int nout = CONFIG_T::n_chan;
-
-    const int rufactor = MIN(CONFIG_T::reuse_factor, nin);
-    // const int multfactor = MIN(nin, CONFIG_T::reuse_factor);
-    // const int multiplier_limit = DIV_ROUNDUP(nin, multfactor);
-    const int block_factor = DIV_ROUNDUP(nin, CONFIG_T::reuse_factor);
-    // const int multscale = multiplier_limit;
-
-    // assert((multiplier_limit % nout == 0 || rufactor >= nin) && "The current Reuse Factor is not allowed");
-    assert((rufactor > CONFIG_T::n_chan) && "This function is correct only for RF > N_IN");
-
-    #pragma HLS function_instantiate variable=weights,biases
-    //#pragma HLS RESOURCE variable=weights core=RAM_2P_BRAM Commenting out the deisgnation HLS seems to choose correctly
-    #pragma HLS ARRAY_RESHAPE   variable=weights block factor=block_factor
-    #pragma HLS ARRAY_RESHAPE   variable=data block factor=block_factor
-
-    #pragma HLS ARRAY_PARTITION variable=biases complete
-
-    typename CONFIG_T::accum_t acc[CONFIG_T::n_chan];
-    #pragma HLS ARRAY_PARTITION variable=acc type=complete
-    
-InitAccum:  
-    for (int iacc = 0; iacc < CONFIG_T::n_chan; iacc++) {
-        #pragma HLS UNROLL
-        acc[iacc] = (typename CONFIG_T::accum_t)biases[iacc];
-    }
-
-const int remainder = CONFIG_T::reuse_factor % CONFIG_T::n_chan;
-
-int outidx[rufactor];
-int outstep = 0;
-IndexLoop:
-    for (int ir = 0; ir < rufactor; ir++) {
-        outidx[ir] = outstep;
-        outstep++;
-        if (outstep == CONFIG_T::n_chan) {
-            outstep = 0;
-        }
-    }
-
-ReuseLoop:
-    for (int ir = 0; ir < rufactor; ir++) {
-        #pragma HLS PIPELINE II=1 rewind
-
-        int in_index = ir;
-        int out_index = outidx[ir];
-        
-    MultLoop:
-        for (int im = 0; im < block_factor; im++) {
-            #pragma HLS UNROLL
-
-            // out_index = in_index % CONFIG_T::n_chan;
-            acc[out_index] += static_cast<typename CONFIG_T::accum_t>(CONFIG_T::mult_config::template product<data_T, typename CONFIG_T::mult_config::weight_t>::product(data[in_index], weights[in_index]));
-
-            in_index += rufactor;
-            out_index += remainder;
-            if (out_index >= CONFIG_T::n_chan) {
-                out_index -= CONFIG_T::n_chan;
-            }
-        }
-    }
-
-// Cast to "res_t" type
-Result:
-    for (int ires = 0; ires < nout; ires++) {
-        #pragma HLS UNROLL
-        res[ires] = cast<data_T, res_T, CONFIG_T>(acc[ires]);
-    }
-}
-
-template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product_latency(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
+void depthwise_product(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
                        typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
                        typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
     #pragma HLS INLINE
@@ -270,22 +58,6 @@ void depthwise_product_latency(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_c
     }
 }
 
-template <class data_T, class res_T, typename CONFIG_T>
-void depthwise_product_resource(data_T data[CONFIG_T::kernel_size * CONFIG_T::n_chan], res_T res[CONFIG_T::n_chan],
-                       typename CONFIG_T::weight_t weights[CONFIG_T::kernel_size * CONFIG_T::n_chan],
-                       typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
-
-    #pragma HLS INLINE recursive
-
-    if (CONFIG_T::reuse_factor < CONFIG_T::n_chan) {
-        depthwise_product_resource_rf_lt_nchan<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    } else if (CONFIG_T::reuse_factor % CONFIG_T::n_chan == 0) {
-        depthwise_product_resource_rf_geq_nchan_rem0<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    } else {
-        depthwise_product_resource_rf_gt_nchan<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    }
-}
-
 template <class data_T, class res_T, typename CONFIG_T>
 void depthwise_mult_buffer(hls::stream<typename data_T::value_type> data_window[CONFIG_T::kernel_size * CONFIG_T::n_chan],
                            res_T &res_pack, hls::stream<res_T> &res_stream, unsigned &outputs_ready,
@@ -306,9 +78,9 @@ void depthwise_mult_buffer(hls::stream<typename data_T::value_type> data_window[
 
     #pragma HLS INLINE recursive
     if (CONFIG_T::strategy == nnet::latency) {
-        depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
+        depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
     } else {
-        depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(data, res, weights, biases);
+        assert("Resource strategy for DepthwiseConv2D is not supported." && false);
     }
 
 CastLoop:
@@ -430,11 +202,10 @@ void compute_depthwise_output_buffer_1d(const data_T &in_elem, hls::stream<res_T
         // Dense multiply
         #pragma HLS INLINE recursive
         if (CONFIG_T::strategy == nnet::latency) {
-            depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+            depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
                                                                                                  weights, biases);
         } else {
-            depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
-                                                                                                 weights, biases);
+            assert("Resource strategy for DepthwiseConv1D is not supported." && false);
         }
 
     // Pack output
@@ -496,11 +267,10 @@ void compute_depthwise_output_buffer_2d(const data_T &in_elem,
         // Dense multiply
         #pragma HLS INLINE recursive
         if (CONFIG_T::strategy == nnet::latency) {
-            depthwise_product_latency<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
+            depthwise_product<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
                                                                                                  weights, biases);
         } else {
-            depthwise_product_resource<typename data_T::value_type, typename res_T::value_type, CONFIG_T>(kernel_data, res_out,
-                                                                                                 weights, biases);
+            assert("Resource strategy for DepthwiseConv2D is not supported." && false);
         }
 
     // Pack output
@@ -533,4 +303,4 @@ void compute_depthwise_output_buffer_2d(const data_T &in_elem,
 }
 
 } // namespace nnet
-#endif
\ No newline at end of file
+#endif

From 3ab3b615f45878391d36aab363e87cb406abb3fb Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 11:43:48 +0200
Subject: [PATCH 22/47] Format code

---
 .../vitis/passes/fifo_depth_optimization.py   | 76 +++++++++++--------
 1 file changed, 46 insertions(+), 30 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 4265eaeb19..366881d263 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -5,6 +5,7 @@
     ModelOptimizerPass,
 )
 
+
 def initialize_large_fifos(model, profiling_fifo_depth):
     """Setting all FIFO depths equal to a large value so that they can be profiled.
 
@@ -17,62 +18,71 @@ def initialize_large_fifos(model, profiling_fifo_depth):
     vars_to_profile = {
         k: v
         for k, v in model.output_vars.items()
-        if v != model.get_output_variables()[0]
-        and v != model.get_input_variables()[0]
+        if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
     }
     for v in vars_to_profile.values():
         if v.pragma:
             v.pragma = (v.pragma[0], profiling_fifo_depth)
     return
 
+
 def override_test_bench(model):
     """In order for the FIFO depth profiling to produce correct results, it is necessary for the cosimulation to call the top function - Vitis IP at **least twice**.
     The test bench produced by the Vivado Writer is overwritten by adding a for-loop over the top function.
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
-    """    
-    indent = '    '
-    path_to_old_test_bench = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp'
-    path_to_new_test_bench = f'{model.config.get_output_dir()}/{model.config.get_project_name()}_new_test.cpp'
-    
+    """
+    indent = "    "
+    path_to_old_test_bench = (
+        f"{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp"
+    )
+    path_to_new_test_bench = f"{model.config.get_output_dir()}/{model.config.get_project_name()}_new_test.cpp"
+
     newline = ""
     second_part_of_testbench = False
-    with open(path_to_old_test_bench, 'r') as old_test_bench:
+    with open(path_to_old_test_bench, "r") as old_test_bench:
         file_iterator = iter(old_test_bench)
         for line in file_iterator:
 
-            if '// hls-fpga-machine-learning insert zero' in line:
-                newline += indent + indent + 'const unsigned BATCH_SIZE = 2;\n'
-                newline += indent + indent + 'for(unsigned batch_iteration = 0; batch_iteration < BATCH_SIZE; ++batch_iteration) {\n'
+            if "// hls-fpga-machine-learning insert zero" in line:
+                newline += indent + indent + "const unsigned BATCH_SIZE = 2;\n"
+                newline += (
+                    indent
+                    + indent
+                    + "for(unsigned batch_iteration = 0; batch_iteration < BATCH_SIZE; ++batch_iteration) {\n"
+                )
                 newline += line
                 second_part_of_testbench = True
-            elif ('// hls-fpga-machine-learning insert tb-output' in line) and second_part_of_testbench:
+            elif (
+                "// hls-fpga-machine-learning insert tb-output" in line
+            ) and second_part_of_testbench:
                 newline += line
                 newline += next(file_iterator)
-                newline += indent + '}\n'
+                newline += indent + "}\n"
             else:
                 newline += line
-                
-    with open(path_to_new_test_bench, 'w+') as new_test_bench:
+
+    with open(path_to_new_test_bench, "w+") as new_test_bench:
         new_test_bench.write(newline)
-    
+
     # replace the old test bench with the new test bench that includes a for-loop
     os.system(f"mv {path_to_new_test_bench} {path_to_old_test_bench}")
     return
 
+
 def execute_cosim_to_profile_fifos(model):
     """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths.
-    The function will momentarily replace the initial test bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from 
+    The function will momentarily replace the initial test bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from
     the user-written script that utilized hls4ml will reinitilize the original test bench.
-    
+
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
     """
     model.write()
-    
+
     override_test_bench(model)
-    
+
     model.build(
         reset=False,
         csim=True,
@@ -86,6 +96,7 @@ def execute_cosim_to_profile_fifos(model):
 
     return
 
+
 def get_vitis_optimized_fifo_depths(model):
     """Parse the files generated by the cosimulation to retrieve the optimized depths for the FIFOs.
     Attention, only the FIFOs between the layers are profiled!
@@ -128,7 +139,9 @@ def get_vitis_optimized_fifo_depths(model):
     for layer_name, file_name in csv_fifo_depth_files.items():
         with open(path_to_zip_file + file_name) as chan_status_file:
             lines = chan_status_file.readlines()
-            optmized_fifo_depths[layer_name[:-2]] = int(lines[-1])  # remove "_U" from the layer name string and keep the last line of the file that contains the max depth
+            optmized_fifo_depths[layer_name[:-2]] = int(
+                lines[-1]
+            )  # remove "_U" from the layer name string and keep the last line of the file that contains the max depth
 
     return optmized_fifo_depths
 
@@ -140,7 +153,7 @@ def generate_max_depth_file(model, optmized_fifo_depths):
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
         optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
-    """    
+    """
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(optmized_fifo_depths, f, indent=4)
 
@@ -152,7 +165,7 @@ def set_optimized_fifo_depths(model, optmized_fifo_depths):
         model (ModelGraph): The model to which FIFO depth optimization is applied.
         optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
     """
-    
+
     # iterate through the layer output FIFOs
     for v in model.output_vars.values():
         if v.pragma:
@@ -163,14 +176,15 @@ def set_optimized_fifo_depths(model, optmized_fifo_depths):
             v.pragma = (v.pragma[0], filtered_depth)
     return
 
+
 class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass):
     def __init__(self):
         pass
-    
+
     def transform(self, model):
         """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the initial FIFOs set by hls4ml might be larger than required.
         At the end of the optimization the FIFOs will have the largest depths achieved during cosimulation without causing any deadlocks between the layers (producer-consumer)
-        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks. 
+        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks.
 
         Args:
             model (ModelGraph): The model to which FIFO depth optimization is applied.
@@ -181,15 +195,17 @@ def transform(self, model):
 
         Returns:
             bool: The execution state of the Optimzer Pass
-        """        
+        """
 
         # use `large_fifo_depth = 0` to keep the default fifo depth
         profiling_fifo_depth = getattr(
             self, "profiling_fifo_depth", 100_000
         )  # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation
-        
+
         if not isinstance(profiling_fifo_depth, int) or profiling_fifo_depth < 0:
-            raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer")
+            raise ValueError(
+                "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer"
+            )
 
         # check axi-stream or io-stream
         if not (model.config.get_config_value("IOType") == "io_stream"):
@@ -200,9 +216,9 @@ def transform(self, model):
         initialize_large_fifos(model, profiling_fifo_depth)
 
         execute_cosim_to_profile_fifos(model)
-        
+
         optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
-        
+
         generate_max_depth_file(model, optmized_fifo_depths)
 
         set_optimized_fifo_depths(model, optmized_fifo_depths)

From 7ca3438fb1d104d78e15c86bcd85ea43e47258ed Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 11:54:08 +0200
Subject: [PATCH 23/47] Run pre-commit

---
 .../vitis/passes/fifo_depth_optimization.py   | 24 +++++----------
 hls4ml/backends/vitis/vitis_backend.py        | 30 ++++++++++++++++---
 hls4ml/templates/vivado/build_prj.tcl         |  2 +-
 hls4ml/writer/vitis_writer.py                 |  9 +++---
 4 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 366881d263..ec47a9753f 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -1,9 +1,7 @@
 import json
 import os
-from hls4ml.model.optimizer.optimizer import (
-    ConfigurableOptimizerPass,
-    ModelOptimizerPass,
-)
+
+from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass
 
 
 def initialize_large_fifos(model, profiling_fifo_depth):
@@ -34,14 +32,12 @@ def override_test_bench(model):
         model (ModelGraph): The model to which FIFO depth optimization is applied.
     """
     indent = "    "
-    path_to_old_test_bench = (
-        f"{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp"
-    )
+    path_to_old_test_bench = f"{model.config.get_output_dir()}/{model.config.get_project_name()}_test.cpp"
     path_to_new_test_bench = f"{model.config.get_output_dir()}/{model.config.get_project_name()}_new_test.cpp"
 
     newline = ""
     second_part_of_testbench = False
-    with open(path_to_old_test_bench, "r") as old_test_bench:
+    with open(path_to_old_test_bench) as old_test_bench:
         file_iterator = iter(old_test_bench)
         for line in file_iterator:
 
@@ -54,9 +50,7 @@ def override_test_bench(model):
                 )
                 newline += line
                 second_part_of_testbench = True
-            elif (
-                "// hls-fpga-machine-learning insert tb-output" in line
-            ) and second_part_of_testbench:
+            elif ("// hls-fpga-machine-learning insert tb-output" in line) and second_part_of_testbench:
                 newline += line
                 newline += next(file_iterator)
                 newline += indent + "}\n"
@@ -203,15 +197,11 @@ def transform(self, model):
         )  # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation
 
         if not isinstance(profiling_fifo_depth, int) or profiling_fifo_depth < 0:
-            raise ValueError(
-                "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer"
-            )
+            raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer")
 
         # check axi-stream or io-stream
         if not (model.config.get_config_value("IOType") == "io_stream"):
-            raise RuntimeError(
-                "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config"
-            )
+            raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config")
 
         initialize_large_fifos(model, profiling_fifo_depth)
 
diff --git a/hls4ml/backends/vitis/vitis_backend.py b/hls4ml/backends/vitis/vitis_backend.py
index f48f2408b5..cf623bb19a 100644
--- a/hls4ml/backends/vitis/vitis_backend.py
+++ b/hls4ml/backends/vitis/vitis_backend.py
@@ -33,9 +33,11 @@ def _register_flows(self):
         ip_flow_requirements.insert(ip_flow_requirements.index('vivado:apply_templates'), template_flow)
 
         self._default_flow = register_flow('ip', None, requires=ip_flow_requirements, backend=self.name)
-        
+
         # Register the fifo depth optimization flow which is different from the one for vivado
-        fifo_depth_opt_passes = ['vitis:fifo_depth_optimization'] + writer_passes # After optimization, a new project will be written
+        fifo_depth_opt_passes = [
+            'vitis:fifo_depth_optimization'
+        ] + writer_passes  # After optimization, a new project will be written
 
         register_flow('fifo_depth_optimization', fifo_depth_opt_passes, requires=['vitis:ip'], backend=self.name)
 
@@ -81,7 +83,18 @@ def create_initial_config(
 
         return config
 
-    def build(self, model, reset=False, csim=True, synth=True, cosim=False, validation=False, export=False, vsynth=False, fifo_opt=False):
+    def build(
+        self,
+        model,
+        reset=False,
+        csim=True,
+        synth=True,
+        cosim=False,
+        validation=False,
+        export=False,
+        vsynth=False,
+        fifo_opt=False,
+    ):
         if 'linux' in sys.platform:
             found = os.system('command -v vitis_hls > /dev/null')
             if found != 0:
@@ -93,7 +106,16 @@ def build(self, model, reset=False, csim=True, synth=True, cosim=False, validati
             (
                 'vitis_hls -f build_prj.tcl "reset={reset} csim={csim} synth={synth} cosim={cosim} '
                 'validation={validation} export={export} vsynth={vsynth} fifo_opt={fifo_opt}"'
-            ).format(reset=reset, csim=csim, synth=synth, cosim=cosim, validation=validation, export=export, vsynth=vsynth, fifo_opt=fifo_opt)
+            ).format(
+                reset=reset,
+                csim=csim,
+                synth=synth,
+                cosim=cosim,
+                validation=validation,
+                export=export,
+                vsynth=vsynth,
+                fifo_opt=fifo_opt,
+            )
         )
         os.chdir(curr_dir)
 
diff --git a/hls4ml/templates/vivado/build_prj.tcl b/hls4ml/templates/vivado/build_prj.tcl
index 582cc810d2..67a32c46ed 100644
--- a/hls4ml/templates/vivado/build_prj.tcl
+++ b/hls4ml/templates/vivado/build_prj.tcl
@@ -52,7 +52,7 @@ proc add_vcd_instructions_tcl {} {
     set temp     $filename.new.$timestamp
     # set backup   $filename.bak.$timestamp
 
-    set in  [open $filename r]  
+    set in  [open $filename r]
     set out [open $temp     w]
 
     # line-by-line, read the original file
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index 5bb0a478fa..d23098a561 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -1,11 +1,11 @@
 import glob
 import os
-from shutil import copy
 from distutils.dir_util import copy_tree
-from shutil import copyfile
+from shutil import copy, copyfile
 
 from hls4ml.writer.vivado_writer import VivadoWriter
 
+
 class VitisWriter(VivadoWriter):
     def __init__(self):
         super().__init__()
@@ -24,7 +24,7 @@ def write_nnet_utils_overrides(self, model):
 
         for h in headers:
             copy(srcpath + h, dstpath + h)
-            
+
     def write_board_script(self, model):
         '''
         Write the tcl scripts and kernel sources to create a Vitis IPI
@@ -48,7 +48,7 @@ def write_board_script(self, model):
         f.write('variable version\n')
         f.write('set version "{}"\n'.format(model.config.get_config_value('Version', '1.0.0')))
         f.close()
-        return     
+        return
 
     def write_hls(self, model):
         """
@@ -59,4 +59,3 @@ def write_hls(self, model):
         self.write_board_script(model)
         os.remove(model.config.get_output_dir() + '.tar.gz')
         self.write_tar(model)
-        

From d2a5aa6b42ab4184b7f92bb9a522aeeef15febf2 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 13:50:50 +0200
Subject: [PATCH 24/47] Remove unused imports

---
 hls4ml/writer/vitis_writer.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index d23098a561..baaf20d294 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -1,7 +1,5 @@
 import glob
 import os
-from distutils.dir_util import copy_tree
-from shutil import copy, copyfile
 
 from hls4ml.writer.vivado_writer import VivadoWriter
 

From e76fcde777a9a7f3d70c74e9568e9894ee518000 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 14:00:12 +0200
Subject: [PATCH 25/47] Run pre-commit

---
 .../vitis/passes/fifo_depth_optimization.py   | 38 +++++++++++--------
 hls4ml/writer/vitis_writer.py                 |  1 +
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index ec47a9753f..c98e4247e4 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -11,8 +11,9 @@ def initialize_large_fifos(model, profiling_fifo_depth):
         model (ModelGraph): The model to which FIFO depth optimization is applied.
         profiling_fifo_depth (int): A large non-negative integer, must be larger than the max expected depth of the FIFOs.
     """
-    # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so they will be profiled
-    # alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be used inside build_prj.tcl to override all FIFO depths with the specified value
+    # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
+    # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
+    # used inside build_prj.tcl to override all FIFO depths with the specified value
     vars_to_profile = {
         k: v
         for k, v in model.output_vars.items()
@@ -25,8 +26,9 @@ def initialize_large_fifos(model, profiling_fifo_depth):
 
 
 def override_test_bench(model):
-    """In order for the FIFO depth profiling to produce correct results, it is necessary for the cosimulation to call the top function - Vitis IP at **least twice**.
-    The test bench produced by the Vivado Writer is overwritten by adding a for-loop over the top function.
+    """In order for the FIFO depth profiling to produce correct results, it is necessary for the cosimulation to
+    call the top function - Vitis IP at **least twice**. The test bench produced by the Vivado Writer is
+    overwritten by adding a for-loop over the top function.
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
@@ -66,8 +68,9 @@ def override_test_bench(model):
 
 
 def execute_cosim_to_profile_fifos(model):
-    """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**, to properly profile the max FIFO depths.
-    The function will momentarily replace the initial test bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from
+    """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**,
+    to properly profile the max FIFO depths.     The function will momentarily replace the initial test
+    bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from
     the user-written script that utilized hls4ml will reinitilize the original test bench.
 
     Args:
@@ -112,7 +115,8 @@ def get_vitis_optimized_fifo_depths(model):
     )
     os.system(f"unzip -q -o {path_to_zip_file}channel.zip -d {path_to_zip_file}")
 
-    # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective chan_status*.csv file
+    # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective
+    # chan_status*.csv file
     names_file_path = (
         model.config.get_output_dir()
         + "/"
@@ -146,7 +150,8 @@ def generate_max_depth_file(model, optmized_fifo_depths):
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
-        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized
+        depths as values.
     """
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
         json.dump(optmized_fifo_depths, f, indent=4)
@@ -157,7 +162,8 @@ def set_optimized_fifo_depths(model, optmized_fifo_depths):
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
-        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized depths as values.
+        optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized
+        depths as values.
     """
 
     # iterate through the layer output FIFOs
@@ -176,9 +182,11 @@ def __init__(self):
         pass
 
     def transform(self, model):
-        """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the initial FIFOs set by hls4ml might be larger than required.
-        At the end of the optimization the FIFOs will have the largest depths achieved during cosimulation without causing any deadlocks between the layers (producer-consumer)
-        , thus no additional delays between the layers. In some cases, this optimization might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks.
+        """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the
+        initial FIFOs set by hls4ml might be larger than required. At the end of the optimization the FIFOs will
+        have the largest depths achieved during cosimulation without causing any deadlocks between the layers
+        (producer-consumer), thus no additional delays between the layers. In some cases, this optimization
+        might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks.
 
         Args:
             model (ModelGraph): The model to which FIFO depth optimization is applied.
@@ -192,9 +200,9 @@ def transform(self, model):
         """
 
         # use `large_fifo_depth = 0` to keep the default fifo depth
-        profiling_fifo_depth = getattr(
-            self, "profiling_fifo_depth", 100_000
-        )  # consider changing 100_000 either with a very very large value > of any total bram storage space or via vitis 2023.2 c-simulation
+        # consider changing 100_000 either with a very very large value > of any total bram storage space
+        # or via vitis 2023.2 c-simulation
+        profiling_fifo_depth = getattr(self, "profiling_fifo_depth", 100_000)
 
         if not isinstance(profiling_fifo_depth, int) or profiling_fifo_depth < 0:
             raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer")
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index baaf20d294..2e8a1498b7 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -1,5 +1,6 @@
 import glob
 import os
+from shutil import copy
 
 from hls4ml.writer.vivado_writer import VivadoWriter
 

From 3328cf9bd3ec8c2fe522335a9fe76af7a54fd1e6 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 18 Jul 2024 14:01:44 +0200
Subject: [PATCH 26/47] Remove comment

---
 hls4ml/writer/vitis_writer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index 2e8a1498b7..e0863945ab 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -32,7 +32,7 @@ def write_board_script(self, model):
         ###################
         # project.tcl
         ###################
-        # project.tcl
+
         f = open(f'{model.config.get_output_dir()}/project.tcl', 'w')
         f.write('variable project_name\n')
         f.write(f'set project_name "{model.config.get_project_name()}"\n')

From 01d585135890f082515ff6e21fa40599499f7750 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Fri, 19 Jul 2024 15:06:52 +0200
Subject: [PATCH 27/47] Fix typo and documentation

---
 .../backends/vitis/passes/fifo_depth_optimization.py  | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index c98e4247e4..e89a52f7b6 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -69,9 +69,8 @@ def override_test_bench(model):
 
 def execute_cosim_to_profile_fifos(model):
     """Execute a cosimulation with a testh bench that calls the top function - Vitis IP at **least twice**,
-    to properly profile the max FIFO depths.     The function will momentarily replace the initial test
-    bench with a suitable one for the optimization, and a converter call (i.e convert_from_keras_model()) from
-    the user-written script that utilized hls4ml will reinitilize the original test bench.
+    to properly profile the max FIFO depths. The function will momentarily replace the initial test bench
+    with a suitable one for the optimization, and after the optimizer pass, the original test bench reinitialized.
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
@@ -215,11 +214,11 @@ def transform(self, model):
 
         execute_cosim_to_profile_fifos(model)
 
-        optmized_fifo_depths = get_vitis_optimized_fifo_depths(model)
+        optimized_fifo_depths = get_vitis_optimized_fifo_depths(model)
 
-        generate_max_depth_file(model, optmized_fifo_depths)
+        generate_max_depth_file(model, optimized_fifo_depths)
 
-        set_optimized_fifo_depths(model, optmized_fifo_depths)
+        set_optimized_fifo_depths(model, optimized_fifo_depths)
 
         print("[hls4ml] - FIFO optimization completed")
         return False

From a133606cb7a4a0b5759cbcce0414a69def316667 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Mon, 29 Jul 2024 17:01:28 +0200
Subject: [PATCH 28/47] Remove commented out code

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index e89a52f7b6..8cb7bd9d0c 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -127,7 +127,6 @@ def get_vitis_optimized_fifo_depths(model):
     csv_fifo_depth_files = {}
     with open(names_file_path) as names_file:
         for line in names_file:
-            # if "layer" in line:
             layer_name = line.split(",")[1]
             csv_file_name = line.split(",")[3][:-1]
             csv_fifo_depth_files[layer_name] = csv_file_name

From a077c336078a09a5b2c97f0f1ca2592959b157af Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Mon, 29 Jul 2024 17:20:28 +0200
Subject: [PATCH 29/47] Init unit test

---
 .../test_optimization/test_fifo_depth.py      | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 test/pytest/test_optimization/test_fifo_depth.py

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
new file mode 100644
index 0000000000..5756417aec
--- /dev/null
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -0,0 +1,61 @@
+from pathlib import Path
+
+import numpy as np
+import pytest
+import tensorflow as tf
+
+import hls4ml
+
+test_root_path = Path(__file__).parent
+
+padds_options = ['same']
+chans_options = ['channels_last']
+io_type_options = ['io_stream']
+strides_options = [(1, 1)]
+kernel_options = [(3, 3)]
+bias_options = [False]
+# backends = ['Vivado', 'Vitis']
+backends = ['Vitis']
+
+
+@pytest.mark.parametrize('chans', chans_options)
+@pytest.mark.parametrize('padds', padds_options)
+@pytest.mark.parametrize('strides', strides_options)
+@pytest.mark.parametrize('kernels', kernel_options)
+@pytest.mark.parametrize('bias', bias_options)
+@pytest.mark.parametrize('backend', backends)
+def test_sepconv2d(chans, padds, strides, kernels, bias, io_type, backend):
+    model = tf.keras.models.Sequential()
+    input_shape = (16, 16, 3)
+    model.add(
+        tf.keras.layers.SeparableConv2D(
+            filters=8,
+            kernel_size=kernels,
+            strides=strides,
+            padding=padds,
+            input_shape=input_shape,
+            kernel_initializer='normal',
+            use_bias=bias,
+            data_format=chans,
+        )
+    )
+
+    model.compile(optimizer='adam', loss='mse')
+    X_input = np.random.rand(100, *input_shape)
+    keras_prediction = model.predict(X_input)
+    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
+    stride_cfg = str(strides).replace(', ', '_').replace('(', '').replace(')', '')
+    kernel_cfg = str(kernels).replace(', ', '_').replace('(', '').replace(')', '')
+    output_dir = str(
+        test_root_path
+        / 'hls4mlprj_sepconv2d_{}_strides_{}_kernels_{}_{}_padding_{}_{}'.format(
+            chans, stride_cfg, kernel_cfg, padds, backend, io_type
+        )
+    )
+    hls_model = hls4ml.converters.convert_from_keras_model(
+        model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend
+    )
+    hls_model.compile()
+    hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)

From 8daca5a3acbf7a9a8b733ac67c0b49116fc2f59a Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Tue, 30 Jul 2024 18:07:35 +0200
Subject: [PATCH 30/47] Use proper model for unit test to profile fifos

---
 .../vitis/passes/fifo_depth_optimization.py   | 27 +++---
 .../test_optimization/test_fifo_depth.py      | 82 +++++++++----------
 2 files changed, 58 insertions(+), 51 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 8cb7bd9d0c..88a972ca34 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -19,10 +19,13 @@ def initialize_large_fifos(model, profiling_fifo_depth):
         for k, v in model.output_vars.items()
         if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
     }
+
+    initial_fifo_depths = {}
     for v in vars_to_profile.values():
         if v.pragma:
+            initial_fifo_depths[v.name] = v.pragma[1]
             v.pragma = (v.pragma[0], profiling_fifo_depth)
-    return
+    return initial_fifo_depths
 
 
 def override_test_bench(model):
@@ -142,20 +145,23 @@ def get_vitis_optimized_fifo_depths(model):
     return optmized_fifo_depths
 
 
-def generate_max_depth_file(model, optmized_fifo_depths):
-    """Generate a json file with the names of the FIFOs and their optimized depths for post-processing.
-    The json file is not used by the rest of the pipeline, it is only produced for the user.
+def generate_depths_file(model, initial_fifo_depths, optimized_fifo_depths):
+    """Generate a json file with the names of the FIFOs, the initial depths set by hls4ml and their optimized depths,
+    for post-processing. The json file is not used by the rest of the pipeline, it is only produced for the user.
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
+        initial_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the initial
+        depths as values.
         optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized
         depths as values.
     """
     with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
-        json.dump(optmized_fifo_depths, f, indent=4)
+        json.dump(initial_fifo_depths, f, indent=4)
+        json.dump(optimized_fifo_depths, f, indent=4)
 
 
-def set_optimized_fifo_depths(model, optmized_fifo_depths):
+def set_optimized_fifo_depths(model, optimized_fifo_depths):
     """Set the new optimized FIFO depths.
 
     Args:
@@ -167,10 +173,11 @@ def set_optimized_fifo_depths(model, optmized_fifo_depths):
     # iterate through the layer output FIFOs
     for v in model.output_vars.values():
         if v.pragma:
-            if v.name not in optmized_fifo_depths.keys():
+
+            if v.name not in optimized_fifo_depths.keys():
                 continue
 
-            filtered_depth = optmized_fifo_depths[v.name]
+            filtered_depth = optimized_fifo_depths[v.name]
             v.pragma = (v.pragma[0], filtered_depth)
     return
 
@@ -209,13 +216,13 @@ def transform(self, model):
         if not (model.config.get_config_value("IOType") == "io_stream"):
             raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config")
 
-        initialize_large_fifos(model, profiling_fifo_depth)
+        initial_fifo_depths = initialize_large_fifos(model, profiling_fifo_depth)
 
         execute_cosim_to_profile_fifos(model)
 
         optimized_fifo_depths = get_vitis_optimized_fifo_depths(model)
 
-        generate_max_depth_file(model, optimized_fifo_depths)
+        generate_depths_file(model, initial_fifo_depths, optimized_fifo_depths)
 
         set_optimized_fifo_depths(model, optimized_fifo_depths)
 
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 5756417aec..de15d21bc2 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -1,61 +1,61 @@
 from pathlib import Path
 
-import numpy as np
+# import numpy as np
 import pytest
-import tensorflow as tf
+from tensorflow.keras.layers import SeparableConv2D
+from tensorflow.keras.models import Sequential
 
 import hls4ml
 
 test_root_path = Path(__file__).parent
 
-padds_options = ['same']
-chans_options = ['channels_last']
-io_type_options = ['io_stream']
-strides_options = [(1, 1)]
-kernel_options = [(3, 3)]
-bias_options = [False]
 # backends = ['Vivado', 'Vitis']
 backends = ['Vitis']
 
 
-@pytest.mark.parametrize('chans', chans_options)
-@pytest.mark.parametrize('padds', padds_options)
-@pytest.mark.parametrize('strides', strides_options)
-@pytest.mark.parametrize('kernels', kernel_options)
-@pytest.mark.parametrize('bias', bias_options)
+@pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backends)
-def test_sepconv2d(chans, padds, strides, kernels, bias, io_type, backend):
-    model = tf.keras.models.Sequential()
-    input_shape = (16, 16, 3)
+def test_fifo_depth(backend):
+
+    input_shape = (128, 128, 3)
+    activation = 'relu'
+    kernel_size = (3, 3)
+    padding = 'same'
+
+    model = Sequential()
     model.add(
-        tf.keras.layers.SeparableConv2D(
-            filters=8,
-            kernel_size=kernels,
-            strides=strides,
-            padding=padds,
-            input_shape=input_shape,
-            kernel_initializer='normal',
-            use_bias=bias,
-            data_format=chans,
-        )
+        SeparableConv2D(filters=4, kernel_size=kernel_size, padding=padding, activation=activation, input_shape=input_shape)
     )
+    model.add(SeparableConv2D(filters=8, kernel_size=kernel_size, padding=padding, activation=activation))
 
     model.compile(optimizer='adam', loss='mse')
-    X_input = np.random.rand(100, *input_shape)
-    keras_prediction = model.predict(X_input)
-    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
-    stride_cfg = str(strides).replace(', ', '_').replace('(', '').replace(')', '')
-    kernel_cfg = str(kernels).replace(', ', '_').replace('(', '').replace(')', '')
-    output_dir = str(
-        test_root_path
-        / 'hls4mlprj_sepconv2d_{}_strides_{}_kernels_{}_{}_padding_{}_{}'.format(
-            chans, stride_cfg, kernel_cfg, padds, backend, io_type
-        )
-    )
+    # X_input = np.random.rand(100, *input_shape)
+    # keras_prediction = model.predict(X_input)
+
+    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<8, 4>')
+    config['Flows'] = ['vitis:fifo_depth_optimization']
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=200_000)
+
+    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_backend_{backend}')
+
     hls_model = hls4ml.converters.convert_from_keras_model(
-        model, hls_config=config, output_dir=output_dir, io_type=io_type, backend=backend
+        model, io_type='io_stream', hls_config=config, output_dir=output_dir, backend=backend
     )
-    hls_model.compile()
-    hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
 
-    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
+    hls_model.build(reset=False, csim=False, synth=True, cosim=True)
+    hls4ml.report.read_vivado_report(output_dir)
+
+    # config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
+    # output_dir = str(
+    #     test_root_path
+    #     / 'hls4mlprj_fifo_depth_optimization_backend_{}'.format(
+    #         backend
+    #     )
+    # )
+    # hls_model = hls4ml.converters.convert_from_keras_model(
+    #     model, hls_config=config, output_dir=output_dir, io_type='io_stream', backend=backend
+    # )
+    # hls_model.compile()
+    # hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+
+    # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)

From 7c7e4d35dbf01af35e492312e46a55f77195d8c8 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Tue, 30 Jul 2024 18:53:55 +0200
Subject: [PATCH 31/47] Fix json generator to include before and after depths

---
 .../vitis/passes/fifo_depth_optimization.py       | 15 ++++++++++-----
 test/pytest/test_optimization/test_fifo_depth.py  | 13 +++++++++----
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 88a972ca34..314fc33259 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -23,7 +23,7 @@ def initialize_large_fifos(model, profiling_fifo_depth):
     initial_fifo_depths = {}
     for v in vars_to_profile.values():
         if v.pragma:
-            initial_fifo_depths[v.name] = v.pragma[1]
+            initial_fifo_depths[v.name] = int(v.pragma[1])
             v.pragma = (v.pragma[0], profiling_fifo_depth)
     return initial_fifo_depths
 
@@ -156,9 +156,14 @@ def generate_depths_file(model, initial_fifo_depths, optimized_fifo_depths):
         optmized_fifo_depths (Dict[str, int]): A dictionary that contains the FIFO names as keys and the optimized
         depths as values.
     """
-    with open(model.config.get_output_dir() + "/max_depth.json", "w") as f:
-        json.dump(initial_fifo_depths, f, indent=4)
-        json.dump(optimized_fifo_depths, f, indent=4)
+    depths = {}
+    for fifo_name in initial_fifo_depths.keys():
+        depths[fifo_name] = {}
+        depths[fifo_name]['initial'] = initial_fifo_depths[fifo_name]
+        depths[fifo_name]['optimized'] = optimized_fifo_depths[fifo_name]
+
+    with open(model.config.get_output_dir() + "/fifo_depths.json", "w") as f:
+        json.dump(depths, f, indent=4)
 
 
 def set_optimized_fifo_depths(model, optimized_fifo_depths):
@@ -217,7 +222,7 @@ def transform(self, model):
             raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config")
 
         initial_fifo_depths = initialize_large_fifos(model, profiling_fifo_depth)
-
+        print("AAAA", initial_fifo_depths)
         execute_cosim_to_profile_fifos(model)
 
         optimized_fifo_depths = get_vitis_optimized_fifo_depths(model)
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index de15d21bc2..3e463945e4 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-# import numpy as np
+import numpy as np
 import pytest
 from tensorflow.keras.layers import SeparableConv2D
 from tensorflow.keras.models import Sequential
@@ -12,8 +12,13 @@
 # backends = ['Vivado', 'Vitis']
 backends = ['Vitis']
 
+import os
 
-@pytest.mark.skip(reason='Skipping synthesis tests for now')
+os.environ['XILINX_VITIS'] = "/opt/Xilinx/Vitis_HLS/2023.2/"
+os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
+
+
+# @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backends)
 def test_fifo_depth(backend):
 
@@ -29,8 +34,8 @@ def test_fifo_depth(backend):
     model.add(SeparableConv2D(filters=8, kernel_size=kernel_size, padding=padding, activation=activation))
 
     model.compile(optimizer='adam', loss='mse')
-    # X_input = np.random.rand(100, *input_shape)
-    # keras_prediction = model.predict(X_input)
+    X_input = np.random.rand(100, *input_shape)
+    keras_prediction = model.predict(X_input)
 
     config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<8, 4>')
     config['Flows'] = ['vitis:fifo_depth_optimization']

From 54822216dd351516a5e4d0bd55fcdacee4971f81 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 09:33:28 +0200
Subject: [PATCH 32/47] Set up full test

---
 .../test_optimization/test_fifo_depth.py      | 63 ++++++++++++++-----
 1 file changed, 47 insertions(+), 16 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 3e463945e4..2b32133bbb 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -1,5 +1,6 @@
 from pathlib import Path
 
+import json
 import numpy as np
 import pytest
 from tensorflow.keras.layers import SeparableConv2D
@@ -17,11 +18,30 @@
 os.environ['XILINX_VITIS'] = "/opt/Xilinx/Vitis_HLS/2023.2/"
 os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
 
+def parse_cosim_report(project_path):
+    prj_dir = None
+    top_func_name = None
+
+    project_path = project_path + '/project.tcl'
+
+    with open(project_path) as f:
+        for line in f.readlines():
+            if 'set project_name' in line:
+                top_func_name = line.split('"')[-2]
+                prj_dir = top_func_name + '_prj'
+
+    sln_dir = project_path + '/' + prj_dir
+    cosim_file_path = sln_dir + f'/sim/report/{top_func_name}_cosim.rpt'
+    
+    if os.path.isfile(cosim_file_path):
+        return cosim_file_path
+    else:
+        raise FileNotFoundError("Co-simulation report not found.")    
 
 # @pytest.mark.skip(reason='Skipping synthesis tests for now')
-@pytest.mark.parametrize('backend', backends)
-def test_fifo_depth(backend):
+def fifo_depth_optimization_script(backend):
 
+    # build a keras model
     input_shape = (128, 128, 3)
     activation = 'relu'
     kernel_size = (3, 3)
@@ -37,6 +57,7 @@ def test_fifo_depth(backend):
     X_input = np.random.rand(100, *input_shape)
     keras_prediction = model.predict(X_input)
 
+    # execute fifo optimization
     config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<8, 4>')
     config['Flows'] = ['vitis:fifo_depth_optimization']
     hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=200_000)
@@ -47,20 +68,30 @@ def test_fifo_depth(backend):
         model, io_type='io_stream', hls_config=config, output_dir=output_dir, backend=backend
     )
 
+    # build the new project with optimized depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
-    hls4ml.report.read_vivado_report(output_dir)
-
-    # config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32,16>')
-    # output_dir = str(
-    #     test_root_path
-    #     / 'hls4mlprj_fifo_depth_optimization_backend_{}'.format(
-    #         backend
-    #     )
-    # )
-    # hls_model = hls4ml.converters.convert_from_keras_model(
-    #     model, hls_config=config, output_dir=output_dir, io_type='io_stream', backend=backend
-    # )
-    # hls_model.compile()
-    # hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+    # hls4ml.report.read_vivado_report(output_dir)
+    
+    # checks if the fifo depths decreased
+    fifo_depths = {}
+    with open(model.config.get_output_dir() + "/fifo_depths.json", "w") as fifo_depths_file:
+        fifo_depths = json.load(fifo_depths_file)
+    
+    fifo_depths_descreased = True
+    for fifo_name in fifo_depths.keys():
+        if fifo_depths[fifo_name]['optimized'] >= fifo_depths[fifo_name]['initial']:
+            fifo_depths_descreased = False
+    
+    # checks that cosimulation ran succesfully without detecting deadlocks
+    cosim_report_path = parse_cosim_report(model.config.get_output_dir())
+    
+    with open(cosim_report_path) as cosim_report_file:
+        cosim_succesful = any(line.strip() == "Pass" for line in cosim_report_file)
+        
 
     # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
+    assert cosim_succesful and fifo_depths_descreased
+
+@pytest.mark.parametrize('backend', backends)
+def test_fifo_depth():
+    
\ No newline at end of file

From 3335ffae0e718e8e8a9cc892aeef3444fb5d311d Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 09:48:43 +0200
Subject: [PATCH 33/47] Set up exception tests

---
 .../vitis/passes/fifo_depth_optimization.py   |  4 +-
 .../test_optimization/test_fifo_depth.py      | 51 +++++++++++++------
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 314fc33259..2e305f7f38 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -215,11 +215,11 @@ def transform(self, model):
         profiling_fifo_depth = getattr(self, "profiling_fifo_depth", 100_000)
 
         if not isinstance(profiling_fifo_depth, int) or profiling_fifo_depth < 0:
-            raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer")
+            raise ValueError("The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer.")
 
         # check axi-stream or io-stream
         if not (model.config.get_config_value("IOType") == "io_stream"):
-            raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config")
+            raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config.")
 
         initial_fifo_depths = initialize_large_fifos(model, profiling_fifo_depth)
         print("AAAA", initial_fifo_depths)
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 2b32133bbb..6ee8834013 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -1,17 +1,19 @@
+import json
 from pathlib import Path
 
-import json
 import numpy as np
 import pytest
 from tensorflow.keras.layers import SeparableConv2D
 from tensorflow.keras.models import Sequential
+import re
 
 import hls4ml
 
 test_root_path = Path(__file__).parent
 
 # backends = ['Vivado', 'Vitis']
-backends = ['Vitis']
+io_type_options = ['io_stream', 'io_parallel']
+backend_options = ['Vitis']
 
 import os
 
@@ -32,14 +34,14 @@ def parse_cosim_report(project_path):
 
     sln_dir = project_path + '/' + prj_dir
     cosim_file_path = sln_dir + f'/sim/report/{top_func_name}_cosim.rpt'
-    
+
     if os.path.isfile(cosim_file_path):
         return cosim_file_path
     else:
-        raise FileNotFoundError("Co-simulation report not found.")    
+        raise FileNotFoundError("Co-simulation report not found.")
 
 # @pytest.mark.skip(reason='Skipping synthesis tests for now')
-def fifo_depth_optimization_script(backend):
+def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     # build a keras model
     input_shape = (128, 128, 3)
@@ -60,38 +62,57 @@ def fifo_depth_optimization_script(backend):
     # execute fifo optimization
     config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<8, 4>')
     config['Flows'] = ['vitis:fifo_depth_optimization']
-    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=200_000)
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=profiling_fifo_depth)
 
     output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_backend_{backend}')
 
     hls_model = hls4ml.converters.convert_from_keras_model(
-        model, io_type='io_stream', hls_config=config, output_dir=output_dir, backend=backend
+        model, io_type=io_type, hls_config=config, output_dir=output_dir, backend=backend
     )
 
     # build the new project with optimized depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
     # hls4ml.report.read_vivado_report(output_dir)
-    
+
     # checks if the fifo depths decreased
     fifo_depths = {}
     with open(model.config.get_output_dir() + "/fifo_depths.json", "w") as fifo_depths_file:
         fifo_depths = json.load(fifo_depths_file)
-    
+
     fifo_depths_descreased = True
     for fifo_name in fifo_depths.keys():
         if fifo_depths[fifo_name]['optimized'] >= fifo_depths[fifo_name]['initial']:
             fifo_depths_descreased = False
-    
+
     # checks that cosimulation ran succesfully without detecting deadlocks
     cosim_report_path = parse_cosim_report(model.config.get_output_dir())
-    
+
     with open(cosim_report_path) as cosim_report_file:
         cosim_succesful = any(line.strip() == "Pass" for line in cosim_report_file)
-        
+
 
     # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
     assert cosim_succesful and fifo_depths_descreased
 
-@pytest.mark.parametrize('backend', backends)
-def test_fifo_depth():
-    
\ No newline at end of file
+@pytest.mark.parametrize('backend', backend_options)
+def test_fifo_depth(backend):
+    profiling_fifo_depth = -2
+    io_type = 'io_stream'
+    value_error_expected_message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
+    with pytest.raises(ValueError, match=re.escape(value_error_expected_message)):
+        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+        
+    profiling_fifo_depth = "aaa"
+    with pytest.raises(ValueError, match=re.escape(value_error_expected_message)):
+        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+        
+    profiling_fifo_depth = 200_000
+    io_type = 'io_parallel'
+    runtime_error_expected_message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
+    with pytest.raises(RuntimeError, match=re.escape(runtime_error_expected_message)):
+            fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+            
+    # profiling_fifo_depth = "asdada"
+    # io_type = 'io_stream'
+    # with pytest.raises(ValueError, match="The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer"):
+    #     fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)

From ede391e3a112056c42efa63c50c99854a4830ba5 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 10:10:37 +0200
Subject: [PATCH 34/47] Clean test

---
 .../test_optimization/test_fifo_depth.py      | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 6ee8834013..4e3b7c6282 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -93,24 +93,32 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
     assert cosim_succesful and fifo_depths_descreased
+     
+def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
+    with pytest.raises(error, match=re.escape(message)):
+        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)   
+    
+def expect_value_error(backend, profiling_fifo_depth):
+    io_type = 'io_stream'
+    value_error_expected_message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
+    expect_exception(ValueError, value_error_expected_message, backend, profiling_fifo_depth, io_type)
+
+def expect_runtime_error(backend, io_type):
+    profiling_fifo_depth = 200_000
+    runtime_error_expected_message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
+    expect_exception(RuntimeError, runtime_error_expected_message, backend, profiling_fifo_depth, io_type)
 
 @pytest.mark.parametrize('backend', backend_options)
 def test_fifo_depth(backend):
     profiling_fifo_depth = -2
-    io_type = 'io_stream'
-    value_error_expected_message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
-    with pytest.raises(ValueError, match=re.escape(value_error_expected_message)):
-        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
-        
-    profiling_fifo_depth = "aaa"
-    with pytest.raises(ValueError, match=re.escape(value_error_expected_message)):
-        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+    expect_value_error(backend, profiling_fifo_depth)
+    
+    profiling_fifo_depth = "a"
+    expect_value_error(backend, profiling_fifo_depth)
         
-    profiling_fifo_depth = 200_000
     io_type = 'io_parallel'
-    runtime_error_expected_message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
-    with pytest.raises(RuntimeError, match=re.escape(runtime_error_expected_message)):
-            fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+    expect_runtime_error(backend, io_type)
+
             
     # profiling_fifo_depth = "asdada"
     # io_type = 'io_stream'

From 25ca08afd5334b5163fa937e5aa0e501840ae7cc Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 11:29:54 +0200
Subject: [PATCH 35/47] Fix full test

---
 .../vitis/passes/fifo_depth_optimization.py   |  2 +-
 .../test_optimization/test_fifo_depth.py      | 31 +++++++++----------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 2e305f7f38..6cf6fdeb7e 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -222,7 +222,7 @@ def transform(self, model):
             raise RuntimeError("To use this optimization you have to set `IOType` field to `io_stream` in the HLS config.")
 
         initial_fifo_depths = initialize_large_fifos(model, profiling_fifo_depth)
-        print("AAAA", initial_fifo_depths)
+
         execute_cosim_to_profile_fifos(model)
 
         optimized_fifo_depths = get_vitis_optimized_fifo_depths(model)
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 4e3b7c6282..bdcfac2d65 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -24,23 +24,21 @@ def parse_cosim_report(project_path):
     prj_dir = None
     top_func_name = None
 
-    project_path = project_path + '/project.tcl'
+    project_tcl_path = project_path + '/project.tcl'
 
-    with open(project_path) as f:
+    with open(project_tcl_path) as f:
         for line in f.readlines():
             if 'set project_name' in line:
                 top_func_name = line.split('"')[-2]
                 prj_dir = top_func_name + '_prj'
 
-    sln_dir = project_path + '/' + prj_dir
-    cosim_file_path = sln_dir + f'/sim/report/{top_func_name}_cosim.rpt'
-
+    cosim_file_path = project_path + '/' + prj_dir + f'/solution1/sim/report/{top_func_name}_cosim.rpt'
+    
     if os.path.isfile(cosim_file_path):
         return cosim_file_path
     else:
         raise FileNotFoundError("Co-simulation report not found.")
 
-# @pytest.mark.skip(reason='Skipping synthesis tests for now')
 def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     # build a keras model
@@ -72,11 +70,11 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     # build the new project with optimized depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
-    # hls4ml.report.read_vivado_report(output_dir)
+    hls4ml.report.read_vivado_report(output_dir)
 
     # checks if the fifo depths decreased
     fifo_depths = {}
-    with open(model.config.get_output_dir() + "/fifo_depths.json", "w") as fifo_depths_file:
+    with open(hls_model.config.get_output_dir() + "/fifo_depths.json", "r") as fifo_depths_file:
         fifo_depths = json.load(fifo_depths_file)
 
     fifo_depths_descreased = True
@@ -85,11 +83,10 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
             fifo_depths_descreased = False
 
     # checks that cosimulation ran succesfully without detecting deadlocks
-    cosim_report_path = parse_cosim_report(model.config.get_output_dir())
+    cosim_report_path = parse_cosim_report(hls_model.config.get_output_dir())
 
     with open(cosim_report_path) as cosim_report_file:
-        cosim_succesful = any(line.strip() == "Pass" for line in cosim_report_file)
-
+        cosim_succesful = any("Pass" in line for line in cosim_report_file)
 
     # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
     assert cosim_succesful and fifo_depths_descreased
@@ -108,6 +105,12 @@ def expect_runtime_error(backend, io_type):
     runtime_error_expected_message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
     expect_exception(RuntimeError, runtime_error_expected_message, backend, profiling_fifo_depth, io_type)
 
+def expect_succeful_execution(backend):
+    profiling_fifo_depth = 200_000
+    io_type = 'io_stream'
+    fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+
+# @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_fifo_depth(backend):
     profiling_fifo_depth = -2
@@ -119,8 +122,4 @@ def test_fifo_depth(backend):
     io_type = 'io_parallel'
     expect_runtime_error(backend, io_type)
 
-            
-    # profiling_fifo_depth = "asdada"
-    # io_type = 'io_stream'
-    # with pytest.raises(ValueError, match="The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer"):
-    #     fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+    expect_succeful_execution(backend)        

From 34949bb95075a77b56540cb75d29ddbd8ebeb0cb Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 11:36:02 +0200
Subject: [PATCH 36/47] Clean test

---
 .../test_optimization/test_fifo_depth.py       | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index bdcfac2d65..acebffc6f8 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -77,10 +77,9 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
     with open(hls_model.config.get_output_dir() + "/fifo_depths.json", "r") as fifo_depths_file:
         fifo_depths = json.load(fifo_depths_file)
 
-    fifo_depths_descreased = True
-    for fifo_name in fifo_depths.keys():
-        if fifo_depths[fifo_name]['optimized'] >= fifo_depths[fifo_name]['initial']:
-            fifo_depths_descreased = False
+    fifo_depths_decreased = all(
+        fifo['optimized'] < fifo['initial'] for fifo in fifo_depths.values()
+    )
 
     # checks that cosimulation ran succesfully without detecting deadlocks
     cosim_report_path = parse_cosim_report(hls_model.config.get_output_dir())
@@ -89,7 +88,7 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
         cosim_succesful = any("Pass" in line for line in cosim_report_file)
 
     # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
-    assert cosim_succesful and fifo_depths_descreased
+    assert cosim_succesful and fifo_depths_decreased
      
 def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
     with pytest.raises(error, match=re.escape(message)):
@@ -113,13 +112,10 @@ def expect_succeful_execution(backend):
 # @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_fifo_depth(backend):
-    profiling_fifo_depth = -2
-    expect_value_error(backend, profiling_fifo_depth)
+    expect_value_error(backend, profiling_fifo_depth=-2)
     
-    profiling_fifo_depth = "a"
-    expect_value_error(backend, profiling_fifo_depth)
+    expect_value_error(backend, profiling_fifo_depth="a")
         
-    io_type = 'io_parallel'
-    expect_runtime_error(backend, io_type)
+    expect_runtime_error(backend, io_type='io_parallel')
 
     expect_succeful_execution(backend)        

From 1a1f34758135fd2e757a992fb29f807a7d042175 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 13:08:47 +0200
Subject: [PATCH 37/47] Run precommit

---
 .../test_optimization/test_fifo_depth.py      | 80 +++++++++----------
 1 file changed, 37 insertions(+), 43 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index acebffc6f8..cc8fb02b0a 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -1,24 +1,19 @@
 import json
+import os
+import re
 from pathlib import Path
 
 import numpy as np
 import pytest
 from tensorflow.keras.layers import SeparableConv2D
 from tensorflow.keras.models import Sequential
-import re
 
 import hls4ml
 
 test_root_path = Path(__file__).parent
 
-# backends = ['Vivado', 'Vitis']
-io_type_options = ['io_stream', 'io_parallel']
 backend_options = ['Vitis']
 
-import os
-
-os.environ['XILINX_VITIS'] = "/opt/Xilinx/Vitis_HLS/2023.2/"
-os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
 
 def parse_cosim_report(project_path):
     prj_dir = None
@@ -33,12 +28,13 @@ def parse_cosim_report(project_path):
                 prj_dir = top_func_name + '_prj'
 
     cosim_file_path = project_path + '/' + prj_dir + f'/solution1/sim/report/{top_func_name}_cosim.rpt'
-    
+
     if os.path.isfile(cosim_file_path):
         return cosim_file_path
     else:
         raise FileNotFoundError("Co-simulation report not found.")
 
+
 def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     # build a keras model
@@ -58,28 +54,31 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
     keras_prediction = model.predict(X_input)
 
     # execute fifo optimization
-    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<8, 4>')
+    config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32, 16>')
     config['Flows'] = ['vitis:fifo_depth_optimization']
-    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=profiling_fifo_depth)
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
+        profiling_fifo_depth=profiling_fifo_depth
+    )
 
     output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_backend_{backend}')
 
     hls_model = hls4ml.converters.convert_from_keras_model(
         model, io_type=io_type, hls_config=config, output_dir=output_dir, backend=backend
     )
+    hls_model.compile()
+    hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
+
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
 
     # build the new project with optimized depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
-    hls4ml.report.read_vivado_report(output_dir)
 
     # checks if the fifo depths decreased
     fifo_depths = {}
-    with open(hls_model.config.get_output_dir() + "/fifo_depths.json", "r") as fifo_depths_file:
+    with open(hls_model.config.get_output_dir() + "/fifo_depths.json") as fifo_depths_file:
         fifo_depths = json.load(fifo_depths_file)
 
-    fifo_depths_decreased = all(
-        fifo['optimized'] < fifo['initial'] for fifo in fifo_depths.values()
-    )
+    fifo_depths_decreased = all(fifo['optimized'] < fifo['initial'] for fifo in fifo_depths.values())
 
     # checks that cosimulation ran succesfully without detecting deadlocks
     cosim_report_path = parse_cosim_report(hls_model.config.get_output_dir())
@@ -87,35 +86,30 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
     with open(cosim_report_path) as cosim_report_file:
         cosim_succesful = any("Pass" in line for line in cosim_report_file)
 
-    # np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
     assert cosim_succesful and fifo_depths_decreased
-     
+
+
 def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
     with pytest.raises(error, match=re.escape(message)):
-        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)   
-    
-def expect_value_error(backend, profiling_fifo_depth):
-    io_type = 'io_stream'
-    value_error_expected_message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
-    expect_exception(ValueError, value_error_expected_message, backend, profiling_fifo_depth, io_type)
-
-def expect_runtime_error(backend, io_type):
-    profiling_fifo_depth = 200_000
-    runtime_error_expected_message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
-    expect_exception(RuntimeError, runtime_error_expected_message, backend, profiling_fifo_depth, io_type)
-
-def expect_succeful_execution(backend):
-    profiling_fifo_depth = 200_000
-    io_type = 'io_stream'
-    fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
-
-# @pytest.mark.skip(reason='Skipping synthesis tests for now')
+        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+
+
+# test faulty inputs of profiling_fifo_depth to verify that an exception is raised
+@pytest.mark.parametrize('backend', backend_options)
+@pytest.mark.parametrize('profiling_fifo_depth', [-2, "a"])
+def test_value_error(backend, profiling_fifo_depth):
+    message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
+    expect_exception(ValueError, message, backend, profiling_fifo_depth, io_type='io_stream')
+
+
+# test with io_type='io_parallel' to verify that an exception is raised
+@pytest.mark.parametrize('backend', backend_options)
+def test_runtime_error(backend):
+    message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
+    expect_exception(RuntimeError, message, backend, profiling_fifo_depth=200_000, io_type='io_parallel')
+
+
 @pytest.mark.parametrize('backend', backend_options)
-def test_fifo_depth(backend):
-    expect_value_error(backend, profiling_fifo_depth=-2)
-    
-    expect_value_error(backend, profiling_fifo_depth="a")
-        
-    expect_runtime_error(backend, io_type='io_parallel')
-
-    expect_succeful_execution(backend)        
+def test_successful_execution(backend):
+    fifo_depth_optimization_script(backend, profiling_fifo_depth=200_000, io_type='io_stream')

From 18f9385be43071d2eb3d8e56cde119b0fad58217 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 14:35:51 +0200
Subject: [PATCH 38/47] Force the cosimulation to execute twice

---
 test/pytest/test_optimization/test_fifo_depth.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index cc8fb02b0a..d28b4672f1 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -9,6 +9,7 @@
 from tensorflow.keras.models import Sequential
 
 import hls4ml
+from hls4ml.backends.vitis.passes.fifo_depth_optimization import override_test_bench
 
 test_root_path = Path(__file__).parent
 
@@ -36,8 +37,7 @@ def parse_cosim_report(project_path):
 
 
 def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
-
-    # build a keras model
+    # create a keras model
     input_shape = (128, 128, 3)
     activation = 'relu'
     kernel_size = (3, 3)
@@ -48,8 +48,8 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
         SeparableConv2D(filters=4, kernel_size=kernel_size, padding=padding, activation=activation, input_shape=input_shape)
     )
     model.add(SeparableConv2D(filters=8, kernel_size=kernel_size, padding=padding, activation=activation))
-
     model.compile(optimizer='adam', loss='mse')
+
     X_input = np.random.rand(100, *input_shape)
     keras_prediction = model.predict(X_input)
 
@@ -70,7 +70,12 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
 
-    # build the new project with optimized depths
+    # force the top-function to execute twice in the cosimulation, to verify no deadlocks occur even
+    # when streaming multiple inputs into the network
+    override_test_bench(hls_model)
+
+    # build the new project with optimized depths and execute cosimulation to check for deadlocks
+    # due to the new FIFO depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
 
     # checks if the fifo depths decreased
@@ -86,7 +91,6 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
     with open(cosim_report_path) as cosim_report_file:
         cosim_succesful = any("Pass" in line for line in cosim_report_file)
 
-    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
     assert cosim_succesful and fifo_depths_decreased
 
 

From e1d80a5fdb6dc286066a9e32b63bf376cff69950 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 31 Jul 2024 14:41:16 +0200
Subject: [PATCH 39/47] Skip tests

---
 test/pytest/test_optimization/test_fifo_depth.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index d28b4672f1..21ce268f76 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -100,6 +100,7 @@ def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
 
 
 # test faulty inputs of profiling_fifo_depth to verify that an exception is raised
+@pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 @pytest.mark.parametrize('profiling_fifo_depth', [-2, "a"])
 def test_value_error(backend, profiling_fifo_depth):
@@ -108,12 +109,14 @@ def test_value_error(backend, profiling_fifo_depth):
 
 
 # test with io_type='io_parallel' to verify that an exception is raised
+@pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_runtime_error(backend):
     message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
     expect_exception(RuntimeError, message, backend, profiling_fifo_depth=200_000, io_type='io_parallel')
 
 
+@pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_successful_execution(backend):
     fifo_depth_optimization_script(backend, profiling_fifo_depth=200_000, io_type='io_stream')

From 0c4f958c90a55044b9f6594c3d890aaf72d14a07 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Thu, 1 Aug 2024 10:06:01 +0200
Subject: [PATCH 40/47] Update documentation

---
 .../vitis/passes/fifo_depth_optimization.py    |  7 ++++++-
 .../test_optimization/test_fifo_depth.py       | 18 ++++++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 6cf6fdeb7e..74f5746c1c 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -5,12 +5,17 @@
 
 
 def initialize_large_fifos(model, profiling_fifo_depth):
-    """Setting all FIFO depths equal to a large value so that they can be profiled.
+    """Set all FIFO depths equal to a large value so that they can be profiled.
 
     Args:
         model (ModelGraph): The model to which FIFO depth optimization is applied.
         profiling_fifo_depth (int): A large non-negative integer, must be larger than the max expected depth of the FIFOs.
+
+    Returns:
+        Dict[str, int]: A dictionary containing FIFO names as keys and their initial depths as values is returned for
+        comparison with the optimized depths.
     """
+
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
     # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
     # used inside build_prj.tcl to override all FIFO depths with the specified value
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 21ce268f76..c8ed76e7d2 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -17,6 +17,9 @@
 
 
 def parse_cosim_report(project_path):
+    """Parse the cosimulation report to check whether the cosimulation passed or failed and therefore a deadlock is
+    detected.
+    """
     prj_dir = None
     top_func_name = None
 
@@ -37,6 +40,8 @@ def parse_cosim_report(project_path):
 
 
 def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
+    """Execute the FIFO depth optimizer on an example model."""
+
     # create a keras model
     input_shape = (128, 128, 3)
     activation = 'relu'
@@ -78,20 +83,20 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
     # due to the new FIFO depths
     hls_model.build(reset=False, csim=False, synth=True, cosim=True)
 
-    # checks if the fifo depths decreased
+    # checks if the fifo depths decreased/were optimized
     fifo_depths = {}
     with open(hls_model.config.get_output_dir() + "/fifo_depths.json") as fifo_depths_file:
         fifo_depths = json.load(fifo_depths_file)
 
     fifo_depths_decreased = all(fifo['optimized'] < fifo['initial'] for fifo in fifo_depths.values())
 
-    # checks that cosimulation ran succesfully without detecting deadlocks
+    # checks that the cosimulation ran succesfully without detecting deadlocks
     cosim_report_path = parse_cosim_report(hls_model.config.get_output_dir())
 
     with open(cosim_report_path) as cosim_report_file:
         cosim_succesful = any("Pass" in line for line in cosim_report_file)
 
-    assert cosim_succesful and fifo_depths_decreased
+    assert fifo_depths_decreased and cosim_succesful
 
 
 def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
@@ -99,19 +104,19 @@ def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
         fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
 
 
-# test faulty inputs of profiling_fifo_depth to verify that an exception is raised
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
-@pytest.mark.parametrize('profiling_fifo_depth', [-2, "a"])
+@pytest.mark.parametrize('profiling_fifo_depth', [-2, 3.14, "a"])
 def test_value_error(backend, profiling_fifo_depth):
+    """Test the FIFO depth optimizer with faulty inputs of profiling_fifo_depth to verify that an exception is raised."""
     message = "The FIFO depth for profiling (profiling_fifo_depth variable) must be a non-negative integer."
     expect_exception(ValueError, message, backend, profiling_fifo_depth, io_type='io_stream')
 
 
-# test with io_type='io_parallel' to verify that an exception is raised
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_runtime_error(backend):
+    """Test the FIFO depth optimizer with io_type='io_parallel' to verify that an exception is raised."""
     message = "To use this optimization you have to set `IOType` field to `io_stream` in the HLS config."
     expect_exception(RuntimeError, message, backend, profiling_fifo_depth=200_000, io_type='io_parallel')
 
@@ -119,4 +124,5 @@ def test_runtime_error(backend):
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_successful_execution(backend):
+    """Test the correct execution of the FIFO depth optimizer."""
     fifo_depth_optimization_script(backend, profiling_fifo_depth=200_000, io_type='io_stream')

From 92dc84964c0248c46e492478a8dc48f811f584eb Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Fri, 2 Aug 2024 17:59:20 +0200
Subject: [PATCH 41/47] Fix conflict, use built-in os function

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 2 +-
 hls4ml/writer/vitis_writer.py                           | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 74f5746c1c..87ae56cecc 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -71,7 +71,7 @@ def override_test_bench(model):
         new_test_bench.write(newline)
 
     # replace the old test bench with the new test bench that includes a for-loop
-    os.system(f"mv {path_to_new_test_bench} {path_to_old_test_bench}")
+    os.replace(path_to_new_test_bench, path_to_old_test_bench)
     return
 
 
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index e0863945ab..1d3c9e34ca 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -55,6 +55,5 @@ def write_hls(self, model):
         """
         super().write_hls(model)
         self.write_nnet_utils_overrides(model)
-        self.write_board_script(model)
-        os.remove(model.config.get_output_dir() + '.tar.gz')
         self.write_tar(model)
+        self.write_board_script(model)

From e7b4caa5d72dd7a4c6d7f817da9756083e64ccbe Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 27 Nov 2024 11:40:30 +0100
Subject: [PATCH 42/47] Set up ONNX pytest

---
 .../test_optimization/test_fifo_depth.py      | 76 +++++++++++++++++--
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index c8ed76e7d2..bbf08fa69b 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -5,6 +5,9 @@
 
 import numpy as np
 import pytest
+
+# import qonnx.core.onnx_exec as oxe
+from qonnx.core.modelwrapper import ModelWrapper
 from tensorflow.keras.layers import SeparableConv2D
 from tensorflow.keras.models import Sequential
 
@@ -12,9 +15,13 @@
 from hls4ml.backends.vitis.passes.fifo_depth_optimization import override_test_bench
 
 test_root_path = Path(__file__).parent
+example_model_path = (test_root_path / '../../example-models').resolve()
 
 backend_options = ['Vitis']
 
+os.environ['XILINX_VITIS'] = "/opt/Xilinx/Vitis_HLS/2023.2/"
+os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
+
 
 def parse_cosim_report(project_path):
     """Parse the cosimulation report to check whether the cosimulation passed or failed and therefore a deadlock is
@@ -39,8 +46,8 @@ def parse_cosim_report(project_path):
         raise FileNotFoundError("Co-simulation report not found.")
 
 
-def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
-    """Execute the FIFO depth optimizer on an example model."""
+def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
+    """Execute the FIFO depth optimization sequence on a dummy Keras model."""
 
     # create a keras model
     input_shape = (128, 128, 3)
@@ -65,7 +72,7 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
         profiling_fifo_depth=profiling_fifo_depth
     )
 
-    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_backend_{backend}')
+    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_keras_backend_{backend}')
 
     hls_model = hls4ml.converters.convert_from_keras_model(
         model, io_type=io_type, hls_config=config, output_dir=output_dir, backend=backend
@@ -75,6 +82,12 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
     np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
 
+    fifo_depth_optimization_checks(hls_model)
+
+
+def fifo_depth_optimization_checks(hls_model):
+    """Execute the FIFO depth optimization sequence on an hls4ml model."""
+
     # force the top-function to execute twice in the cosimulation, to verify no deadlocks occur even
     # when streaming multiple inputs into the network
     override_test_bench(hls_model)
@@ -101,7 +114,7 @@ def fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type):
 
 def expect_exception(error, message, backend, profiling_fifo_depth, io_type):
     with pytest.raises(error, match=re.escape(message)):
-        fifo_depth_optimization_script(backend, profiling_fifo_depth, io_type)
+        run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type)
 
 
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
@@ -123,6 +136,57 @@ def test_runtime_error(backend):
 
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
-def test_successful_execution(backend):
+def test_successful_execution_of_dummy_keras(backend):
+    """Test the correct execution of the FIFO depth optimizer."""
+    run_fifo_depth_optimization_keras(backend, profiling_fifo_depth=200_000, io_type='io_stream')
+
+
+# @pytest.fixture(scope='module')
+def get_tiny_unet_model():
+    """
+    Load tiny unet model, already channels-last and cleaned
+    """
+    dl_file = str(example_model_path / "onnx/tiny_unet_ch_last.onnx")
+    assert os.path.isfile(dl_file)
+    model = ModelWrapper(dl_file)
+    return model
+
+
+def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, model):
+    """Execute the FIFO depth optimization sequence on a ONNX/QONNX model."""
+
+    ishape = tuple(model.get_tensor_shape(model.graph.input[0].name))
+    X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape)
+    X = (np.round(X * 2**16) * 2**-16).astype(np.float32)
+    # idict = {model.graph.input[0].name: X}
+    # y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name]
+
+    config = hls4ml.utils.config.config_from_onnx_model(
+        model, granularity='name', backend=backend, default_precision='fixed<4,2>'
+    )
+
+    config['Flows'] = ['vitis:fifo_depth_optimization']
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
+        profiling_fifo_depth=profiling_fifo_depth
+    )
+
+    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_tiny_unet_backend_{backend}')
+
+    hls_model = hls4ml.converters.convert_from_onnx_model(
+        model,
+        output_dir=output_dir,
+        io_type='io_stream',
+        backend=backend,
+        hls_config=config,
+    )
+    hls_model.compile()
+    # y_hls4ml = hls_model.predict(np.ascontiguousarray(X))
+    # np.testing.assert_array_equal(y_qonnx.ravel(), y_hls4ml.ravel())s
+
+    # fifo_depth_optimization_checks(hls_model)
+
+
+@pytest.mark.parametrize('backend', backend_options)
+def test_successful_execution_of_tiny_unet(backend):
     """Test the correct execution of the FIFO depth optimizer."""
-    fifo_depth_optimization_script(backend, profiling_fifo_depth=200_000, io_type='io_stream')
+    run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth=200_000, io_type='io_stream', model=get_tiny_unet_model())

From a2557fd69589f1331d25ba262c60abefd0b21f13 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Mon, 2 Dec 2024 10:27:58 +0100
Subject: [PATCH 43/47] Rebase and fix optimizer after main branch changes

---
 .../vitis/passes/fifo_depth_optimization.py   | 23 +++++++------
 .../test_optimization/test_fifo_depth.py      | 33 +++++++++----------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 87ae56cecc..204024294f 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -18,13 +18,14 @@ def initialize_large_fifos(model, profiling_fifo_depth):
 
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
     # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
-    # used inside build_prj.tcl to override all FIFO depths with the specified value
+    # used inside build_prj.tcl to override all FIFO depths with the specified value    
     vars_to_profile = {
         k: v
         for k, v in model.output_vars.items()
-        if v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-    }
+        if ("VivadoStreamVariable" in str(type(v))) and v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
 
+    }
+    
     initial_fifo_depths = {}
     for v in vars_to_profile.values():
         if v.pragma:
@@ -89,7 +90,7 @@ def execute_cosim_to_profile_fifos(model):
 
     model.build(
         reset=False,
-        csim=True,
+        csim=False,
         synth=True,
         cosim=True,
         validation=False,
@@ -120,6 +121,7 @@ def get_vitis_optimized_fifo_depths(model):
         + "_prj"
         + "/solution1/.autopilot/db/channel_depth_info/"
     )
+    
     os.system(f"unzip -q -o {path_to_zip_file}channel.zip -d {path_to_zip_file}")
 
     # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective
@@ -181,14 +183,15 @@ def set_optimized_fifo_depths(model, optimized_fifo_depths):
     """
 
     # iterate through the layer output FIFOs
-    for v in model.output_vars.values():
-        if v.pragma:
+    for _, v in model.output_vars.items():
+        if "VivadoStreamVariable" in str(type(v)):
+            if v.pragma:
 
-            if v.name not in optimized_fifo_depths.keys():
-                continue
+                if v.name not in optimized_fifo_depths.keys():
+                    continue
 
-            filtered_depth = optimized_fifo_depths[v.name]
-            v.pragma = (v.pragma[0], filtered_depth)
+                filtered_depth = optimized_fifo_depths[v.name]
+                v.pragma = (v.pragma[0], filtered_depth)
     return
 
 
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index bbf08fa69b..52ad31bb78 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -6,7 +6,7 @@
 import numpy as np
 import pytest
 
-# import qonnx.core.onnx_exec as oxe
+import qonnx.core.onnx_exec as oxe
 from qonnx.core.modelwrapper import ModelWrapper
 from tensorflow.keras.layers import SeparableConv2D
 from tensorflow.keras.models import Sequential
@@ -15,14 +15,10 @@
 from hls4ml.backends.vitis.passes.fifo_depth_optimization import override_test_bench
 
 test_root_path = Path(__file__).parent
-example_model_path = (test_root_path / '../../example-models').resolve()
+example_model_path = (test_root_path / '../../../example-models').resolve()
 
 backend_options = ['Vitis']
 
-os.environ['XILINX_VITIS'] = "/opt/Xilinx/Vitis_HLS/2023.2/"
-os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
-
-
 def parse_cosim_report(project_path):
     """Parse the cosimulation report to check whether the cosimulation passed or failed and therefore a deadlock is
     detected.
@@ -62,7 +58,7 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
     model.add(SeparableConv2D(filters=8, kernel_size=kernel_size, padding=padding, activation=activation))
     model.compile(optimizer='adam', loss='mse')
 
-    X_input = np.random.rand(100, *input_shape)
+    X_input = np.random.rand(1, *input_shape)
     keras_prediction = model.predict(X_input)
 
     # execute fifo optimization
@@ -77,10 +73,11 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
     hls_model = hls4ml.converters.convert_from_keras_model(
         model, io_type=io_type, hls_config=config, output_dir=output_dir, backend=backend
     )
+
     hls_model.compile()
     hls_prediction = hls_model.predict(X_input).reshape(keras_prediction.shape)
 
-    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.001)
+    np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.01)
 
     fifo_depth_optimization_checks(hls_model)
 
@@ -146,7 +143,7 @@ def get_tiny_unet_model():
     """
     Load tiny unet model, already channels-last and cleaned
     """
-    dl_file = str(example_model_path / "onnx/tiny_unet_ch_last.onnx")
+    dl_file = str(example_model_path / "onnx/branched_model_ch_last.onnx")
     assert os.path.isfile(dl_file)
     model = ModelWrapper(dl_file)
     return model
@@ -158,11 +155,11 @@ def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, mod
     ishape = tuple(model.get_tensor_shape(model.graph.input[0].name))
     X = np.random.uniform(low=0, high=1, size=np.prod(ishape)).reshape(ishape)
     X = (np.round(X * 2**16) * 2**-16).astype(np.float32)
-    # idict = {model.graph.input[0].name: X}
-    # y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name]
+    idict = {model.graph.input[0].name: X}
+    y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name]
 
     config = hls4ml.utils.config.config_from_onnx_model(
-        model, granularity='name', backend=backend, default_precision='fixed<4,2>'
+        model, granularity='name', backend=backend, default_precision='fixed<32,16>'
     )
 
     config['Flows'] = ['vitis:fifo_depth_optimization']
@@ -170,22 +167,22 @@ def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, mod
         profiling_fifo_depth=profiling_fifo_depth
     )
 
-    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_tiny_unet_backend_{backend}')
+    output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_branched_model_backend_{backend}')
 
     hls_model = hls4ml.converters.convert_from_onnx_model(
         model,
         output_dir=output_dir,
-        io_type='io_stream',
+        io_type=io_type,
         backend=backend,
         hls_config=config,
     )
     hls_model.compile()
-    # y_hls4ml = hls_model.predict(np.ascontiguousarray(X))
-    # np.testing.assert_array_equal(y_qonnx.ravel(), y_hls4ml.ravel())s
-
-    # fifo_depth_optimization_checks(hls_model)
+    y_hls4ml = hls_model.predict(np.ascontiguousarray(X))
+    np.testing.assert_array_equal(y_qonnx.ravel(), y_hls4ml.ravel())
 
+    fifo_depth_optimization_checks(hls_model)
 
+@pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_successful_execution_of_tiny_unet(backend):
     """Test the correct execution of the FIFO depth optimizer."""

From 496a7e3d66a971584e5b24cebcc70a1fc015ebb0 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Mon, 2 Dec 2024 10:57:58 +0100
Subject: [PATCH 44/47] Update documentation

---
 docs/advanced/fifo_depth.rst                  | 14 ++++++++
 .../vitis/passes/fifo_depth_optimization.py   | 36 +++++++++----------
 .../test_optimization/test_fifo_depth.py      |  7 ++--
 3 files changed, 37 insertions(+), 20 deletions(-)

diff --git a/docs/advanced/fifo_depth.rst b/docs/advanced/fifo_depth.rst
index 703bff0891..9718a237b7 100644
--- a/docs/advanced/fifo_depth.rst
+++ b/docs/advanced/fifo_depth.rst
@@ -47,3 +47,17 @@ Then, we can convert the model, including the flow
     hls_model.build(reset=False, csim=True, synth=True, cosim=True)
 
 For more details and results, see `H. Borras et al., "Open-source FPGA-ML codesign for the MLPerf Tiny Benchmark" (2022) <https://arxiv.org/abs/2206.11791>`_.
+
+Similarly, the FIFO buffers can be optimized while using the `Vitis` backend with the following changes
+
+.. code-block:: Python
+
+    config['Flows'] = ['vitis:fifo_depth_optimization']
+    hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(profiling_fifo_depth=100_000)
+
+    hls_model = hls4ml.converters.convert_from_keras_model(model,
+                                                        io_type='io_stream',
+                                                        hls_config=config,
+                                                        output_dir='hls4mlprj_fifo_depth_opt',
+                                                        part='xc7z020clg400-1',
+                                                        backend='Vitis')
\ No newline at end of file
diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 204024294f..09c71f1c68 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -15,22 +15,22 @@ def initialize_large_fifos(model, profiling_fifo_depth):
         Dict[str, int]: A dictionary containing FIFO names as keys and their initial depths as values is returned for
         comparison with the optimized depths.
     """
-
-    # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
-    # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
-    # used inside build_prj.tcl to override all FIFO depths with the specified value    
+    
+    # filter all the output variables and keep only the internal FIFOs, excluding output objects that are not FIFOs and the inut and output FIFOs as they can't be profiled and are implementation dependant i.e AXI Stream, AXI Master or connected to another IP
     vars_to_profile = {
-        k: v
-        for k, v in model.output_vars.items()
-        if ("VivadoStreamVariable" in str(type(v))) and v != model.get_output_variables()[0] and v != model.get_input_variables()[0]
-
+        output_variable_name: output_variable
+        for output_variable_name, output_variable in model.output_vars.items()
+        if ("VivadoStreamVariable" in str(type(output_variable))) and output_variable != model.get_output_variables()[0] and output_variable != model.get_input_variables()[0]
     }
     
+    # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
+    # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
+    # used inside build_prj.tcl to override all FIFO depths with the specified value  
     initial_fifo_depths = {}
-    for v in vars_to_profile.values():
-        if v.pragma:
-            initial_fifo_depths[v.name] = int(v.pragma[1])
-            v.pragma = (v.pragma[0], profiling_fifo_depth)
+    for output_variable in vars_to_profile.values():
+        if output_variable.pragma:
+            initial_fifo_depths[output_variable.name] = int(output_variable.pragma[1])
+            output_variable.pragma = (output_variable.pragma[0], profiling_fifo_depth)
     return initial_fifo_depths
 
 
@@ -183,15 +183,15 @@ def set_optimized_fifo_depths(model, optimized_fifo_depths):
     """
 
     # iterate through the layer output FIFOs
-    for _, v in model.output_vars.items():
-        if "VivadoStreamVariable" in str(type(v)):
-            if v.pragma:
+    for output_variable in model.output_vars.values():
+        if "VivadoStreamVariable" in str(type(output_variable)):
+            if output_variable.pragma:
 
-                if v.name not in optimized_fifo_depths.keys():
+                if output_variable.name not in optimized_fifo_depths.keys():
                     continue
 
-                filtered_depth = optimized_fifo_depths[v.name]
-                v.pragma = (v.pragma[0], filtered_depth)
+                filtered_depth = optimized_fifo_depths[output_variable.name]
+                output_variable.pragma = (output_variable.pragma[0], filtered_depth)
     return
 
 
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 52ad31bb78..cffdb4590d 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -14,6 +14,7 @@
 import hls4ml
 from hls4ml.backends.vitis.passes.fifo_depth_optimization import override_test_bench
 
+
 test_root_path = Path(__file__).parent
 example_model_path = (test_root_path / '../../../example-models').resolve()
 
@@ -61,8 +62,9 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
     X_input = np.random.rand(1, *input_shape)
     keras_prediction = model.predict(X_input)
 
-    # execute fifo optimization
     config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32, 16>')
+    
+    # include the FIFO Depth optimizer do the flows
     config['Flows'] = ['vitis:fifo_depth_optimization']
     hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
         profiling_fifo_depth=profiling_fifo_depth
@@ -70,6 +72,7 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
 
     output_dir = str(test_root_path / f'hls4mlprj_fifo_depth_optimization_keras_backend_{backend}')
 
+    # execute fifo optimization
     hls_model = hls4ml.converters.convert_from_keras_model(
         model, io_type=io_type, hls_config=config, output_dir=output_dir, backend=backend
     )
@@ -79,6 +82,7 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
 
     np.testing.assert_allclose(hls_prediction, keras_prediction, rtol=0, atol=0.01)
 
+    # check that the FIFOs have been optimized successfully
     fifo_depth_optimization_checks(hls_model)
 
 
@@ -138,7 +142,6 @@ def test_successful_execution_of_dummy_keras(backend):
     run_fifo_depth_optimization_keras(backend, profiling_fifo_depth=200_000, io_type='io_stream')
 
 
-# @pytest.fixture(scope='module')
 def get_tiny_unet_model():
     """
     Load tiny unet model, already channels-last and cleaned

From 81b3acd7477a746e1cee8b79399a65ec9dc4029c Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Mon, 2 Dec 2024 11:13:24 +0100
Subject: [PATCH 45/47] Run precommit

---
 docs/advanced/fifo_depth.rst                     |  2 +-
 .../vitis/passes/fifo_depth_optimization.py      | 16 ++++++++++------
 test/pytest/test_optimization/test_fifo_depth.py |  6 +++---
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/docs/advanced/fifo_depth.rst b/docs/advanced/fifo_depth.rst
index 9718a237b7..49d12e2857 100644
--- a/docs/advanced/fifo_depth.rst
+++ b/docs/advanced/fifo_depth.rst
@@ -60,4 +60,4 @@ Similarly, the FIFO buffers can be optimized while using the `Vitis` backend wit
                                                         hls_config=config,
                                                         output_dir='hls4mlprj_fifo_depth_opt',
                                                         part='xc7z020clg400-1',
-                                                        backend='Vitis')
\ No newline at end of file
+                                                        backend='Vitis')
diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 09c71f1c68..2444843984 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -15,17 +15,21 @@ def initialize_large_fifos(model, profiling_fifo_depth):
         Dict[str, int]: A dictionary containing FIFO names as keys and their initial depths as values is returned for
         comparison with the optimized depths.
     """
-    
-    # filter all the output variables and keep only the internal FIFOs, excluding output objects that are not FIFOs and the inut and output FIFOs as they can't be profiled and are implementation dependant i.e AXI Stream, AXI Master or connected to another IP
+
+    # filter all the output variables and keep only the internal FIFOs, excluding output objects that are not FIFOs and the
+    # inut and output FIFOs as they can't be profiled and are implementation dependant i.e AXI Stream, AXI Master or
+    # connected to another IP
     vars_to_profile = {
         output_variable_name: output_variable
         for output_variable_name, output_variable in model.output_vars.items()
-        if ("VivadoStreamVariable" in str(type(output_variable))) and output_variable != model.get_output_variables()[0] and output_variable != model.get_input_variables()[0]
+        if ("VivadoStreamVariable" in str(type(output_variable)))
+        and output_variable != model.get_output_variables()[0]
+        and output_variable != model.get_input_variables()[0]
     }
-    
+
     # initialize all the fifos to `profiling_fifo_depth` so that they will be automatically implemented in BRAMs and so
     # they will be profiled. Alternatively, "config_dataflow -override_user_fifo_depth profiling_fifo_depth" can be
-    # used inside build_prj.tcl to override all FIFO depths with the specified value  
+    # used inside build_prj.tcl to override all FIFO depths with the specified value
     initial_fifo_depths = {}
     for output_variable in vars_to_profile.values():
         if output_variable.pragma:
@@ -121,7 +125,7 @@ def get_vitis_optimized_fifo_depths(model):
         + "_prj"
         + "/solution1/.autopilot/db/channel_depth_info/"
     )
-    
+
     os.system(f"unzip -q -o {path_to_zip_file}channel.zip -d {path_to_zip_file}")
 
     # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index cffdb4590d..93e530c3ee 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -5,7 +5,6 @@
 
 import numpy as np
 import pytest
-
 import qonnx.core.onnx_exec as oxe
 from qonnx.core.modelwrapper import ModelWrapper
 from tensorflow.keras.layers import SeparableConv2D
@@ -14,12 +13,12 @@
 import hls4ml
 from hls4ml.backends.vitis.passes.fifo_depth_optimization import override_test_bench
 
-
 test_root_path = Path(__file__).parent
 example_model_path = (test_root_path / '../../../example-models').resolve()
 
 backend_options = ['Vitis']
 
+
 def parse_cosim_report(project_path):
     """Parse the cosimulation report to check whether the cosimulation passed or failed and therefore a deadlock is
     detected.
@@ -63,7 +62,7 @@ def run_fifo_depth_optimization_keras(backend, profiling_fifo_depth, io_type):
     keras_prediction = model.predict(X_input)
 
     config = hls4ml.utils.config_from_keras_model(model, default_precision='ap_fixed<32, 16>')
-    
+
     # include the FIFO Depth optimizer do the flows
     config['Flows'] = ['vitis:fifo_depth_optimization']
     hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
@@ -185,6 +184,7 @@ def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, mod
 
     fifo_depth_optimization_checks(hls_model)
 
+
 @pytest.mark.skip(reason='Skipping synthesis tests for now')
 @pytest.mark.parametrize('backend', backend_options)
 def test_successful_execution_of_tiny_unet(backend):

From e86e11d131fbc0b69908dd743e851e4f08ba4912 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 11 Dec 2024 15:33:19 +0100
Subject: [PATCH 46/47] Fix qonnx test by optimizing away the input
 quantization

---
 hls4ml/backends/vitis/passes/fifo_depth_optimization.py | 2 +-
 test/pytest/test_optimization/test_fifo_depth.py        | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
index 2444843984..c048dfead5 100644
--- a/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
+++ b/hls4ml/backends/vitis/passes/fifo_depth_optimization.py
@@ -17,7 +17,7 @@ def initialize_large_fifos(model, profiling_fifo_depth):
     """
 
     # filter all the output variables and keep only the internal FIFOs, excluding output objects that are not FIFOs and the
-    # inut and output FIFOs as they can't be profiled and are implementation dependant i.e AXI Stream, AXI Master or
+    # input and output FIFOs as they can't be profiled and are implementation dependent, i.e. AXI Stream, AXI Master or
     # connected to another IP
     vars_to_profile = {
         output_variable_name: output_variable
diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index 93e530c3ee..a4b5718df8 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -161,9 +161,12 @@ def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, mod
     y_qonnx = oxe.execute_onnx(model, idict)[model.graph.output[0].name]
 
     config = hls4ml.utils.config.config_from_onnx_model(
-        model, granularity='name', backend=backend, default_precision='fixed<32,16>'
+        model, granularity='name', backend=backend, default_precision='ap_fixed<15,2,AP_RND_CONV>'
     )
-
+    
+    # add this line to remove the linear layer that quantizes the input of the NN
+    config['LayerName']['global_in']['Precision']['result'] = 'fixed<4,0,AP_RND_CONV,AP_SAT,0>'
+    
     config['Flows'] = ['vitis:fifo_depth_optimization']
     hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
         profiling_fifo_depth=profiling_fifo_depth

From 63aa3df36f0608c95331f31fc0409499c4ef7713 Mon Sep 17 00:00:00 2001
From: steltze <stel.tze09@gmail.com>
Date: Wed, 11 Dec 2024 15:34:44 +0100
Subject: [PATCH 47/47] Run precommit

---
 test/pytest/test_optimization/test_fifo_depth.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/pytest/test_optimization/test_fifo_depth.py b/test/pytest/test_optimization/test_fifo_depth.py
index a4b5718df8..e682a41c9a 100644
--- a/test/pytest/test_optimization/test_fifo_depth.py
+++ b/test/pytest/test_optimization/test_fifo_depth.py
@@ -163,10 +163,10 @@ def run_fifo_depth_optimization_onnx(backend, profiling_fifo_depth, io_type, mod
     config = hls4ml.utils.config.config_from_onnx_model(
         model, granularity='name', backend=backend, default_precision='ap_fixed<15,2,AP_RND_CONV>'
     )
-    
+
     # add this line to remove the linear layer that quantizes the input of the NN
     config['LayerName']['global_in']['Precision']['result'] = 'fixed<4,0,AP_RND_CONV,AP_SAT,0>'
-    
+
     config['Flows'] = ['vitis:fifo_depth_optimization']
     hls4ml.model.optimizer.get_optimizer('vitis:fifo_depth_optimization').configure(
         profiling_fifo_depth=profiling_fifo_depth