-
Notifications
You must be signed in to change notification settings - Fork 424
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add LayerNorm support for Vivado #1110
base: main
Are you sure you want to change the base?
Changes from 60 commits
c4c818b
3ee64d1
5626a1a
d51f8a9
89025a2
d76cf60
56811de
45cd493
1402f48
430b9ea
97f3e8d
52cc7e8
3961f97
3533999
d2f0df6
6aaa5ed
3b7a288
130092d
09b0ba0
b49fffd
5324a11
bf8c788
b6be2c4
2472b7d
97e71e9
5ed4a76
5d28f58
2fc68d0
b5c95cf
3b8aa8d
d28b24c
de79bb9
6c23326
20a0199
ddccde2
afbe00b
dedf96c
a9de9cb
49313d3
1156ba5
17e0048
63891fd
8dccac6
595cc71
5f3ec00
5697334
d2e27b8
a149f2e
552fa83
69f26bc
be5f5a4
adf7356
8437581
39ab36c
b5b82e2
f3ff077
0f08e7a
21049e7
cbd88bd
0fe0ec3
0d96cb0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,15 @@ | ||
from hls4ml.backends.backend import get_backend | ||
from hls4ml.backends.template import FunctionCallTemplate, LayerConfigTemplate | ||
from hls4ml.model.layers import Activation, BatchNormalization, Dense, HardActivation, ParametrizedActivation, PReLU, Softmax | ||
from hls4ml.model.layers import ( | ||
Activation, | ||
BatchNormalization, | ||
Dense, | ||
HardActivation, | ||
LayerNormalization, | ||
ParametrizedActivation, | ||
PReLU, | ||
Softmax, | ||
) | ||
from hls4ml.model.optimizer.passes.hgq_proxy_model import UnaryLUT | ||
|
||
# Dense templates | ||
|
@@ -119,6 +128,59 @@ def format(self, node): | |
return self.template.format(**params) | ||
|
||
|
||
# LayerNormalization templates | ||
|
||
layernorm_config_template = """struct config{index} : nnet::layernorm_config {{ | ||
static const unsigned n_in = {n_in}; | ||
static const unsigned seq_len = {seq_len}; | ||
static const unsigned table_size = {table_size}; | ||
static constexpr double table_range = {table_range}; | ||
static const unsigned io_type = nnet::{iotype}; | ||
static const unsigned reuse_factor = {reuse}; | ||
static const bool store_weights_in_bram = false; | ||
static constexpr double epsilon = {epsilon}; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Similar to above, this will only ever be used to store values in |
||
typedef {bias_t.name} bias_t; | ||
typedef {scale_t.name} scale_t; | ||
typedef {mean_t.name} mean_t; | ||
typedef {table_t.name} table_t; | ||
template<class x_T, class y_T> | ||
using product = nnet::product::{product_type}<x_T, y_T>; | ||
}};\n""" | ||
|
||
layernorm_function_template = 'nnet::layernormalize<{input_t}, {output_t}, {config}>({input}, {output}, {scale}, {bias});' | ||
|
||
layernorm_include_list = ['nnet_utils/nnet_layernorm.h'] | ||
|
||
|
||
class LayerNormalizationConfigTemplate(LayerConfigTemplate): | ||
def __init__(self): | ||
super().__init__(LayerNormalization) | ||
self.template = layernorm_config_template | ||
|
||
def format(self, node): | ||
params = self._default_config_params(node) | ||
params['n_in'] = node.get_input_variable().size_cpp() | ||
params['seq_len'] = node.get_attr('seq_len') | ||
params['product_type'] = get_backend('vivado').product_type( | ||
node.get_input_variable().type.precision, node.get_weights('scale').type.precision | ||
) | ||
|
||
return self.template.format(**params) | ||
|
||
|
||
class LayerNormalizationFunctionTemplate(FunctionCallTemplate): | ||
def __init__(self): | ||
super().__init__(LayerNormalization, include_header=layernorm_include_list) | ||
self.template = layernorm_function_template | ||
|
||
def format(self, node): | ||
params = self._default_function_params(node) | ||
params['scale'] = node.get_weights('scale').name | ||
params['bias'] = node.get_weights('bias').name | ||
|
||
return self.template.format(**params) | ||
|
||
|
||
# Activation templates | ||
|
||
activ_config_template = """struct {type}_config{index} : nnet::activ_config {{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
GarNet, | ||
GarNetStack, | ||
Layer, | ||
LayerNormalization, | ||
Pooling1D, | ||
Pooling2D, | ||
SeparableConv1D, | ||
|
@@ -558,6 +559,21 @@ def init_softmax(self, layer): | |
len(layer.get_input_variable().shape) == 1 | ||
), 'Softmax with io_parallel strategy cannot be used on multidimensional tensors.' | ||
|
||
@layer_optimizer(LayerNormalization) | ||
def init_layernormalization(self, layer): | ||
if 'table_t' not in layer.attributes: | ||
layer.set_attr( | ||
'table_t', NamedType(name=layer.name + '_table_t', precision=FixedPrecisionType(width=16, integer=6)) | ||
) | ||
if 'table_size' not in layer.attributes: | ||
layer.set_attr('table_size', 4096) # table size | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These attributes should be set as default in […]. Also, is 4096 necessary for this implementation to work? All other tables are 1024. |
||
if 'table_range' not in layer.attributes: | ||
layer.set_attr('table_range', 1.0) # table range | ||
if 'mean_t' not in layer.attributes: | ||
layer.set_attr( | ||
'mean_t', NamedType(name=layer.name + '_mean_t', precision=FixedPrecisionType(width=19, integer=6)) | ||
) | ||
|
||
@layer_optimizer(Embedding) | ||
def init_embed(self, layer): | ||
if layer.attributes['n_in'] is None: | ||
|
rianbrooksflynn marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -129,6 +129,34 @@ def parse_batchnorm_layer(keras_layer, input_names, input_shapes, data_reader): | |
return layer, [shape for shape in input_shapes[0]] | ||
|
||
|
||
@keras_handler('LayerNormalization') | ||
def parse_layernorm_layer(keras_layer, input_names, input_shapes, data_reader): | ||
assert 'LayerNormalization' in keras_layer['class_name'] | ||
|
||
layer = parse_default_keras_layer(keras_layer, input_names) | ||
|
||
in_size = 1 | ||
for dim in input_shapes[0][1:]: | ||
in_size *= dim | ||
layer['n_in'] = layer['n_out'] = in_size | ||
|
||
if not ((len(input_shapes[0])) == 3): | ||
raise Exception('input size is not currently supported by hls4ml, only dim3 is supported') | ||
layer['seq_len'] = input_shapes[0][-2] | ||
|
||
if not (keras_layer['config']['axis'][0] == 2): | ||
raise Exception('assigning the axis is not currently supported by hls4ml, only axis 2 is supported') | ||
|
||
layer['gamma_data'] = get_weights_data(data_reader, layer['name'], 'gamma') | ||
layer['beta_data'] = get_weights_data(data_reader, layer['name'], 'beta') | ||
|
||
layer['epsilon'] = keras_layer['config']['epsilon'] | ||
if layer['epsilon'] <= 0: | ||
raise Exception('epsilon must be positive') | ||
|
||
return layer, [shape for shape in input_shapes[0]] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function should also parse |
||
|
||
|
||
@keras_handler('Embedding') | ||
def parse_embedding_layer(keras_layer, input_names, input_shapes, data_reader): | ||
assert 'Embedding' in keras_layer['class_name'] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,3 +138,32 @@ def parse_batchnorm_layer(operation, layer_name, input_names, input_shapes, node | |
layer['n_filt'] = input_shapes[0][1] # Always channel first for Pytorch | ||
|
||
return layer, [shape for shape in input_shapes[0]] | ||
|
||
|
||
@pytorch_handler('LayerNorm') | ||
def parse_layernorm_layer(operation, layer_name, input_names, input_shapes, node, class_object, data_reader, config): | ||
assert 'LayerNorm' in operation | ||
|
||
layer = {} | ||
|
||
layer['class_name'] = 'LayerNormalization' | ||
layer['name'] = layer_name | ||
layer['inputs'] = input_names | ||
|
||
in_size = 1 | ||
for dim in input_shapes[0][1:]: | ||
in_size *= dim | ||
layer['n_in'] = layer['n_out'] = in_size | ||
|
||
if not ((len(input_shapes[0])) == 3): | ||
raise Exception('input size is not currently supported by hls4ml, only dim3 is supported') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is |
||
layer['seq_len'] = input_shapes[0][-2] | ||
|
||
layer['gamma_data'] = class_object.weight.data.numpy() | ||
layer['beta_data'] = class_object.bias.data.numpy() | ||
|
||
layer['epsilon'] = class_object.eps | ||
if layer['epsilon'] <= 0: | ||
raise Exception('epsilon must be positive') | ||
|
||
return layer, [shape for shape in input_shapes[0]] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1058,6 +1058,30 @@ def add_bias(self, bias, quantizer=None, precision=None): | |
self.add_weights_variable(name='bias', var_name='b{index}', data=bias, quantizer=quantizer, precision=precision) | ||
|
||
|
||
class LayerNormalization(Layer): | ||
_expected_attributes = [ | ||
Attribute('n_in'), | ||
Attribute('seq_len'), | ||
Attribute('epsilon', value_type=float, default=1e-3), | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
WeightAttribute('scale'), | ||
WeightAttribute('bias'), | ||
TypeAttribute('scale'), | ||
TypeAttribute('bias'), | ||
] | ||
|
||
def initialize(self): | ||
inp = self.get_input_variable() | ||
shape = inp.shape | ||
dims = inp.dim_names | ||
self.add_output_variable(shape, dims) | ||
|
||
scale = self.get_attr('gamma_data') | ||
bias = self.get_attr('beta_data') | ||
|
||
self.add_weights_variable(name='scale', var_name='s{index}', data=scale) | ||
self.add_weights_variable(name='bias', var_name='b{index}', data=bias) | ||
|
||
|
||
class Merge(Layer): | ||
def initialize(self): | ||
assert len(self.inputs) == 2 | ||
|
@@ -1682,6 +1706,7 @@ def initialize(self): | |
'BatchNormOnnx': BatchNormOnnx, | ||
'LayerGroup': LayerGroup, | ||
'SymbolicExpression': SymbolicExpression, | ||
'LayerNormalization': LayerNormalization, | ||
# TensorFlow-specific layers: | ||
'BiasAdd': BiasAdd, | ||
} | ||
|
rianbrooksflynn marked this conversation as resolved.
Show resolved
Hide resolved
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason this is a double? It is not used as such, and breaks Vivado synthesis.