diff --git a/tests/benchdnn/inputs/graph/op/f32/dynamicq_s4.json b/tests/benchdnn/inputs/graph/op/f32/dynamicq_s4.json new file mode 100644 index 00000000000..39f00734325 --- /dev/null +++ b/tests/benchdnn/inputs/graph/op/f32/dynamicq_s4.json @@ -0,0 +1,74 @@ +{ + "version": "3.0.0", + "engine_kind": "cpu", + "fpmath_mode": "strict", + "graph": [ + { + "id": 0, + "name": "QUANTIZE_0", + "kind": "DynamicQuantize", + "attrs": { + "axis": { + "type": "s64", + "value": 1 + }, + "qtype": { + "type": "string", + "value": "per_channel" + } + }, + "inputs": [ + { + "id": 0, + "dtype": "f32", + "shape": [ + 2, + 64, + 3, + 3 + ], + "stride": [ + 576, + 9, + 3, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 1, + "dtype": "f32", + "shape": [ + 64 + ], + "stride": [ + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 2, + "dtype": "s8", + "shape": [ + 2, + 64, + 3, + 3 + ], + "stride": [ + 576, + 9, + 3, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +} diff --git a/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_add_fusion.json b/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_add_fusion.json new file mode 100644 index 00000000000..edc28d6c1bc --- /dev/null +++ b/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_add_fusion.json @@ -0,0 +1,374 @@ +{ + "version": "3.7.0", + "engine_kind": "cpu", + "fpmath_mode": "strict", + "fpmath_mode_apply_to_int": "false", + "input_ports": [ + 0, + 3, + 7, + 9 + ], + "output_ports": [ + 10 + ], + "graph": [ + { + "id": 0, + "name": "DEQUANTIZE_1", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 0, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 1, + "name": "TYPECAST_0", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 2, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 2, + "name": "DEQUANTIZE_2", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 3, + "dtype": "f8_e5m2", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 3, + "name": "TYPECAST_0", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 5, + "dtype": "bf16", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 4, + "name": "MATMUL_0", + "kind": "MatMul", + "attrs": { + "transpose_b": { + "type": "bool", + "value": 0 + }, + "transpose_a": { + "type": "bool", + "value": 0 + } + }, + "inputs": [ + { + "id": 2, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 5, + "dtype": "bf16", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 6, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 5, + "name": "BINARY_3", + "kind": "Multiply", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 6, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 7, + "dtype": "bf16", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 8, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 6, + "name": "BINARY_4", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 8, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 9, + "dtype": "bf16", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 10, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +} diff --git a/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_sum_add_mul_relu.json b/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_sum_add_mul_relu.json new file mode 100644 index 00000000000..caac5b7d83e --- /dev/null +++ b/tests/benchdnn/inputs/graph/pattern/f8/f8_bf16_matmul_sum_add_mul_relu.json @@ -0,0 +1,652 @@ +{ + "version": "3.7.0", + "engine_kind": "cpu", + "fpmath_mode": "strict", + "fpmath_mode_apply_to_int": "false", + "input_ports": [ + 0, + 3, + 6208, + 6219, + 22 + ], + "output_ports": [ + 12 + ], + "graph": [ + { + "id": 8205, + "name": "DEQUANTIZE_1", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 0, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8207, + "name": "DEQUANTIZE_2", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 3, + "dtype": "f8_e5m2", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 8209, + "name": "DEQUANTIZE_3", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 1 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 6208, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 6209, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8206, + "name": "TYPECAST_1", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 2, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8208, + "name": "TYPECAST_2", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 5, + "dtype": "bf16", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 8210, + "name": "TYPECAST_3", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 6209, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 6210, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8211, + "name": "MATMUL_0", + "kind": "MatMul", + "attrs": { + "transpose_b": { + "type": "bool", + "value": 0 + }, + "transpose_a": { + "type": "bool", + "value": 0 + } + }, + "inputs": [ + { + "id": 2, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 5, + "dtype": "bf16", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 6, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8212, + "name": "BINARY_4", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 6, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 6210, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 10, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8216, + "name": "BINARY_6", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 10, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 6219, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 21, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8215, + "name": "BINARY_5", + "kind": "Multiply", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 21, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 22, + "dtype": "bf16", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 24, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8219, + "name": "ELEWISE_1", + "kind": "ReLU", + "attrs": {}, + "inputs": [ + { + "id": 24, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 25, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8213, + "name": "TYPECAST_4", + "kind": "TypeCast", + "attrs": {}, + "inputs": [ + { + "id": 25, + "dtype": "bf16", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 11, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8, + "name": "QUANTIZE_4", + "kind": "Quantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 11, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 12, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +} diff --git a/tests/benchdnn/inputs/graph/pattern/f8/f8_f32_matmul_mul_add_fusion.json b/tests/benchdnn/inputs/graph/pattern/f8/f8_f32_matmul_mul_add_fusion.json new file mode 100644 index 00000000000..fa1a01a5610 --- /dev/null +++ b/tests/benchdnn/inputs/graph/pattern/f8/f8_f32_matmul_mul_add_fusion.json @@ -0,0 +1,553 @@ +{ + "version": "3.7.0", + "engine_kind": "cpu", + "fpmath_mode": "strict", + "fpmath_mode_apply_to_int": "false", + "input_ports": [ + 0, + 1, + 2, + 3 + ], + "output_ports": [ + 8353 + ], + "graph": [ + { + "id": 2065, + "name": "DEQUANTIZE_1", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 0, + "dtype": "f8_e5m2", + "shape": [ + 16, + 512 + ], + "stride": [ + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 40, + "dtype": "f32", + "shape": [ + 16, + 512 + ], + "stride": [ + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 4113, + "name": "DEQUANTIZE_2", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 1 + }, + "qtype": { + "type": "string", + "value": "per_channel" + }, + "scales": { + "type": "f32[]", + "value": [ + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125, + 8, + 0.25, + 4, + 0.5, + 2, + 1, + 1, + 2, + 0.5, + 4, + 0.25, + 8, + 0.125, + 16, + 0.0625, + 32, + 0.03125, + 0.0625, + 16, + 0.125 + ] + } + }, + "inputs": [ + { + "id": 1, + "dtype": "f8_e5m2", + "shape": [ + 512, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 104, + "dtype": "f32", + "shape": [ + 512, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 8, + "name": "MATMUL_0", + "kind": "MatMul", + "attrs": { + "transpose_b": { + "type": "bool", + "value": 0 + }, + "transpose_a": { + "type": "bool", + "value": 0 + } + }, + "inputs": [ + { + "id": 40, + "dtype": "f32", + "shape": [ + 16, + 512 + ], + "stride": [ + 512, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 104, + "dtype": "f32", + "shape": [ + 512, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 168, + "dtype": "f32", + "shape": [ + 16, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 6145, + "name": "BINARY_3", + "kind": "Multiply", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 168, + "dtype": "f32", + "shape": [ + 16, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 2, + "dtype": "f32", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 6305, + "dtype": "f32", + "shape": [ + 16, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8193, + "name": "BINARY_4", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 6305, + "dtype": "f32", + "shape": [ + 16, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 3, + "dtype": "f32", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 8353, + "dtype": "f32", + "shape": [ + 16, + 256 + ], + "stride": [ + 256, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +} diff --git a/tests/benchdnn/inputs/graph/pattern/f8/f8_matmul_sum_add_mul_relu.json b/tests/benchdnn/inputs/graph/pattern/f8/f8_matmul_sum_add_mul_relu.json new file mode 100644 index 00000000000..a5970fabcc1 --- /dev/null +++ b/tests/benchdnn/inputs/graph/pattern/f8/f8_matmul_sum_add_mul_relu.json @@ -0,0 +1,500 @@ +{ + "version": "3.7.0", + "engine_kind": "cpu", + "fpmath_mode": "strict", + "fpmath_mode_apply_to_int": "false", + "input_ports": [ + 0, + 3, + 6208, + 6219, + 22 + ], + "output_ports": [ + 12 + ], + "graph": [ + { + "id": 8205, + "name": "DEQUANTIZE_1", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 0, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8207, + "name": "DEQUANTIZE_2", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 3, + "dtype": "f8_e5m2", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ] + }, + { + "id": 8209, + "name": "DEQUANTIZE_3", + "kind": "Dequantize", + "attrs": { + "axis": { + "type": "s64", + "value": 1 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 6208, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 6209, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8211, + "name": "MATMUL_0", + "kind": "MatMul", + "attrs": { + "transpose_b": { + "type": "bool", + "value": 0 + }, + "transpose_a": { + "type": "bool", + "value": 0 + } + }, + "inputs": [ + { + "id": 1, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 4, + "dtype": "f32", + "shape": [ + 1024, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "constant" + } + ], + "outputs": [ + { + "id": 6, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8212, + "name": "BINARY_4", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 6, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 6209, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 10, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8216, + "name": "BINARY_6", + "kind": "Add", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 10, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 6219, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 21, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8215, + "name": "BINARY_5", + "kind": "Multiply", + "attrs": { + "auto_broadcast": { + "type": "string", + "value": "numpy" + } + }, + "inputs": [ + { + "id": 21, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + }, + { + "id": 22, + "dtype": "f32", + "shape": [ + 1, + 1 + ], + "stride": [ + 1, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 24, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8219, + "name": "ELEWISE_1", + "kind": "ReLU", + "attrs": {}, + "inputs": [ + { + "id": 24, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 25, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + }, + { + "id": 8, + "name": "QUANTIZE_4", + "kind": "Quantize", + "attrs": { + "axis": { + "type": "s64", + "value": 0 + }, + "qtype": { + "type": "string", + "value": "per_tensor" + }, + "scales": { + "type": "f32[]", + "value": [ + 1 + ] + } + }, + "inputs": [ + { + "id": 25, + "dtype": "f32", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ], + "outputs": [ + { + "id": 12, + "dtype": "f8_e5m2", + "shape": [ + 16, + 1024 + ], + "stride": [ + 1024, + 1 + ], + "layout_type": "strided", + "property_type": "undef" + } + ] + } + ] +} diff --git a/tests/benchdnn/inputs/graph/pattern/harness_f8_all b/tests/benchdnn/inputs/graph/pattern/harness_f8_all index cdb04e1cb97..1f5ab5ae468 100644 --- a/tests/benchdnn/inputs/graph/pattern/harness_f8_all +++ b/tests/benchdnn/inputs/graph/pattern/harness_f8_all @@ -5,3 +5,7 @@ --reset --expected-n-partitions=0 --case=pattern/f8/f8_conv_post_ops_int8_add_fusion.json --reset --expected-n-partitions=0 --case=pattern/f8/f8_conv_bias_relu_fusion.json --reset --expected-n-partitions=0 --case=pattern/f8/f8_matmul.json +--reset --expected-n-partitions=0 --case=pattern/f8/f8_bf16_matmul_add_fusion.json +--reset --expected-n-partitions=0 --case=pattern/f8/f8_bf16_matmul_sum_add_mul_relu.json +--reset --expected-n-partitions=0 --case=pattern/f8/f8_matmul_sum_add_mul_relu.json +--reset --expected-n-partitions=0 --case=pattern/f8/f8_f32_matmul_mul_add_fusion.json