-
Notifications
You must be signed in to change notification settings - Fork 90
/
Copy pathloopback.cpp
117 lines (91 loc) · 3.74 KB
/
loopback.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
// SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0
#include "tt_metal/host_api.hpp"
#include "tt_metal/device.hpp"
#include "common/bfloat16.hpp"
/*
* 1. Host writes data to buffer in DRAM
* 2. dram_copy kernel on logical core {0, 0} BRISC copies data from buffer
* in step 1. to buffer in L1 and back to another buffer in DRAM
* 3. Host reads from buffer written to in step 2.
*/
using namespace tt::tt_metal;
int main(int argc, char** argv) {
if (getenv("TT_METAL_SLOW_DISPATCH_MODE") != nullptr) {
TT_THROW("Test not supported w/ slow dispatch, exiting");
}
bool pass = true;
try {
/*
* Silicon accelerator setup
*/
constexpr int device_id = 0;
IDevice* device = CreateDevice(device_id);
/*
* Setup program and command queue to execute along with its buffers and kernels to use
*/
CommandQueue& cq = device->command_queue();
Program program = CreateProgram();
constexpr CoreCoord core = {0, 0};
KernelHandle dram_copy_kernel_id = CreateKernel(
program,
"tt_metal/programming_examples/loopback/kernels/loopback_dram_copy.cpp",
core,
DataMovementConfig{.processor = DataMovementProcessor::RISCV_0, .noc = NOC::RISCV_0_default});
constexpr uint32_t single_tile_size = 2 * (32 * 32);
constexpr uint32_t num_tiles = 50;
constexpr uint32_t dram_buffer_size = single_tile_size * num_tiles;
tt::tt_metal::InterleavedBufferConfig dram_config{
.device = device,
.size = dram_buffer_size,
.page_size = dram_buffer_size,
.buffer_type = tt::tt_metal::BufferType::DRAM};
tt::tt_metal::InterleavedBufferConfig l1_config{
.device = device,
.size = dram_buffer_size,
.page_size = dram_buffer_size,
.buffer_type = tt::tt_metal::BufferType::L1};
auto l1_buffer = CreateBuffer(l1_config);
auto input_dram_buffer = CreateBuffer(dram_config);
const uint32_t input_dram_buffer_addr = input_dram_buffer->address();
auto output_dram_buffer = CreateBuffer(dram_config);
const uint32_t output_dram_buffer_addr = output_dram_buffer->address();
// Since all interleaved buffers have size == page_size, they are entirely contained in the first DRAM bank
const uint32_t input_bank_id = 0;
const uint32_t output_bank_id = 0;
/*
* Create input data and runtime arguments, then execute
*/
std::vector<uint32_t> input_vec = create_random_vector_of_bfloat16(
dram_buffer_size, 100, std::chrono::system_clock::now().time_since_epoch().count());
EnqueueWriteBuffer(cq, input_dram_buffer, input_vec, false);
const std::vector<uint32_t> runtime_args = {
l1_buffer->address(),
input_dram_buffer->address(),
input_bank_id,
output_dram_buffer->address(),
output_bank_id,
l1_buffer->size()};
SetRuntimeArgs(program, dram_copy_kernel_id, core, runtime_args);
EnqueueProgram(cq, program, false);
Finish(cq);
/*
* Validation & Teardown
*/
std::vector<uint32_t> result_vec;
EnqueueReadBuffer(cq, output_dram_buffer, result_vec, true);
pass &= input_vec == result_vec;
pass &= CloseDevice(device);
} catch (const std::exception& e) {
tt::log_error(tt::LogTest, "Test failed with exception!");
tt::log_error(tt::LogTest, "{}", e.what());
throw;
}
if (pass) {
tt::log_info(tt::LogTest, "Test Passed");
} else {
TT_THROW("Test Failed");
}
return 0;
}