Skip to content

Commit

Permalink
#4443: distribute data transfer between brisc and ncrisc
Browse files Browse the repository at this point in the history
  • Loading branch information
mywoodstock committed Feb 22, 2024
1 parent 8e402b0 commit 4db6308
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ def test_generate_all_configs_and_references(
conv_params[i] for i in range(10)
]

if test_max_pool and batch_size > 8:
if test_max_pool and batch_size > 16:
pytest.skip(f"Skipping maxpool config with batch_size = {batch_size} due to mem limitations")

compute_grid_size = device.compute_with_storage_grid_size()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,6 @@ void kernel_main() {
cb_push_back(pad_cb_id, 1);
const uint64_t padding_noc_addr = get_noc_addr(get_read_ptr(pad_cb_id));

// cb_wait_front(in_cb_id, in_nsticks); // make sure untilized data is available

const uint32_t in_base_l1_addr = get_read_ptr(in_cb_id);
const uint32_t out_base_l1_addr = get_write_ptr(out_cb_id);

Expand All @@ -124,25 +122,29 @@ void kernel_main() {
uint32_t dst_size = local_pad_ss[i + 1];
uint32_t dst_addr = out_base_l1_addr + local_pad_ss[i] * stick_nbytes;
for (uint32_t j = 0; j < dst_size; ++ j) {
noc_async_read(padding_noc_addr, dst_addr, stick_nbytes);
// noc_async_read(padding_noc_addr, dst_addr, stick_nbytes);
noc_async_write(get_read_ptr(pad_cb_id), get_noc_addr(dst_addr), stick_nbytes);
dst_addr += stick_nbytes;
}
}
}

// // then insert all local data
// if (local_data_nsegments > 0) {
// // cb_wait_front(local_data_ss_cb_id, 1);
// in_l1_addr = in_base_l1_addr + local_data_src_start_offset * stick_nbytes;
// uint32_t local_data_ss_l1_addr = get_read_ptr(local_data_ss_cb_id);
// volatile tt_l1_ptr uint16_t* local_data_ss = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(local_data_ss_l1_addr);
// for (int32_t i = 0; i < 2 * local_data_nsegments; i += 2) {
// uint32_t dst_size = local_data_ss[i + 1] * stick_nbytes;
// uint32_t dst_addr = out_base_l1_addr + local_data_ss[i] * stick_nbytes;
// noc_async_read(get_noc_addr(in_l1_addr), dst_addr, dst_size);
// in_l1_addr += dst_size;
// }
// }
cb_wait_front(in_cb_id, in_nsticks); // make sure untilized data is available

// then insert all local data
if (local_data_nsegments > 0) {
// cb_wait_front(local_data_ss_cb_id, 1);
in_l1_addr = in_base_l1_addr + local_data_src_start_offset * stick_nbytes;
uint32_t local_data_ss_l1_addr = get_read_ptr(local_data_ss_cb_id);
volatile tt_l1_ptr uint16_t* local_data_ss = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(local_data_ss_l1_addr);
for (int32_t i = 0; i < 2 * local_data_nsegments; i += 2) {
uint32_t dst_size = local_data_ss[i + 1] * stick_nbytes;
uint32_t dst_addr = out_base_l1_addr + local_data_ss[i] * stick_nbytes;
noc_async_read(get_noc_addr(in_l1_addr), dst_addr, dst_size);
// noc_async_write(in_l1_addr, get_noc_addr(dst_addr), dst_size);
in_l1_addr += dst_size;
}
}

// // push data to neighbors
// if (has_ll && ll_data_nsegments > 0) {
Expand All @@ -163,7 +165,7 @@ void kernel_main() {
// }

noc_async_read_barrier();
// noc_async_write_barrier();
noc_async_write_barrier();

// DPRINT << "OUT:" << ENDL();
// print_sticks(out_base_l1_addr, 0, 500, 32);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@
// }
// }

// // Fill an L1 buffer with the given val
// inline bool fill_with_val(uint32_t begin_addr, uint32_t n, uint16_t val) {
// // simplest impl:
// volatile tt_l1_ptr uint16_t* ptr = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(begin_addr);
// for (uint32_t i = 0; i < n; ++ i) {
// ptr[i] = val;
// }
// return true;
// }
// Fill an L1 buffer with the given val
inline bool fill_with_val(uint32_t begin_addr, uint32_t n, uint16_t val) {
// simplest impl:
volatile tt_l1_ptr uint16_t* ptr = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(begin_addr);
for (uint32_t i = 0; i < n; ++ i) {
ptr[i] = val;
}
return true;
}

inline void push_to_neighbor_async(uint32_t noc_x,
uint32_t noc_y,
Expand Down Expand Up @@ -104,9 +104,6 @@ void kernel_main() {
const uint32_t in_base_l1_addr = get_read_ptr(in_cb_id);
const uint32_t out_base_l1_addr = get_write_ptr(out_cb_id);

// DPRINT << "IN:" << ENDL();
// print_sticks(in_base_l1_addr, 0, 300, 32);

uint32_t in_l1_addr = in_base_l1_addr;

// // insert all padding locally
Expand All @@ -124,20 +121,20 @@ void kernel_main() {
// }
// }

// then insert all local data
if (local_data_nsegments > 0) {
// cb_wait_front(local_data_ss_cb_id, 1);
in_l1_addr = in_base_l1_addr + local_data_src_start_offset * stick_nbytes;
uint32_t local_data_ss_l1_addr = get_read_ptr(local_data_ss_cb_id);
volatile tt_l1_ptr uint16_t* local_data_ss = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(local_data_ss_l1_addr);
for (int32_t i = 0; i < 2 * local_data_nsegments; i += 2) {
uint32_t dst_size = local_data_ss[i + 1] * stick_nbytes;
uint32_t dst_addr = out_base_l1_addr + local_data_ss[i] * stick_nbytes;
// noc_async_read(get_noc_addr(in_l1_addr), dst_addr, dst_size);
noc_async_write(in_l1_addr, get_noc_addr(dst_addr), dst_size);
in_l1_addr += dst_size;
}
}
// // then insert all local data
// if (local_data_nsegments > 0) {
// // cb_wait_front(local_data_ss_cb_id, 1);
// in_l1_addr = in_base_l1_addr + local_data_src_start_offset * stick_nbytes;
// uint32_t local_data_ss_l1_addr = get_read_ptr(local_data_ss_cb_id);
// volatile tt_l1_ptr uint16_t* local_data_ss = reinterpret_cast<volatile tt_l1_ptr uint16_t*>(local_data_ss_l1_addr);
// for (int32_t i = 0; i < 2 * local_data_nsegments; i += 2) {
// uint32_t dst_size = local_data_ss[i + 1] * stick_nbytes;
// uint32_t dst_addr = out_base_l1_addr + local_data_ss[i] * stick_nbytes;
// // noc_async_read(get_noc_addr(in_l1_addr), dst_addr, dst_size);
// noc_async_write(in_l1_addr, get_noc_addr(dst_addr), dst_size);
// in_l1_addr += dst_size;
// }
// }

// push data to neighbors
if (has_ll && ll_data_nsegments > 0) {
Expand All @@ -159,7 +156,4 @@ void kernel_main() {

// noc_async_read_barrier();
noc_async_write_barrier();

// DPRINT << "OUT:" << ENDL();
// print_sticks(out_base_l1_addr, 0, 500, 32);
}

0 comments on commit 4db6308

Please sign in to comment.