3 // SPDX-FileCopyrightText: (c) 2025 Tenstorrent AI ULC
5 // SPDX-License-Identifier: Apache-2.0
7 #ifndef TTMLIR_TARGET_TTKERNEL_LLKS_EXPERIMENTAL_UNTILIZE_LLKS_H
8 #define TTMLIR_TARGET_TTKERNEL_LLKS_EXPERIMENTAL_UNTILIZE_LLKS_H
10 namespace experimental {
// Runs a single untilize unpack pass over `operand`.
//
// Template parameter:
//   first_pass       - forwarded unchanged as the template argument of
//                      _llk_unpack_untilize_pass_.
// Parameters:
//   operand          - operand identifier; mapped through get_operand_id()
//                      before the local CB interface is queried.
//   block_tile_cols  - passed through as the second argument of the
//                      low-level call.
//   start_tile_index - offset (default 0) multiplied by the interface's
//                      fifo_page_size and added to the base address.
14 template <bool first_pass = true>
15 ALWI void llk_unpack_untilize_pass(uint32_t operand, uint32_t block_tile_cols,
16 uint32_t start_tile_index = 0) {
17 const uint32_t operand_id = get_operand_id(operand);
// Base address is the interface's current fifo_rd_ptr minus one.
// NOTE(review): the reason for the -1 is not visible in this file; confirm
// against the LLK address convention before changing it.
18 const uint32_t base_address =
19 get_local_cb_interface(operand_id).fifo_rd_ptr - 1;
// NOTE(review): named page_bytes, but the units of fifo_page_size are not
// established here; they must match the units of fifo_rd_ptr. TODO confirm.
20 const uint32_t page_bytes = get_local_cb_interface(operand_id).fifo_page_size;
// Address handed to the low-level pass: base plus start_tile_index pages.
22 _llk_unpack_untilize_pass_<first_pass>(
23 base_address + (start_tile_index * page_bytes), block_tile_cols);
// Untilize unpack entry point: invokes llk_unpack_untilize_pass twice with
// identical arguments, first with first_pass = true and then with
// first_pass = false.
//
// Parameters:
//   operand          - forwarded to both passes.
//   block_c_tiles    - forwarded to both passes as block_tile_cols.
//   start_tile_index - forwarded to both passes (default 0).
26 ALWI void llk_unpack_untilize(uint32_t operand, uint32_t block_c_tiles,
27 uint32_t start_tile_index = 0) {
// The order matters only insofar as the <true> pass is issued before <false>.
29 llk_unpack_untilize_pass<true>(operand, block_c_tiles, start_tile_index);
30 llk_unpack_untilize_pass<false>(operand, block_c_tiles, start_tile_index);
33 #endif // TRISC_UNPACK
// Untilizes a block from `icb` and packs the result into `ocb`.
//
// For each of `block_r` outer iterations an unpack is issued for `block_c`
// tiles starting at start_tile_idx, followed by a per-tile math/pack sequence
// repeated `block_c` times. Each stage is wrapped in the UNPACK(...),
// MATH(...) or PACK(...) macro.
//
// Parameters visible here:
//   icb     - input operand handed to llk_unpack_untilize.
//   ocb     - output operand handed to llk_pack.
//   block_r - outer loop trip count.
// NOTE(review): `block_c` is used below but its declaration is not visible in
// this listing; presumably it is the final parameter of this function.
35 ALWI void untilize_block(uint32_t icb, uint32_t ocb, uint32_t block_r,
// Running offset passed to the unpacker as its start_tile_index argument.
37 uint32_t start_tile_idx = 0;
38 for (uint32_t i = 0; i < block_r; i++) {
// Unpack side: both untilize passes over block_c tiles from start_tile_idx.
39 UNPACK((llk_unpack_untilize(icb, block_c, start_tile_idx)));
41 for (uint32_t t = 0; t < block_c; t++) {
// Math side: wait for dest, run the A2D unary datacopy, then signal that
// the dest section is done.
42 MATH((llk_math_wait_for_dest_available()));
// NOTE(review): the remaining template/call arguments of this datacopy are
// not visible in this listing.
45 MATH((llk_math_eltwise_unary_datacopy<A2D, BroadcastType::NONE,
48 MATH((llk_math_dest_section_done<DST_ACCUM_MODE>()));
// Pack side: wait for math, pack into ocb, then release the dest section.
50 PACK((llk_packer_wait_for_math_done()));
// First argument is the literal 0; presumably the dest tile index. TODO confirm.
53 PACK((llk_pack<false, false, DST_ACCUM_MODE>(0, ocb)));
56 PACK((llk_pack_dest_section_done<DST_ACCUM_MODE>()));
// Move the unpack start index forward by block_c.
58 start_tile_idx += block_c;
61 } // namespace experimental
63 #endif // TTMLIR_TARGET_TTKERNEL_LLKS_EXPERIMENTAL_UNTILIZE_LLKS_H
static constexpr unsigned int experimental_untilize_llks_generated_len
Definition: experimental_untilize_llks_generated.h:65
static constexpr char experimental_untilize_llks_generated[]
Definition: experimental_untilize_llks_generated.h:2