ttnn.conv2d

This table lists traces of the ttnn.conv2d op. The traces are generated from nightly tt-torch runs. To view the nightly runs, see: Nightly Runs

Name	Input Shapes	Input Layouts	Attributes	Output Shapes	Output Layouts	PCC	ATOL
ttnn.conv2d	tensor<[1,1,262144,128,bf16]> tensor<[128,128,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (262144, 128, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 384 + d1 * 3 + d2, d3), memory_config: (49152, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 128 : i32 input_height: 512 : i32 input_width: 512 : i32 kernel_size: array<i32: 3, 3> out_channels: 128 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,262144,128,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (8192, 4, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,262144,128,bf16]> tensor<[3,128,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (262144, 128, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 384 + d1 * 3 + d2, d3), memory_config: (1152, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 128 : i32 input_height: 512 : i32 input_width: 512 : i32 kernel_size: array<i32: 3, 3> out_channels: 3 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,262144,3,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (8192, 1, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,196,16,bf16]> tensor<[4,16,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 196 + d1 * 196 + d2, d3), memory_config: (196, 16, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 48 + d1 * 3 + d2, d3), memory_config: (192, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 16 : i32 input_height: 14 : i32 input_width: 14 : i32 kernel_size: array<i32: 3, 3> out_channels: 4 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,196,4,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 224 + d1 * 224 + d2, d3), memory_config: (7, 1, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,262144,256,bf16]> tensor<[128,256,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (262144, 256, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 768 + d1 * 3 + d2, d3), memory_config: (98304, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 256 : i32 input_height: 512 : i32 input_width: 512 : i32 kernel_size: array<i32: 3, 3> out_channels: 128 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,262144,128,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (8192, 4, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,262144,256,bf16]> tensor<[256,256,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (262144, 256, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 768 + d1 * 3 + d2, d3), memory_config: (196608, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 256 : i32 input_height: 512 : i32 input_width: 512 : i32 kernel_size: array<i32: 3, 3> out_channels: 256 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,262144,256,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 262144 + d1 * 262144 + d2, d3), memory_config: (8192, 8, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,1048576,3,bf16]> tensor<[64,3,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1048576 + d1 * 1048576 + d2, d3), memory_config: (1048576, 3, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 9 + d1 * 3 + d2, d3), memory_config: (576, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 3 : i32 input_height: 1024 : i32 input_width: 1024 : i32 kernel_size: array<i32: 3, 3> out_channels: 64 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,1048576,64,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1048576 + d1 * 1048576 + d2, d3), memory_config: (32768, 2, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,65536,480,bf16]> tensor<[64,480,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (65536, 480, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1440 + d1 * 3 + d2, d3), memory_config: (92160, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 480 : i32 input_height: 256 : i32 input_width: 256 : i32 kernel_size: array<i32: 3, 3> out_channels: 64 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,65536,64,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (2048, 2, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,65536,512,bf16]> tensor<[256,512,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (65536, 512, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1536 + d1 * 3 + d2, d3), memory_config: (393216, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 512 : i32 input_height: 256 : i32 input_width: 256 : i32 kernel_size: array<i32: 3, 3> out_channels: 256 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,65536,256,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (2048, 8, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,65536,512,bf16]> tensor<[512,512,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (65536, 512, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1536 + d1 * 3 + d2, d3), memory_config: (786432, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 512 : i32 input_height: 256 : i32 input_width: 256 : i32 kernel_size: array<i32: 3, 3> out_channels: 512 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,65536,512,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (2048, 16, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,1048576,64,bf16]> tensor<[48,64,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1048576 + d1 * 1048576 + d2, d3), memory_config: (1048576, 64, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 192 + d1 * 3 + d2, d3), memory_config: (9216, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 64 : i32 input_height: 1024 : i32 input_width: 1024 : i32 kernel_size: array<i32: 3, 3> out_channels: 48 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,1048576,48,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 1048576 + d1 * 1048576 + d2, d3), memory_config: (32768, 2, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,65536,64,bf16]> tensor<[98,64,7,7,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (65536, 64, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 448 + d1 * 7 + d2, d3), memory_config: (43904, 7, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 64 : i32 input_height: 256 : i32 input_width: 256 : i32 kernel_size: array<i32: 7, 7> out_channels: 98 : i32 padding: array<i32: 3, 3> stride: array<i32: 1, 1>	tensor<[1,1,65536,98,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 65536 + d1 * 65536 + d2, d3), memory_config: (2048, 4, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,307200,64,bf16]> tensor<[1,64,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 307200 + d1 * 307200 + d2, d3), memory_config: (307200, 64, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 192 + d1 * 3 + d2, d3), memory_config: (192, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 64 : i32 input_height: 480 : i32 input_width: 640 : i32 kernel_size: array<i32: 3, 3> out_channels: 1 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,307200,1,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 307200 + d1 * 307200 + d2, d3), memory_config: (9600, 1, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,307200,64,bf16]> tensor<[64,64,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 307200 + d1 * 307200 + d2, d3), memory_config: (307200, 64, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 192 + d1 * 3 + d2, d3), memory_config: (12288, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 64 : i32 input_height: 480 : i32 input_width: 640 : i32 kernel_size: array<i32: 3, 3> out_channels: 64 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,307200,64,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 307200 + d1 * 307200 + d2, d3), memory_config: (9600, 2, 'tile<32x32, bf16>', 'dram')	nan	nan
ttnn.conv2d	tensor<[1,1,16384,960,bf16]> tensor<[64,960,3,3,bf16]> !ttnn.device	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 16384 + d1 * 16384 + d2, d3), memory_config: (16384, 960, 'bf16', 'dram') mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 2880 + d1 * 3 + d2, d3), memory_config: (184320, 3, 'bf16', 'system_memory')	batch_size: 1 : i32 dilation: array<i32: 1, 1> groups: 1 : i32 in_channels: 960 : i32 input_height: 128 : i32 input_width: 128 : i32 kernel_size: array<i32: 3, 3> out_channels: 64 : i32 padding: array<i32: 1, 1> stride: array<i32: 1, 1>	tensor<[1,1,16384,64,bf16]>	mapping_from: (d0, d1, d2, d3), mapping_to: (d0 * 16384 + d1 * 16384 + d2, d3), memory_config: (512, 2, 'tile<32x32, bf16>', 'dram')	nan	nan