Converting PyTorch Model to TT-NN
#################################

.. note::

    This particular example only works on Grayskull. Not every converted model is functional on all Tenstorrent hardware (Grayskull, Wormhole, or others); functionality is determined on a case-by-case basis.

There are many ways to convert a torch model to ttnn. This is the recommended approach:

#. Rewrite the torch model using functional torch APIs
#. Convert the operations of the functional torch model to ttnn operations
#. Optimize the functional ttnn model

Step 1 - Rewriting the Model
****************************

Given a torch model, it can be rewritten using functional torch APIs. For example, given the following torch model:

.. code-block:: python

    # From transformers.models.bert.modeling_bert.BertIntermediate

    import torch

    class BertIntermediate(torch.nn.Module):
        def __init__(self, config):
            super().__init__()
            self.dense = torch.nn.Linear(config.hidden_size, config.intermediate_size)

        def forward(self, hidden_states):
            hidden_states = self.dense(hidden_states)
            hidden_states = torch.nn.functional.gelu(hidden_states)
            return hidden_states

Following TDD, the first step is to write a test for the model:

.. code-block:: python

    import pytest
    import torch
    import transformers

    import ttnn
    from ttnn.model_preprocessing import preprocess_model_parameters

    import torch_bert
    from models.utility_functions import torch_random
    from tests.ttnn.utils_for_testing import assert_with_pcc


    @pytest.mark.parametrize("model_name", ["phiyodr/bert-large-finetuned-squad2"])
    @pytest.mark.parametrize("batch_size", [1])
    @pytest.mark.parametrize("sequence_size", [384])
    def test_bert_intermediate(model_name, batch_size, sequence_size):
        torch.manual_seed(0)

        config = transformers.BertConfig.from_pretrained(model_name)
        model = transformers.models.bert.modeling_bert.BertIntermediate(config).eval()

        torch_hidden_states = torch_random((batch_size, sequence_size, config.hidden_size), -0.1, 0.1, dtype=torch.float32)
        torch_output = model(torch_hidden_states)  # Golden output

        parameters = preprocess_model_parameters(
            initialize_model=lambda: model,  # Function to initialize the model
            convert_to_ttnn=lambda *_: False,  # Keep the weights as torch tensors
        )

        output = torch_bert.bert_intermediate(
            torch_hidden_states,
            parameters=parameters,
        )

        assert_with_pcc(torch_output, output, 0.9999)

And finally, the model can be rewritten using functional torch APIs to make the test pass:

.. code-block:: python

    # torch_bert.py

    import torch


    def bert_intermediate(hidden_states, *, parameters):
        hidden_states = hidden_states @ parameters.dense.weight
        hidden_states = hidden_states + parameters.dense.bias
        hidden_states = torch.nn.functional.gelu(hidden_states)
        return hidden_states

.. note::

    ``parameters`` is a dictionary that also exposes its keys as attributes, so both ``parameters["dense"]["weight"]`` and ``parameters.dense.weight`` are valid. The structure of ``parameters`` follows the structure of the model class. In this case, ``BertIntermediate`` has a single attribute ``dense``, so ``parameters`` has a single attribute ``dense``. And ``dense`` is a ``torch.nn.Linear`` object, so it in turn has two attributes, ``weight`` and ``bias``. ``preprocess_model_parameters`` stores linear weights in a layout ready for matrix multiplication, which is why ``hidden_states @ parameters.dense.weight`` works without an explicit transpose.
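To make the dict-with-attribute-access behavior described in the note concrete, here is a minimal sketch. It is not the actual ttnn implementation; the class name ``ParameterDict`` and the tensor shapes (matching bert-large) are illustrative assumptions:

.. code-block:: python

    import torch

    # Minimal sketch of a dict that also exposes its keys as attributes,
    # mirroring the behavior described above for ``parameters``.
    class ParameterDict(dict):
        __getattr__ = dict.__getitem__

    parameters = ParameterDict(
        dense=ParameterDict(
            weight=torch.rand(1024, 4096),  # Illustrative shapes only
            bias=torch.rand(4096),
        )
    )

    # Both access styles refer to the same underlying tensor
    assert parameters["dense"]["weight"] is parameters.dense.weight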
Step 2 - Switching to ttnn Operations
*************************************

Starting off with the test:

.. code-block:: python

    import pytest
    import torch
    import transformers

    import ttnn
    from ttnn.model_preprocessing import preprocess_model_parameters

    import ttnn_bert
    from models.utility_functions import torch_random
    from tests.ttnn.utils_for_testing import assert_with_pcc


    @pytest.mark.parametrize("model_name", ["phiyodr/bert-large-finetuned-squad2"])
    @pytest.mark.parametrize("batch_size", [1])
    @pytest.mark.parametrize("sequence_size", [384])
    def test_bert_intermediate(device, model_name, batch_size, sequence_size):
        torch.manual_seed(0)

        config = transformers.BertConfig.from_pretrained(model_name)
        model = transformers.models.bert.modeling_bert.BertIntermediate(config).eval()

        torch_hidden_states = torch_random((batch_size, sequence_size, config.hidden_size), -0.1, 0.1)
        torch_output = model(torch_hidden_states)

        parameters = preprocess_model_parameters(
            initialize_model=lambda: model,
            device=device,  # Device to put the parameters on
        )

        hidden_states = ttnn.from_torch(torch_hidden_states, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
        output = ttnn_bert.bert_intermediate(
            hidden_states,
            parameters=parameters,
        )
        output = ttnn.to_torch(output)

        assert_with_pcc(torch_output, output.to(torch_output.dtype), 0.999)

Then implementing the function using ttnn operations:

.. code-block:: python

    # ttnn_bert.py

    import ttnn


    def bert_intermediate(
        hidden_states,
        *,
        parameters,
    ):
        output = hidden_states @ parameters.dense.weight
        output = output + parameters.dense.bias
        output = ttnn.gelu(output)
        return output
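The matrix multiplication and the bias addition above can also be expressed as a single ``ttnn.linear`` call, which is the form the optimized version in Step 3 builds on. A minimal sketch of this equivalent form, using the same inputs and parameters as above:

.. code-block:: python

    # ttnn_bert.py (equivalent form)

    import ttnn


    def bert_intermediate(hidden_states, *, parameters):
        # ttnn.linear performs the matmul and the bias add in one operation
        output = ttnn.linear(hidden_states, parameters.dense.weight, bias=parameters.dense.bias)
        output = ttnn.gelu(output)
        return output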
Step 3 - Optimizing the Model
*****************************

Starting off with the test:

.. code-block:: python

    import pytest
    import torch
    import transformers

    import ttnn
    from ttnn.model_preprocessing import preprocess_model_parameters

    import ttnn_optimized_bert
    from models.utility_functions import torch_random
    from tests.ttnn.utils_for_testing import assert_with_pcc


    @pytest.mark.parametrize("model_name", ["phiyodr/bert-large-finetuned-squad2"])
    @pytest.mark.parametrize("batch_size", [1])
    @pytest.mark.parametrize("sequence_size", [384])
    def test_bert_intermediate(device, model_name, batch_size, sequence_size):
        torch.manual_seed(0)

        config = transformers.BertConfig.from_pretrained(model_name)
        model = transformers.models.bert.modeling_bert.BertIntermediate(config).eval()

        torch_hidden_states = torch_random((batch_size, sequence_size, config.hidden_size), -0.1, 0.1)
        torch_output = model(torch_hidden_states)

        parameters = preprocess_model_parameters(
            initialize_model=lambda: model,
            device=device,  # Device to put the parameters on
            custom_preprocessor=ttnn_optimized_bert.custom_preprocessor,  # Set ttnn.bfloat8_b data type for the weights and biases
        )

        hidden_states = ttnn.from_torch(torch_hidden_states, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
        output = ttnn_optimized_bert.bert_intermediate(
            hidden_states,
            parameters=parameters,
        )
        output = ttnn.to_torch(output)

        assert_with_pcc(torch_output, output.to(torch_output.dtype), 0.999)

And the optimized model can be something like this:

.. code-block:: python

    # ttnn_optimized_bert.py

    import transformers

    import ttnn
    from ttnn.model_preprocessing import preprocess_linear_bias, preprocess_linear_weight


    def custom_preprocessor(model, name):
        parameters = {}
        if isinstance(model, transformers.models.bert.modeling_bert.BertIntermediate):
            # Convert the weights and biases of the linear layer to ttnn.bfloat8_b.
            # Nest them under "dense" so that the structure of ``parameters``
            # continues to follow the structure of the model class.
            parameters["dense"] = {}
            parameters["dense"]["weight"] = preprocess_linear_weight(model.dense.weight, dtype=ttnn.bfloat8_b)
            parameters["dense"]["bias"] = preprocess_linear_bias(model.dense.bias, dtype=ttnn.bfloat8_b)
        return parameters


    def bert_intermediate(
        hidden_states,
        *,
        parameters,
    ):
        batch_size, *_ = hidden_states.shape

        num_cores_x = 12
        output = ttnn.linear(
            hidden_states,
            parameters.dense.weight,
            bias=parameters.dense.bias,
            memory_config=ttnn.L1_MEMORY_CONFIG,  # Put the output into local core memory
            core_grid=ttnn.CoreGrid(y=batch_size, x=num_cores_x),  # Specify a manual core grid to get the best possible performance
            activation="gelu",  # Fuse the GELU activation into the linear operation
        )
        return output

More examples
*************

Additional examples can be found in `the integration tests`_.
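As a closing recap, the pieces above can also be wired together outside of pytest. The following driver is a sketch under stated assumptions: ``device_id=0``, the input shape, and the file name are illustrative and not part of the tutorial files:

.. code-block:: python

    # run_bert_intermediate.py -- a sketch, not part of the tutorial files

    import torch
    import transformers

    import ttnn
    from ttnn.model_preprocessing import preprocess_model_parameters

    import ttnn_optimized_bert

    config = transformers.BertConfig.from_pretrained("phiyodr/bert-large-finetuned-squad2")
    model = transformers.models.bert.modeling_bert.BertIntermediate(config).eval()

    device = ttnn.open_device(device_id=0)  # Open the first available Tenstorrent device
    try:
        parameters = preprocess_model_parameters(
            initialize_model=lambda: model,
            device=device,
            custom_preprocessor=ttnn_optimized_bert.custom_preprocessor,
        )

        torch_hidden_states = torch.rand(1, 384, config.hidden_size)  # Illustrative input
        hidden_states = ttnn.from_torch(torch_hidden_states, dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device)
        output = ttnn.to_torch(ttnn_optimized_bert.bert_intermediate(hidden_states, parameters=parameters))
    finally:
        ttnn.close_device(device)  # Release the device when done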