# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _ir_module:

IRModule
========
This tutorial presents the core abstraction of Apache TVM Unity, the IRModule.
The IRModule encompasses the **entirety** of an ML model, incorporating the
computational graph, tensor programs, and potential calls to external libraries.

.. contents:: Table of Contents
   :local:
   :depth: 1
"""

import numpy as np
import tvm
from tvm import relax

######################################################################
# Create IRModule
# ---------------
# IRModules can be initialized in various ways. We demonstrate a few of them
# below.

import torch
from torch import fx, nn
from tvm.relax.frontend.torch import from_fx

######################################################################
# Import from existing models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The most common way to initialize an IRModule is to import from an existing
# model. Apache TVM Unity accommodates imports from a range of frameworks,
# such as PyTorch and ONNX. This tutorial demonstrates only the import process
# from PyTorch; a brief ONNX sketch follows the PyTorch example below. For other
# frameworks, please refer to the 🚧 corresponding tutorial.


# Create a dummy model
class TorchModel(nn.Module):
    def __init__(self):
        super(TorchModel, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x


# Give the input shape and data type
input_info = [((1, 784), "float32")]

# Convert the model to IRModule
with torch.no_grad():
    torch_fx_model = fx.symbolic_trace(TorchModel())
    mod_from_torch = from_fx(torch_fx_model, input_info, keep_params_as_input=True)

mod_from_torch, params_from_torch = relax.frontend.detach_params(mod_from_torch)
# Print the IRModule
mod_from_torch.show()
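
######################################################################
# Other frameworks follow a similar pattern. The sketch below assumes the Relax
# ONNX frontend (``tvm.relax.frontend.onnx.from_onnx``) is available in your build,
# and uses a hypothetical local file ``model.onnx`` whose input ``x`` has shape
# (1, 784); it is illustrative rather than runnable as part of this script.
#
# .. code-block:: python
#
#     import onnx
#     from tvm.relax.frontend.onnx import from_onnx
#
#     onnx_model = onnx.load("model.onnx")
#     mod_from_onnx = from_onnx(onnx_model, shape_dict={"x": (1, 784)})
#     mod_from_onnx.show()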

######################################################################
# Write with Relax NN Module
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also provides a set of PyTorch-like APIs to help users
# write the IRModule directly. For details, please refer to the 🚧 corresponding tutorial.

from tvm.relax.frontend import nn


class RelaxModel(nn.Module):
    def __init__(self):
        super(RelaxModel, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x


mod_from_relax, params_from_relax = RelaxModel().export_tvm(
    {"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
)
mod_from_relax.show()

######################################################################
# Create via TVMScript
# ~~~~~~~~~~~~~~~~~~~~
# TVMScript is a Python-based DSL for IRModules. We can print an IRModule
# directly in TVMScript syntax, or, alternatively, parse TVMScript to obtain
# an IRModule.

from tvm.script import ir as I
from tvm.script import relax as R


@I.ir_module
class TVMScriptModule:
    @R.function
    def main(
        x: R.Tensor((1, 784), dtype="float32"),
        fc1_weight: R.Tensor((256, 784), dtype="float32"),
        fc1_bias: R.Tensor((256,), dtype="float32"),
        fc2_weight: R.Tensor((10, 256), dtype="float32"),
        fc2_bias: R.Tensor((10,), dtype="float32"),
    ) -> R.Tensor((1, 10), dtype="float32"):
        R.func_attr({"num_input": 1})
        with R.dataflow():
            permute_dims = R.permute_dims(fc1_weight, axes=None)
            matmul = R.matmul(x, permute_dims, out_dtype="void")
            add = R.add(matmul, fc1_bias)
            relu = R.nn.relu(add)
            permute_dims1 = R.permute_dims(fc2_weight, axes=None)
            matmul1 = R.matmul(relu, permute_dims1, out_dtype="void")
            add1 = R.add(matmul1, fc2_bias)
            gv = add1
            R.output(gv)
        return gv


mod_from_script = TVMScriptModule
mod_from_script.show()

######################################################################
# Attributes of an IRModule
# -------------------------
# An IRModule is a collection of functions, indexed by GlobalVars.

mod = mod_from_torch
print(mod.get_global_vars())

######################################################################
# We can access the functions in the IRModule by indexing with the GlobalVars
# or their names.

# Index by the global var name
print(mod["main"])
# Index by the global var, and check that it is the same function
(gv,) = mod.get_global_vars()
assert mod[gv] == mod["main"]

######################################################################
# Transformations on IRModules
# ----------------------------
# Transformations are an important component of Apache TVM Unity. A transformation
# takes in an IRModule and outputs another IRModule. We can apply a sequence of
# transformations to an IRModule to obtain a new IRModule; this is the common way to
# optimize a model.
#
# In this getting started tutorial, we only demonstrate how to apply transformations
# to an IRModule. For details of each transformation, please refer to the
# :ref:`Transformation API Reference <api-relax-transformation>`

######################################################################
# We first apply the **LegalizeOps** transformation to the IRModule. This transformation
# converts the Relax module into a mixed stage, with both Relax and TensorIR functions
# within the same module. Meanwhile, the Relax operators are converted into ``call_tir``.

mod = mod_from_torch
mod = relax.transform.LegalizeOps()(mod)
mod.show()

######################################################################
# After the transformation, there are many more functions inside the module. Let's print
# the global vars again.

print(mod.get_global_vars())

######################################################################
# Next, Apache TVM Unity provides a set of default transformation pipelines to
# simplify the transformation process. We can apply a default pipeline to the module.
# The default **zero** pipeline contains the following fundamental transformations:
#
# - **LegalizeOps**: This transform converts the Relax operators into ``call_tir`` functions
#   with the corresponding TensorIR functions. After this transform, the IRModule will
#   contain both Relax functions and TensorIR functions.
# - **AnnotateTIROpPattern**: This transform annotates the pattern of the TensorIR functions,
#   preparing them for subsequent operator fusion.
# - **FoldConstant**: This pass performs constant folding, optimizing operations
#   involving constants.
# - **FuseOps and FuseTIR**: These two passes work together to fuse operators based on the
#   patterns annotated in the previous step (AnnotateTIROpPattern). These passes transform
#   both Relax functions and TensorIR functions.
#
# .. note::
#
#    Here, **LegalizeOps** is applied twice in the flow. The second application has no
#    effect, but it is harmless.
#
#    Any pass can appear more than once in the flow, since every pass is required to handle
#    all legal IRModule inputs. This design helps users construct their own pipelines; a
#    sketch of a hand-built pipeline follows the code below.

mod = relax.get_pipeline("zero")(mod)
mod.show()
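
######################################################################
# The default pipeline is simply a composition of passes, so a custom pipeline can be
# assembled the same way. The following is a sketch of a hand-built pipeline that mirrors
# the passes listed above; it is illustrative and not necessarily the exact definition of
# the built-in **zero** pipeline.

custom_pipeline = tvm.transform.Sequential(
    [
        relax.transform.LegalizeOps(),
        relax.transform.AnnotateTIROpPattern(),
        relax.transform.FoldConstant(),
        relax.transform.FuseOps(),
        relax.transform.FuseTIR(),
    ]
)
# Apply the hand-built pipeline to a fresh copy of the imported module.
custom_mod = custom_pipeline(mod_from_torch)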

######################################################################
# Deploy the IRModule Universally
# -------------------------------
# After the optimization, we can compile the model into a TVM runtime module.
# Notably, Apache TVM Unity provides universal deployment: the same IRModule can be
# deployed on different backends, including CPU, GPU, and other emerging backends.
#
# Deploy on CPU
# ~~~~~~~~~~~~~
# We can deploy the IRModule on CPU by specifying the target as ``llvm``.

exec = relax.build(mod, target="llvm")
dev = tvm.cpu()
vm = relax.VirtualMachine(exec, dev)

raw_data = np.random.rand(1, 784).astype("float32")
data = tvm.nd.array(raw_data, dev)
cpu_out = vm["main"](data, *params_from_torch["main"]).numpy()
print(cpu_out)
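
######################################################################
# As a quick sanity check (a sketch, assuming the traced ``torch_fx_model`` from earlier is
# still in scope and carries the same weights as the detached parameters), we can compare
# the TVM result against the original PyTorch model.

with torch.no_grad():
    torch_out = torch_fx_model(torch.from_numpy(raw_data)).numpy()
print(np.max(np.abs(cpu_out - torch_out)))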

######################################################################
# Deploy on GPU
# ~~~~~~~~~~~~~
# Besides the CPU backend, we can also deploy the IRModule on GPU. GPU programs require
# extra information, such as thread bindings and shared memory allocations, so a further
# transformation is needed to generate them.
#
# We use ``DLight`` to generate the GPU programs. In this tutorial, we won't go into
# the details of ``DLight``. For more information, please refer to the 🚧 corresponding tutorial.
#

from tvm import dlight as dl

with tvm.target.Target("cuda"):
gpu_mod = dl.ApplyDefaultSchedule(
dl.gpu.Matmul(),
dl.gpu.Fallback(),
)(mod)

######################################################################
# Now we can compile the IRModule for GPU, in a similar way to what we did on CPU.

exec = relax.build(gpu_mod, target="cuda")
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(exec, dev)
# Need to allocate data and params on GPU device
data = tvm.nd.array(raw_data, dev)
gpu_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
gpu_out = vm["main"](data, *gpu_params).numpy()
print(gpu_out)

# Check the correctness of the results
assert np.allclose(cpu_out, gpu_out, atol=1e-3)

######################################################################
# Deploy on Other Backends
# ~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also supports other backends, such as different kinds of GPUs
# (Metal, ROCm, Vulkan and OpenCL), different kinds of CPUs (x86, ARM), and other
# emerging backends (e.g., WebAssembly). The deployment process is similar to that
# for the GPU backend, as the sketch below illustrates.
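
######################################################################
# For example, targeting Metal follows the same schedule-build-run steps as CUDA; only the
# target and device names change. This is a sketch, assuming a Metal-capable machine and
# that DLight's default GPU schedules apply to the Metal target as they do to CUDA:
#
# .. code-block:: python
#
#     with tvm.target.Target("metal"):
#         metal_mod = dl.ApplyDefaultSchedule(
#             dl.gpu.Matmul(),
#             dl.gpu.Fallback(),
#         )(mod)
#
#     exec = relax.build(metal_mod, target="metal")
#     vm = relax.VirtualMachine(exec, tvm.metal())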