7
Accelerate inference speed of DNN on Intel CPU
source link: https://donghao.org/2022/07/21/accelerate-inference-speed-of-dnn-on-intel-cpu/
Go to the source link to read the article; there you can see the pictures, any updated content, and better typesetting. If the link is broken, please click the button below to view a snapshot taken at that time.
Accelerate inference speed of DNN on Intel CPU
To reduce the cost of our inference servers, I ran some experiments on how to speed up prediction for our model.
import time

import cv2
import torch
import torch.nn as nn

import pycls.core.builders as model_builder
from pycls.core.config import cfg


def pressure_predict(net, tensor_img, num_runs=10):
    """Run ``net`` on ``tensor_img`` repeatedly and print the total elapsed time.

    Each iteration applies softmax over dim 1 to the model output and takes
    the 10 largest values along the last dimension. The wall-clock time of
    all iterations and the top-10 values of the last one are printed.

    Args:
        net: Callable taking ``tensor_img`` and returning a tensor of scores;
            presumably shaped (batch, num_classes) with num_classes >= 10.
        tensor_img: Input passed unchanged to ``net``.
        num_runs: Number of timed forward passes.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")
    # Gradients are never needed here; disabling them inside the function
    # keeps every measurement comparable regardless of the caller's context.
    with torch.no_grad():
        # perf_counter is monotonic, unlike time.time().
        t0 = time.perf_counter()
        for _ in range(num_runs):
            result = torch.softmax(net(tensor_img), dim=1)
            values, _ = torch.topk(result, 10)
        t1 = time.perf_counter()
    print("time:", t1 - t0)
    print(values)


if __name__ == "__main__":
    # Describe the network through the pycls global config before building it.
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    # NOTE(review): SE_ON is False although the comment above says RegNetY;
    # confirm this matches the checkpoint.
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120

    net = model_builder.build_model()
    # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
    net.load_state_dict(torch.load("bird_cls_2754696.pth", map_location="cpu"))
    net.eval()
    net = net.float()
    softmax = nn.Softmax(dim=1).eval()

    # read image
    img = cv2.imread("blujay.jpg")
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: blujay.jpg")
    img = cv2.resize(img, (300, 300))
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # All three measurements run without autograd so they are comparable.
    with torch.no_grad():
        # 1) Plain eager model.
        pressure_predict(net, tensor_img)

        # 2) TorchScript trace optimized for inference. Keep the eager module:
        #    the IPEX step below has to start from it, not from the trace.
        eager_net = net
        dummy_input = torch.randn(1, 3, 300, 300)
        with torch.jit.optimized_execution(True):
            traced_script_module = torch.jit.trace(net, dummy_input)
            net = torch.jit.optimize_for_inference(traced_script_module)
            pressure_predict(net, tensor_img)

        # 3) IPEX with channels_last. Imported here rather than at the top,
        #    presumably so loading the extension cannot affect the two
        #    measurements above - confirm.
        import intel_extension_for_pytorch as ipex

        # ipex.optimize is documented for eager nn.Module objects, followed by
        # trace + freeze; handing it the frozen ScriptModule skips its work.
        net = eager_net.to(memory_format=torch.channels_last)
        net = ipex.optimize(net)
        tensor_img = tensor_img.to(memory_format=torch.channels_last)
        net = torch.jit.trace(
            net, dummy_input.to(memory_format=torch.channels_last)
        )
        net = torch.jit.freeze(net)
        pressure_predict(net, tensor_img)
Python
import time

import cv2
import torch
import torch.nn as nn

import pycls.core.builders as model_builder
from pycls.core.config import cfg
def pressure_predict(net, tensor_img, num_runs=10):
    """Run ``net`` on ``tensor_img`` repeatedly and print the total elapsed time.

    Each iteration applies softmax over dim 1 to the model output and takes
    the 10 largest values along the last dimension. The wall-clock time of
    all iterations and the top-10 values of the last one are printed.

    Args:
        net: Callable taking ``tensor_img`` and returning a tensor of scores;
            presumably shaped (batch, num_classes) with num_classes >= 10.
        tensor_img: Input passed unchanged to ``net``.
        num_runs: Number of timed forward passes.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")
    # Gradients are never needed here; disabling them inside the function
    # keeps every measurement comparable regardless of the caller's context.
    with torch.no_grad():
        # perf_counter is monotonic, unlike time.time().
        t0 = time.perf_counter()
        for _ in range(num_runs):
            # Functional softmax avoids depending on a module-level global.
            result = torch.softmax(net(tensor_img), dim=1)
            values, _ = torch.topk(result, 10)
        t1 = time.perf_counter()
    print("time:", t1 - t0)
    print(values)
if __name__ == "__main__":
    # Describe the network through the pycls global config before building it.
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    # NOTE(review): SE_ON is False although the comment above says RegNetY;
    # confirm this matches the checkpoint.
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120

    net = model_builder.build_model()
    # NOTE(security): torch.load unpickles the file; only load trusted checkpoints.
    net.load_state_dict(torch.load("bird_cls_2754696.pth", map_location="cpu"))
    net.eval()
    net = net.float()
    # Module-level name; code that looks up `softmax` globally relies on it.
    softmax = nn.Softmax(dim=1).eval()

    # read image
    img = cv2.imread("blujay.jpg")
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: blujay.jpg")
    img = cv2.resize(img, (300, 300))
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # All three measurements run without autograd so they are comparable;
    # previously only the last one was wrapped in no_grad.
    with torch.no_grad():
        # 1) Plain eager model.
        pressure_predict(net, tensor_img)

        # 2) TorchScript trace optimized for inference. Keep the eager module:
        #    the IPEX step below has to start from it, not from the trace.
        eager_net = net
        dummy_input = torch.randn(1, 3, 300, 300)
        with torch.jit.optimized_execution(True):
            traced_script_module = torch.jit.trace(net, dummy_input)
            net = torch.jit.optimize_for_inference(traced_script_module)
            pressure_predict(net, tensor_img)

        # 3) IPEX with channels_last. Imported here rather than at the top,
        #    presumably so loading the extension cannot affect the two
        #    measurements above - confirm.
        import intel_extension_for_pytorch as ipex

        # ipex.optimize is documented for eager nn.Module objects, followed by
        # trace + freeze; handing it the frozen ScriptModule skips its work.
        net = eager_net.to(memory_format=torch.channels_last)
        net = ipex.optimize(net)
        tensor_img = tensor_img.to(memory_format=torch.channels_last)
        net = torch.jit.trace(
            net, dummy_input.to(memory_format=torch.channels_last)
        )
        net = torch.jit.freeze(net)
        pressure_predict(net, tensor_img)
Recommend
About Joyk
Aggregate valuable and interesting links.
Joyk means Joy of geeK