Accelerate inference speed of DNN on Intel CPU

To reduce the cost of our inference servers, I ran some experiments on how to speed up prediction for our model.

import time

import cv2
import torch
import torch.nn as nn

import pycls.core.builders as model_builder
from pycls.core.config import cfg

def pressure_predict(net, tensor_img, runs=10, k=10):
    """Time repeated forward passes of ``net`` and print the top-k scores.

    Calls ``net(tensor_img)`` ``runs`` times, applying a softmax over dim 1
    and ``torch.topk`` to every output, then prints the total elapsed time
    followed by the top-k values of the last run.

    Args:
        net: Callable mapping ``tensor_img`` to a score tensor that has a
            dim 1 with at least ``k`` entries.
        tensor_img: Input handed unchanged to ``net`` on every run.
        runs: Number of timed forward passes; must be at least 1.
        k: Number of top scores extracted from each output.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    # Autograd is disabled here so that every model variant is timed under
    # the same inference-only conditions, however the caller invokes us.
    with torch.no_grad():
        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        for _ in range(runs):
            # Functional softmax keeps this helper free of global state.
            scores = torch.softmax(net(tensor_img), dim=1)
            values, _indices = torch.topk(scores, k)
        elapsed = time.perf_counter() - start

    print("time:", elapsed)
    print(values)

if __name__ == "__main__":
    # Benchmark script: builds the classifier, loads its checkpoint and times
    # the same image through three variants of the model (eager, TorchScript,
    # Intel Extension for PyTorch).
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    # NOTE(review): SE_ON is False although the comment above names a RegNetY
    # variant -- confirm this matches the checkpoint being loaded.
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120
    eager_net = model_builder.build_model()
    eager_net.load_state_dict(
        torch.load("bird_cls_2754696.pth", map_location="cpu")
    )
    eager_net.eval()
    eager_net = eager_net.float()
    # Kept at module scope: benchmark helpers may look up `softmax` as a
    # global.
    softmax = nn.Softmax(dim=1).eval()

    # read image
    image_path = "blujay.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image: " + image_path)
    img = cv2.resize(img, (300, 300))
    # HWC uint8 array -> NCHW float tensor with a batch dimension of 1.
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # All three measurements run without autograd so they are comparable.
    with torch.no_grad():
        # 1) Eager-mode baseline.
        pressure_predict(eager_net, tensor_img)

        # 2) TorchScript: trace, then apply the inference optimizations.
        dummy_input = torch.randn(1, 3, 300, 300)
        with torch.jit.optimized_execution(True):
            traced_script_module = torch.jit.trace(eager_net, dummy_input)
        net = torch.jit.optimize_for_inference(traced_script_module)
        pressure_predict(net, tensor_img)

        # 3) Intel Extension for PyTorch.
        # Imported only now so the two earlier measurements run without the
        # extension loaded.
        import intel_extension_for_pytorch as ipex

        eager_net = eager_net.to(memory_format=torch.channels_last)
        tensor_img = tensor_img.to(memory_format=torch.channels_last)
        dummy_input = dummy_input.to(memory_format=torch.channels_last)
        # ipex.optimize is applied to the eager nn.Module and the result is
        # traced afterwards, which is the order used in the IPEX inference
        # examples. Passing an already-traced ScriptModule presumably leaves
        # it unoptimized -- verify against the installed IPEX version.
        ipex_net = ipex.optimize(eager_net)
        net = torch.jit.freeze(torch.jit.trace(ipex_net, dummy_input))
        pressure_predict(net, tensor_img)

Python

import torch.nn as nn

import pycls.core.builders as model_builder

from pycls.core.config import cfg

def pressure_predict(net, tensor_img, runs=10, k=10):
    """Time repeated forward passes of ``net`` and print the top-k scores.

    Calls ``net(tensor_img)`` ``runs`` times, applying a softmax over dim 1
    and ``torch.topk`` to every output, then prints the total elapsed time
    followed by the top-k values of the last run.

    Args:
        net: Callable mapping ``tensor_img`` to a score tensor that has a
            dim 1 with at least ``k`` entries.
        tensor_img: Input handed unchanged to ``net`` on every run.
        runs: Number of timed forward passes; must be at least 1.
        k: Number of top scores extracted from each output.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    if runs < 1:
        raise ValueError("runs must be at least 1")

    # Autograd is disabled here so that every model variant is timed under
    # the same inference-only conditions, however the caller invokes us.
    with torch.no_grad():
        # perf_counter is monotonic and high resolution, unlike time.time().
        start = time.perf_counter()
        for _ in range(runs):
            # Functional softmax keeps this helper free of global state.
            scores = torch.softmax(net(tensor_img), dim=1)
            values, _indices = torch.topk(scores, k)
        elapsed = time.perf_counter() - start

    print("time:", elapsed)
    print(values)

if __name__ == "__main__":
    # Benchmark script: builds the classifier, loads its checkpoint and times
    # the same image through three variants of the model (eager, TorchScript,
    # Intel Extension for PyTorch).
    cfg.MODEL.TYPE = "regnet"
    # RegNetY-8.0GF
    # NOTE(review): SE_ON is False although the comment above names a RegNetY
    # variant -- confirm this matches the checkpoint being loaded.
    cfg.REGNET.DEPTH = 17
    cfg.REGNET.SE_ON = False
    cfg.REGNET.W0 = 192
    cfg.REGNET.WA = 76.82
    cfg.REGNET.WM = 2.19
    cfg.REGNET.GROUP_W = 56
    cfg.BN.NUM_GROUPS = 4
    cfg.MODEL.NUM_CLASSES = 11120
    eager_net = model_builder.build_model()
    eager_net.load_state_dict(
        torch.load("bird_cls_2754696.pth", map_location="cpu")
    )
    eager_net.eval()
    eager_net = eager_net.float()
    # Kept at module scope: benchmark helpers may look up `softmax` as a
    # global.
    softmax = nn.Softmax(dim=1).eval()

    # read image
    image_path = "blujay.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image: " + image_path)
    img = cv2.resize(img, (300, 300))
    # HWC uint8 array -> NCHW float tensor with a batch dimension of 1.
    tensor_img = torch.from_numpy(img).unsqueeze(0).permute(0, 3, 1, 2).float()

    # All three measurements run without autograd so they are comparable.
    with torch.no_grad():
        # 1) Eager-mode baseline.
        pressure_predict(eager_net, tensor_img)

        # 2) TorchScript: trace, then apply the inference optimizations.
        dummy_input = torch.randn(1, 3, 300, 300)
        with torch.jit.optimized_execution(True):
            traced_script_module = torch.jit.trace(eager_net, dummy_input)
        net = torch.jit.optimize_for_inference(traced_script_module)
        pressure_predict(net, tensor_img)

        # 3) Intel Extension for PyTorch.
        # Imported only now so the two earlier measurements run without the
        # extension loaded.
        import intel_extension_for_pytorch as ipex

        eager_net = eager_net.to(memory_format=torch.channels_last)
        tensor_img = tensor_img.to(memory_format=torch.channels_last)
        dummy_input = dummy_input.to(memory_format=torch.channels_last)
        # ipex.optimize is applied to the eager nn.Module and the result is
        # traced afterwards, which is the order used in the IPEX inference
        # examples. Passing an already-traced ScriptModule presumably leaves
        # it unoptimized -- verify against the installed IPEX version.
        ipex_net = ipex.optimize(eager_net)
        net = torch.jit.freeze(torch.jit.trace(ipex_net, dummy_input))
        pressure_predict(net, tensor_img)

Accelerate inference speed of DNN on Intel CPU

Accelerate inference speed of DNN on Intel CPU

Recommend

想要做软文营销推广，这些知识您必须了解！

Apple Killed My Precious iPod. Can an iPad Mini Be My New Everyday Messaging Dev...

Confirmed: You're getting new Samsung Galaxy foldables next month

Modelling an Uncertain World

Persona releases a new risk verification engine to combat identity fraud

How semantic-based knowledge graphs accelerate the value of data lakes

NVIDIA RTX 4090 benchmark sets a record! Twice faster than RTX 3090

Wondershare PDFelement 9 - PDF Editor: The Smartest Choice to Work with PDFs

Valve warns against using Steam Deck in hot weather

企业软文营销要有针对性，这样效果才会好

About Joyk