# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Common modules
"""

import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparse

import cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp

from utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (LOGGER, ROOT, Profile, check_requirements, check_suffix, check_version, colorstr,
                           increment_path, is_jupyter, make_divisible, non_max_suppression, scale_boxes, xywh2xyxy,
                           xyxy2xywh, yaml_load)
from utils.plots import Annotator, colors, save_one_box
from utils.torch_utils import copy_attr, smart_inference_mode


def autopad(k, p=None, d=1):  # kernel, padding, dilation
    # Pad to 'same' shape outputs
    if d > 1:
        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
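
# Illustrative note (not part of the original file): autopad() returns the padding that keeps spatial size
# unchanged at stride 1, e.g. autopad(3) -> 1, autopad(5) -> 2, and autopad(3, d=2) -> 2, because a 3x3
# kernel dilated by 2 covers an effective 5x5 window.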


class Conv(nn.Module):
    # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
    default_act = nn.SiLU()  # default activation

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        return self.act(self.conv(x))
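
# Usage sketch (illustrative shapes only, not part of the original file):
#   conv = Conv(64, 128, k=3, s=2)              # 3x3 conv, stride 2, BatchNorm + SiLU
#   y = conv(torch.zeros(1, 64, 32, 32))        # -> torch.Size([1, 128, 16, 16])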


class DWConv(Conv):
    # Depth-wise convolution
    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)


class DWConvTranspose2d(nn.ConvTranspose2d):
    # Depth-wise transpose convolution
    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))


class TransformerLayer(nn.Module):
    # Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
    def __init__(self, c, num_heads):
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        x = self.fc2(self.fc1(x)) + x
        return x


class TransformerBlock(nn.Module):
    # Vision Transformer https://arxiv.org/abs/2010.11929
    def __init__(self, c1, c2, num_heads, num_layers):
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)


class Bottleneck(nn.Module):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class BottleneckCSP(nn.Module):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
        self.cv4 = Conv(2 * c_, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
        self.act = nn.SiLU()
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        y1 = self.cv3(self.m(self.cv1(x)))
        y2 = self.cv2(x)
        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))


class CrossConv(nn.Module):
    # Cross Convolution Downsample
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, (1, k), (1, s))
        self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
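
# Shape sketch (illustrative values only, not part of the original file): C3 splits the input into two 1x1-conv
# branches, runs n Bottleneck blocks on one branch, then concatenates and fuses with a final 1x1 conv:
#   m = C3(128, 128, n=3)
#   y = m(torch.zeros(1, 128, 40, 40))          # -> torch.Size([1, 128, 40, 40]) (spatial size unchanged)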


class C3x(C3):
    # C3 module with cross-convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))


class C3TR(C3):
    # C3 module with TransformerBlock()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = TransformerBlock(c_, c_, 4, n)


class C3SPP(C3):
    # C3 module with SPP()
    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)
        self.m = SPP(c_, c_, k)


class C3Ghost(C3):
    # C3 module with GhostBottleneck()
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__(c1, c2, n, shortcut, g, e)
        c_ = int(c2 * e)  # hidden channels
        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))


class SPP(nn.Module):
    # Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
    def __init__(self, c1, c2, k=(5, 9, 13)):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')  # suppress torch 1.9.0 max_pool2d() warning
            y1 = self.m(x)
            y2 = self.m(y1)
            return self.cv2(torch.cat((x, y1, y2, self.m(y2)), 1))
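
# Informal note (not part of the original file): chaining three 5x5 max-pools gives effective receptive fields
# of 5, 9 and 13, which is why SPPF(k=5) matches SPP(k=(5, 9, 13)) while re-using intermediate pooling results.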


class Focus(nn.Module):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
        # return self.conv(self.contract(x))


class GhostConv(nn.Module):
    # Ghost Convolution https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
        super().__init__()
        c_ = c2 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)

    def forward(self, x):
        y = self.cv1(x)
        return torch.cat((y, self.cv2(y)), 1)


class GhostBottleneck(nn.Module):
    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
        super().__init__()
        c_ = c2 // 2
        self.conv = nn.Sequential(
            GhostConv(c1, c_, 1, 1),  # pw
            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
            GhostConv(c_, c2, 1, 1, act=False))  # pw-linear
        self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
                                      Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()

    def forward(self, x):
        return self.conv(x) + self.shortcut(x)


class Contract(nn.Module):
    # Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
        s = self.gain
        x = x.view(b, c, h // s, s, w // s, s)  # x(1,64,40,2,40,2)
        x = x.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return x.view(b, c * s * s, h // s, w // s)  # x(1,256,40,40)


class Expand(nn.Module):
    # Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
    def __init__(self, gain=2):
        super().__init__()
        self.gain = gain

    def forward(self, x):
        b, c, h, w = x.size()  # assert C / s ** 2 == 0, 'Indivisible gain'
        s = self.gain
        x = x.view(b, s, s, c // s ** 2, h, w)  # x(1,2,2,16,80,80)
        x = x.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return x.view(b, c // s ** 2, h * s, w * s)  # x(1,16,160,160)


class Concat(nn.Module):
    # Concatenate a list of tensors along dimension
    def __init__(self, dimension=1):
        super().__init__()
        self.d = dimension

    def forward(self, x):
        return torch.cat(x, self.d)


class DetectMultiBackend(nn.Module):
    # YOLOv5 MultiBackend class for python inference on various backends
    def __init__(self, weights='yolov5s.pt', device=torch.device('cpu'), dnn=False, data=None, fp16=False, fuse=True):
        # Usage:
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlmodel
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine or triton  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCHW)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != 'cpu'  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f'Loading {w} for TorchScript inference...')
            extra_files = {'config.txt': ''}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files['config.txt']:  # load metadata dict
                d = json.loads(extra_files['config.txt'],
                               object_hook=lambda d: {
                                   int(k) if k.isdigit() else k: v
                                   for k, v in d.items()})
                stride, names = int(d['stride']), d['names']
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
            check_requirements('opencv-python>=4.5.4')
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
            check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))
            import onnxruntime
            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if 'stride' in meta:
                stride, names = int(meta['stride']), eval(meta['names'])
        elif xml:  # OpenVINO
            LOGGER.info(f'Loading {w} for OpenVINO inference...')
            check_requirements('openvino')  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch
            ie = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir
            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))
            if network.get_parameters()[0].get_layout().empty:
                network.get_parameters()[0].set_layout(Layout('NCHW'))
            batch_dim = get_batch(network)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            executable_network = ie.compile_model(network, device_name='CPU')  # device_name="MYRIAD" for Intel NCS2
            stride, names = self._load_metadata(Path(w).with_suffix('.yaml'))  # load metadata
        elif engine:  # TensorRT
            LOGGER.info(f'Loading {w} for TensorRT inference...')
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download
            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0
            if device.type == 'cpu':
                device = torch.device('cuda:0')
            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            for i in range(model.num_bindings):
                name = model.get_binding_name(i)
                dtype = trt.nptype(model.get_binding_dtype(i))
                if model.binding_is_input(i):
                    if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                        dynamic = True
                        context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                    if dtype == np.float16:
                        fp16 = True
                else:  # output
                    output_names.append(name)
                shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings['images'].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f'Loading {w} for CoreML inference...')
            import coremltools as ct
            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f'Loading {w} for TensorFlow SavedModel inference...')
            import tensorflow as tf
            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f'Loading {w} for TensorFlow GraphDef inference...')
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=''), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f'{x}:0' for x in list(set(name_list) - set(input_list)) if not x.startswith('NoOp'))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, 'rb') as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs='x:0', outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf
                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f'Loading {w} for TensorFlow Lite Edge TPU inference...')
                delegate = {
                    'Linux': 'libedgetpu.so.1',
                    'Darwin': 'libedgetpu.1.dylib',
                    'Windows': 'edgetpu.dll'}[platform.system()]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, 'r') as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode('utf-8'))
                    stride, names = int(meta['stride']), meta['names']
        elif tfjs:  # TF.js
            raise NotImplementedError('ERROR: YOLOv5 TF.js inference is not supported')
        elif paddle:  # PaddlePaddle
            LOGGER.info(f'Loading {w} for PaddlePaddle inference...')
            check_requirements('paddlepaddle-gpu' if cuda else 'paddlepaddle')
            import paddle.inference as pdi
            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob('*.pdmodel'))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix('.pdiparams')
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f'Using {w} as Triton Inference Server...')
            check_requirements('tritonclient[all]')
            from utils.triton import TritonRemoteModel
            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith('tensorflow')
        else:
            raise NotImplementedError(f'ERROR: {w} is not a supported format')

        # class names
        if 'names' not in locals():
            names = yaml_load(data)['names'] if data else {i: f'class{i}' for i in range(999)}
        if names[0] == 'n01440764' and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / 'data/ImageNet.yaml')['names']  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        # YOLOv5 MultiBackend inference
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.executable_network([im]).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings['images'].shape:
                i = self.model.get_binding_index('images')
                self.context.set_binding_shape(i, im.shape)  # reshape if dynamic
                self.bindings['images'] = self.bindings['images']._replace(shape=im.shape)
                for name in self.output_names:
                    i = self.model.get_binding_index(name)
                    self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings['images'].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs['images'] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype('uint8'))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({'image': im})  # coordinates are xywh normalized
            if 'confidence' in y:
                box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
                conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input = self.input_details[0]
                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input['quantization']
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input['index'], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output['index'])
                    if int8:
                        scale, zero_point = output['quantization']
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        else:
            return self.from_numpy(y)

    def from_numpy(self, x):
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        # Warmup model by running inference once
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != 'cpu' or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p='path/to/model.pt'):
        # Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from export import export_formats
        from utils.downloads import is_url
        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ['http', 'grpc']), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path('path/to/meta.yaml')):
        # Load metadata from meta.yaml if it exists
        if f.exists():
            d = yaml_load(f)
            return d['stride'], d['names']  # assign stride, names
        return None, None
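
# Usage sketch (illustrative; assumes a local 'yolov5s.pt' and a 640x640 input, not part of the original file):
#   backend = DetectMultiBackend('yolov5s.pt', device=torch.device('cpu'))
#   backend.warmup(imgsz=(1, 3, 640, 640))
#   pred = backend(torch.zeros(1, 3, 640, 640))     # raw predictions; pass through non_max_suppression() next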


class AutoShape(nn.Module):
    # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        super().__init__()
        if verbose:
            LOGGER.info('Adding AutoShape... ')
        copy_attr(self, model, include=('yaml', 'nc', 'hyp', 'names', 'stride', 'abc'), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values

    def _apply(self, fn):
        # Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        # Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != 'cpu')  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f'image{i}'  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith('http') else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, 'filename', f) or f
                files.append(Path(f).with_suffix('.jpg').name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # image shape
                g = max(size) / max(s)  # gain
                shape1.append([int(y * g) for y in s])
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(y if self.dmb else y[0],
                                        self.conf,
                                        self.iou,
                                        self.classes,
                                        self.agnostic,
                                        self.multi_label,
                                        max_det=self.max_det)  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)
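
# Usage sketch (illustrative; assumes a DetectMultiBackend model wrapped by AutoShape, not part of the original file):
#   model = AutoShape(DetectMultiBackend('yolov5s.pt'))
#   results = model('https://ultralytics.com/images/zidane.jpg', size=640)  # returns a Detections object
#   results.print()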


class Detections:
    # YOLOv5 detections class for inference results
    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        self.t = tuple(x.t / self.n * 1E3 for x in times)  # timestamps (ms)
        self.s = tuple(shape)  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path('')):
        s, crops = '', []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f'\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} '  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(', ')
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f'{self.names[int(cls)]} {conf:.2f}'
                        if crop:
                            file = save_dir / 'crops' / self.names[int(cls)] / self.files[i] if save else None
                            crops.append({
                                'box': box,
                                'conf': conf,
                                'cls': cls,
                                'label': label,
                                'im': save_one_box(box, im, file=file, save=save)})
                        else:  # all others
                            annotator.box_label(box, label if labels else '', color=colors(cls))
                    im = annotator.im
            else:
                s += '(no detections)'

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                if is_jupyter():
                    from IPython.display import display
                    display(im)
                else:
                    im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip('\n')
            return f'{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}' % self.t
        if crop:
            if save:
                LOGGER.info(f'Saved results to {save_dir}\n')
            return crops

    @TryExcept('Showing images is not supported in this environment')
    def show(self, labels=True):
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir='runs/detect/exp', exist_ok=False):
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir='runs/detect/exp', exist_ok=False):
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        # return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
        new = copy(self)  # return copy
        ca = 'xmin', 'ymin', 'xmax', 'ymax', 'confidence', 'class', 'name'  # xyxy columns
        cb = 'xcenter', 'ycenter', 'width', 'height', 'confidence', 'class', 'name'  # xywh columns
        for k, c in zip(['xyxy', 'xyxyn', 'xywh', 'xywhn'], [ca, ca, cb, cb]):
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        # return a list of Detections objects, i.e. 'for result in results.tolist():'
        r = range(self.n)  # iterable
        x = [Detections([self.ims[i]], [self.pred[i]], [self.files[i]], self.times, self.names, self.s) for i in r]
        # for d in x:
        #     for k in ['ims', 'pred', 'xyxy', 'xyxyn', 'xywh', 'xywhn']:
        #         setattr(d, k, getattr(d, k)[0])  # pop out of list
        return x

    def print(self):
        LOGGER.info(self.__str__())

    def __len__(self):  # override len(results)
        return self.n

    def __str__(self):  # override print(results)
        return self._run(pprint=True)  # print results

    def __repr__(self):
        return f'YOLOv5 {self.__class__} instance\n' + self.__str__()
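
# Access sketch (illustrative; 'results' here stands for a Detections instance, not defined in this file):
#   results.xyxy[0]               # per-image tensor of (x1, y1, x2, y2, conf, cls)
#   results.pandas().xyxy[0]      # the same detections as a pandas DataFrame
#   results.crop(save=False)      # list of per-detection crops and metadata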


class Proto(nn.Module):
    # YOLOv5 mask Proto module for segmentation models
    def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))


class Classify(nn.Module):
    # YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
    def __init__(self,
                 c1,
                 c2,
                 k=1,
                 s=1,
                 p=None,
                 g=1,
                 dropout_p=0.0):  # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=dropout_p, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        if isinstance(x, list):
            x = torch.cat(x, 1)
        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
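
# Usage sketch (illustrative shapes only, not part of the original file):
#   head = Classify(512, 1000)                      # map 512-channel features to 1000 class logits
#   logits = head(torch.zeros(2, 512, 20, 20))      # -> torch.Size([2, 1000])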