# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
TensorFlow, Keras and TFLite versions of YOLOv5
Authored by https://github.com/zldrobit in PR https://github.com/ultralytics/yolov5/pull/1127

Usage:
    $ python models/tf.py --weights yolov5s.pt

Export:
    $ python export.py --weights yolov5s.pt --include saved_model pb tflite tfjs
"""

import argparse
import sys
from copy import deepcopy
from pathlib import Path

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
# ROOT = ROOT.relative_to(Path.cwd())  # relative

import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from tensorflow import keras

from models.common import (C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv,
                           DWConvTranspose2d, Focus, autopad)
from models.experimental import MixConv2d, attempt_load
from models.yolo import Detect, Segment
from utils.activations import SiLU
from utils.general import LOGGER, make_divisible, print_args


class TFBN(keras.layers.Layer):
    # TensorFlow BatchNormalization wrapper
    def __init__(self, w=None):
        super().__init__()
        self.bn = keras.layers.BatchNormalization(
            beta_initializer=keras.initializers.Constant(w.bias.numpy()),
            gamma_initializer=keras.initializers.Constant(w.weight.numpy()),
            moving_mean_initializer=keras.initializers.Constant(w.running_mean.numpy()),
            moving_variance_initializer=keras.initializers.Constant(w.running_var.numpy()),
            epsilon=w.eps)

    def call(self, inputs):
        return self.bn(inputs)


class TFPad(keras.layers.Layer):
    # Pad inputs in spatial dimensions 1 and 2
    def __init__(self, pad):
        super().__init__()
        if isinstance(pad, int):
            self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
        else:  # tuple/list
            self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])

    def call(self, inputs):
        return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
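
# A minimal shape sketch for TFPad (illustrative only): for an NHWC tensor x of shape (1, 4, 4, 3),
# TFPad(1)(x) zero-pads height and width to (1, 6, 6, 3), and TFPad((1, 2))(x) pads h by 1 and
# w by 2 to (1, 6, 8, 3).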


class TFConv(keras.layers.Layer):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        # ch_in, ch_out, weights, kernel, stride, padding, groups
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
        # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
        conv = keras.layers.Conv2D(
            filters=c2,
            kernel_size=k,
            strides=s,
            padding='SAME' if s == 1 else 'VALID',
            use_bias=not hasattr(w, 'bn'),
            kernel_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
            bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
        self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
        self.act = activations(w.act) if act else tf.identity

    def call(self, inputs):
        return self.act(self.bn(self.conv(inputs)))
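
# A minimal sketch of the 'SAME' vs 'VALID' padding workaround above (illustrative shapes):
# PyTorch's Conv2d(k=3, s=2, p=1) has no exact TF 'SAME' equivalent, so strided TFConv pads
# explicitly and then convolves with 'VALID', e.g.:
#   x = tf.zeros((1, 640, 640, 3))
#   y = keras.layers.Conv2D(32, 3, strides=2, padding='VALID')(TFPad(autopad(3, None))(x))
#   assert y.shape == (1, 320, 320, 32)  # matches the PyTorch output resolution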


class TFDWConv(keras.layers.Layer):
    # Depthwise convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
        # ch_in, ch_out, weights, kernel, stride, padding, groups
        super().__init__()
        assert c2 % c1 == 0, f'TFDWConv() output={c2} must be a multiple of input={c1} channels'
        conv = keras.layers.DepthwiseConv2D(
            kernel_size=k,
            depth_multiplier=c2 // c1,
            strides=s,
            padding='SAME' if s == 1 else 'VALID',
            use_bias=not hasattr(w, 'bn'),
            depthwise_initializer=keras.initializers.Constant(w.conv.weight.permute(2, 3, 1, 0).numpy()),
            bias_initializer='zeros' if hasattr(w, 'bn') else keras.initializers.Constant(w.conv.bias.numpy()))
        self.conv = conv if s == 1 else keras.Sequential([TFPad(autopad(k, p)), conv])
        self.bn = TFBN(w.bn) if hasattr(w, 'bn') else tf.identity
        self.act = activations(w.act) if act else tf.identity

    def call(self, inputs):
        return self.act(self.bn(self.conv(inputs)))


class TFDWConvTranspose2d(keras.layers.Layer):
    # Depthwise ConvTranspose2d
    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
        # ch_in, ch_out, weights, kernel, stride, padding, groups
        super().__init__()
        assert c1 == c2, f'TFDWConvTranspose2d() output={c2} must be equal to input={c1} channels'
        assert k == 4 and p1 == 1, 'TFDWConvTranspose2d() only valid for k=4 and p1=1'
        weight, bias = w.weight.permute(2, 3, 1, 0).numpy(), w.bias.numpy()
        self.c1 = c1
        self.conv = [
            keras.layers.Conv2DTranspose(filters=1,
                                         kernel_size=k,
                                         strides=s,
                                         padding='VALID',
                                         output_padding=p2,
                                         use_bias=True,
                                         kernel_initializer=keras.initializers.Constant(weight[..., i:i + 1]),
                                         bias_initializer=keras.initializers.Constant(bias[i])) for i in range(c1)]

    def call(self, inputs):
        return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]


class TFFocus(keras.layers.Layer):
    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
        # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

    def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
        # inputs = inputs / 255  # normalize 0-255 to 0-1
        inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
        return self.conv(tf.concat(inputs, 3))


class TFBottleneck(keras.layers.Layer):
    # Standard bottleneck
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c_, c2, 3, 1, g=g, w=w.cv2)
        self.add = shortcut and c1 == c2

    def call(self, inputs):
        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))


class TFCrossConv(keras.layers.Layer):
    # Cross Convolution
    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
        self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
        self.add = shortcut and c1 == c2

    def call(self, inputs):
        return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))


class TFConv2d(keras.layers.Layer):
    # Substitution for PyTorch nn.Conv2D
    def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
        super().__init__()
        assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
        self.conv = keras.layers.Conv2D(filters=c2,
                                        kernel_size=k,
                                        strides=s,
                                        padding='VALID',
                                        use_bias=bias,
                                        kernel_initializer=keras.initializers.Constant(
                                            w.weight.permute(2, 3, 1, 0).numpy()),
                                        bias_initializer=keras.initializers.Constant(w.bias.numpy()) if bias else None)

    def call(self, inputs):
        return self.conv(inputs)


class TFBottleneckCSP(keras.layers.Layer):
    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv2d(c1, c_, 1, 1, bias=False, w=w.cv2)
        self.cv3 = TFConv2d(c_, c_, 1, 1, bias=False, w=w.cv3)
        self.cv4 = TFConv(2 * c_, c2, 1, 1, w=w.cv4)
        self.bn = TFBN(w.bn)
        self.act = lambda x: keras.activations.swish(x)
        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])

    def call(self, inputs):
        y1 = self.cv3(self.m(self.cv1(inputs)))
        y2 = self.cv2(inputs)
        return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))


class TFC3(keras.layers.Layer):
    # CSP Bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
        self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])

    def call(self, inputs):
        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))


class TFC3x(keras.layers.Layer):
    # C3 module with cross-convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
        # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
        self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
        self.m = keras.Sequential([
            TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])

    def call(self, inputs):
        return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))


class TFSPP(keras.layers.Layer):
    # Spatial pyramid pooling layer used in YOLOv3-SPP
    def __init__(self, c1, c2, k=(5, 9, 13), w=None):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c_ * (len(k) + 1), c2, 1, 1, w=w.cv2)
        self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding='SAME') for x in k]

    def call(self, inputs):
        x = self.cv1(inputs)
        return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))


class TFSPPF(keras.layers.Layer):
    # Spatial pyramid pooling-Fast layer
    def __init__(self, c1, c2, k=5, w=None):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
        self.cv2 = TFConv(c_ * 4, c2, 1, 1, w=w.cv2)
        self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding='SAME')

    def call(self, inputs):
        x = self.cv1(inputs)
        y1 = self.m(x)
        y2 = self.m(y1)
        return self.cv2(tf.concat([x, y1, y2, self.m(y2)], 3))


class TFDetect(keras.layers.Layer):
    # TF YOLOv5 Detect layer
    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
        super().__init__()
        self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [tf.zeros(1)] * self.nl  # init grid
        self.anchors = tf.convert_to_tensor(w.anchors.numpy(), dtype=tf.float32)
        self.anchor_grid = tf.reshape(self.anchors * tf.reshape(self.stride, [self.nl, 1, 1]), [self.nl, 1, -1, 1, 2])
        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]
        self.training = False  # set to False after building model
        self.imgsz = imgsz
        for i in range(self.nl):
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            self.grid[i] = self._make_grid(nx, ny)

    def call(self, inputs):
        z = []  # inference output
        x = []
        for i in range(self.nl):
            x.append(self.m[i](inputs[i]))
            # x(bs,20,20,255) to x(bs,3,20,20,85)
            ny, nx = self.imgsz[0] // self.stride[i], self.imgsz[1] // self.stride[i]
            x[i] = tf.reshape(x[i], [-1, ny * nx, self.na, self.no])

            if not self.training:  # inference
                y = x[i]
                grid = tf.transpose(self.grid[i], [0, 2, 1, 3]) - 0.5
                anchor_grid = tf.transpose(self.anchor_grid[i], [0, 2, 1, 3]) * 4
                xy = (tf.sigmoid(y[..., 0:2]) * 2 + grid) * self.stride[i]  # xy
                wh = tf.sigmoid(y[..., 2:4]) ** 2 * anchor_grid
                # Normalize xywh to 0-1 to reduce calibration error
                xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                wh /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
                y = tf.concat([xy, wh, tf.sigmoid(y[..., 4:5 + self.nc]), y[..., 5 + self.nc:]], -1)
                z.append(tf.reshape(y, [-1, self.na * ny * nx, self.no]))

        return tf.transpose(x, [0, 2, 1, 3]) if self.training else (tf.concat(z, 1),)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        # yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
        # return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
        xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
        return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
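
# A worked note on the decode in TFDetect.call() (derived from the code above): with raw output t,
#   xy = (2 * sigmoid(t_xy) - 0.5 + grid_xy) * stride
#   wh = (2 * sigmoid(t_wh)) ** 2 * anchor_wh
# The -0.5 offset is pre-folded into `grid` and the factor 4 (= 2 ** 2) into `anchor_grid`, and
# xy/wh are then divided by image size so exported TFLite models emit 0-1 normalized coordinates.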


class TFSegment(TFDetect):
    # YOLOv5 Segment head for segmentation models
    def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
        super().__init__(nc, anchors, ch, imgsz, w)
        self.nm = nm  # number of masks
        self.npr = npr  # number of protos
        self.no = 5 + nc + self.nm  # number of outputs per anchor
        self.m = [TFConv2d(x, self.no * self.na, 1, w=w.m[i]) for i, x in enumerate(ch)]  # output conv
        self.proto = TFProto(ch[0], self.npr, self.nm, w=w.proto)  # protos
        self.detect = TFDetect.call

    def call(self, x):
        p = self.proto(x[0])
        # p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0]))  # (optional) full-size protos
        p = tf.transpose(p, [0, 3, 1, 2])  # from shape(1,160,160,32) to shape(1,32,160,160)
        x = self.detect(self, x)
        return (x, p) if self.training else (x[0], p)


class TFProto(keras.layers.Layer):

    def __init__(self, c1, c_=256, c2=32, w=None):
        super().__init__()
        self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
        self.upsample = TFUpsample(None, scale_factor=2, mode='nearest')
        self.cv2 = TFConv(c_, c_, k=3, w=w.cv2)
        self.cv3 = TFConv(c_, c2, w=w.cv3)

    def call(self, inputs):
        return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))


class TFUpsample(keras.layers.Layer):
    # TF version of torch.nn.Upsample()
    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
        super().__init__()
        assert scale_factor % 2 == 0, 'scale_factor must be multiple of 2'
        self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
        # self.upsample = keras.layers.UpSampling2D(size=scale_factor, interpolation=mode)
        # with default arguments: align_corners=False, half_pixel_centers=False
        # self.upsample = lambda x: tf.raw_ops.ResizeNearestNeighbor(images=x,
        #                                                            size=(x.shape[1] * 2, x.shape[2] * 2))

    def call(self, inputs):
        return self.upsample(inputs)


class TFConcat(keras.layers.Layer):
    # TF version of torch.concat()
    def __init__(self, dimension=1, w=None):
        super().__init__()
        assert dimension == 1, 'convert only NCHW to NHWC concat'
        self.d = 3

    def call(self, inputs):
        return tf.concat(inputs, self.d)


def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        m_str = m
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except NameError:
                pass

        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [
                nn.Conv2d, Conv, DWConv, DWConvTranspose2d, Bottleneck, SPP, SPPF, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3x]:
            c1, c2 = ch[f], args[0]
            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2

            args = [c1, c2, *args[1:]]
            if m in [BottleneckCSP, C3, C3x]:
                args.insert(2, n)
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[-1 if x == -1 else x + 1] for x in f)
        elif m in [Detect, Segment]:
            args.append([ch[x + 1] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, 8)
            args.append(imgsz)
        else:
            c2 = ch[f]

        tf_m = eval('TF' + m_str.replace('nn.', ''))
        m_ = keras.Sequential([tf_m(*args, w=model.model[i][j]) for j in range(n)]) if n > 1 \
            else tf_m(*args, w=model.model[i])  # module

        torch_m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in torch_m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{str(n):>3}{np:>10}  {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        ch.append(c2)
    return keras.Sequential(layers), sorted(save)
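
# A minimal sketch of the rows parse_model() consumes (an assumed yolov5s.yaml-style entry):
#   [from, number, module, args] -> e.g. [-1, 3, 'C3', [128]]
# 'from' indexes the input layer(s), 'number' is scaled by depth_multiple, the module name is
# eval'd to a class, and args are rewritten with resolved channel counts before instantiation.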


class TFModel:
    # TF YOLOv5 model
    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
        super().__init__()
        if isinstance(cfg, dict):
            self.yaml = cfg  # model dict
        else:  # is *.yaml
            import yaml  # for torch hub
            self.yaml_file = Path(cfg).name
            with open(cfg) as f:
                self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict

        # Define model
        if nc and nc != self.yaml['nc']:
            LOGGER.info(f"Overriding {cfg} nc={self.yaml['nc']} with nc={nc}")
            self.yaml['nc'] = nc  # override yaml value
        self.model, self.savelist = parse_model(deepcopy(self.yaml), ch=[ch], model=model, imgsz=imgsz)

    def predict(self,
                inputs,
                tf_nms=False,
                agnostic_nms=False,
                topk_per_class=100,
                topk_all=100,
                iou_thres=0.45,
                conf_thres=0.25):
        y = []  # outputs
        x = inputs
        for m in self.model.layers:
            if m.f != -1:  # if not from previous layer
                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers

            x = m(x)  # run
            y.append(x if m.i in self.savelist else None)  # save output

        # Add TensorFlow NMS
        if tf_nms:
            boxes = self._xywh2xyxy(x[0][..., :4])
            probs = x[0][:, :, 4:5]
            classes = x[0][:, :, 5:]
            scores = probs * classes
            if agnostic_nms:
                nms = AgnosticNMS()((boxes, classes, scores), topk_all, iou_thres, conf_thres)
            else:
                boxes = tf.expand_dims(boxes, 2)
                nms = tf.image.combined_non_max_suppression(boxes,
                                                            scores,
                                                            topk_per_class,
                                                            topk_all,
                                                            iou_thres,
                                                            conf_thres,
                                                            clip_boxes=False)
            return (nms,)
        return x  # output [1,6300,85] = [xywh, conf, class0, class1, ...]
        # x = x[0]  # [x(1,6300,85), ...] to x(6300,85)
        # xywh = x[..., :4]  # x(6300,4) boxes
        # conf = x[..., 4:5]  # x(6300,1) confidences
        # cls = tf.reshape(tf.cast(tf.argmax(x[..., 5:], axis=1), tf.float32), (-1, 1))  # x(6300,1) classes
        # return tf.concat([conf, cls, xywh], 1)

    @staticmethod
    def _xywh2xyxy(xywh):
        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
        return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
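
# A worked example of TFModel._xywh2xyxy (illustrative numbers): box [x, y, w, h] = [10, 10, 4, 6]
# maps to [x1, y1, x2, y2] = [8, 7, 12, 13], i.e. center coordinates +/- half the width and height.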


class AgnosticNMS(keras.layers.Layer):
    # TF Agnostic NMS
    def call(self, input, topk_all, iou_thres, conf_thres):
        # wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
        return tf.map_fn(lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
                         input,
                         fn_output_signature=(tf.float32, tf.float32, tf.float32, tf.int32),
                         name='agnostic_nms')

    @staticmethod
    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):  # agnostic NMS
        boxes, classes, scores = x
        class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
        scores_inp = tf.reduce_max(scores, -1)
        selected_inds = tf.image.non_max_suppression(boxes,
                                                     scores_inp,
                                                     max_output_size=topk_all,
                                                     iou_threshold=iou_thres,
                                                     score_threshold=conf_thres)
        selected_boxes = tf.gather(boxes, selected_inds)
        padded_boxes = tf.pad(selected_boxes,
                              paddings=[[0, topk_all - tf.shape(selected_boxes)[0]], [0, 0]],
                              mode='CONSTANT',
                              constant_values=0.0)
        selected_scores = tf.gather(scores_inp, selected_inds)
        padded_scores = tf.pad(selected_scores,
                               paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
                               mode='CONSTANT',
                               constant_values=-1.0)
        selected_classes = tf.gather(class_inds, selected_inds)
        padded_classes = tf.pad(selected_classes,
                                paddings=[[0, topk_all - tf.shape(selected_boxes)[0]]],
                                mode='CONSTANT',
                                constant_values=-1.0)
        valid_detections = tf.shape(selected_inds)[0]
        return padded_boxes, padded_scores, padded_classes, valid_detections
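
# Note on the padding in _nms() above: per-image selections are padded to a fixed length of
# topk_all so tf.map_fn can stack them, with boxes zero-padded and scores/classes padded with
# -1 as "no detection" sentinels; valid_detections holds the count of real boxes per image.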


def activations(act=nn.SiLU):
    # Returns TF activation from input PyTorch activation
    if isinstance(act, nn.LeakyReLU):
        return lambda x: keras.activations.relu(x, alpha=0.1)
    elif isinstance(act, nn.Hardswish):
        return lambda x: x * tf.nn.relu6(x + 3) * 0.166666667
    elif isinstance(act, (nn.SiLU, SiLU)):
        return lambda x: keras.activations.swish(x)
    else:
        raise Exception(f'no matching TensorFlow activation found for PyTorch activation {act}')


def representative_dataset_gen(dataset, ncalib=100):
    # Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
    for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
        im = np.transpose(img, [1, 2, 0])
        im = np.expand_dims(im, axis=0).astype(np.float32)
        im /= 255
        yield [im]
        if n >= ncalib:
            break
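
# A minimal usage sketch (hypothetical `keras_model`/`dataset` objects, mirroring the TFLite
# int8 calibration flow):
#   converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
#   converter.representative_dataset = lambda: representative_dataset_gen(dataset, ncalib=100)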


def run(
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # inference size h,w
        batch_size=1,  # batch size
        dynamic=False,  # dynamic batch size
):
    # PyTorch model
    im = torch.zeros((batch_size, 3, *imgsz))  # BCHW image
    model = attempt_load(weights, device=torch.device('cpu'), inplace=True, fuse=False)
    _ = model(im)  # inference
    model.info()

    # TensorFlow model
    im = tf.zeros((batch_size, *imgsz, 3))  # BHWC image
    tf_model = TFModel(cfg=model.yaml, model=model, nc=model.nc, imgsz=imgsz)
    _ = tf_model.predict(im)  # inference

    # Keras model
    im = keras.Input(shape=(*imgsz, 3), batch_size=None if dynamic else batch_size)
    keras_model = keras.Model(inputs=im, outputs=tf_model.predict(im))
    keras_model.summary()

    LOGGER.info('PyTorch, TensorFlow and Keras models successfully verified.\nUse export.py for TF model export.')


def parse_opt():
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default=ROOT / 'yolov5s.pt', help='weights path')
    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--dynamic', action='store_true', help='dynamic batch size')
    opt = parser.parse_args()
    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
    print_args(vars(opt))
    return opt


def main(opt):
    run(**vars(opt))


if __name__ == '__main__':
    opt = parse_opt()
    main(opt)