__init__.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
  2. """
  3. Logging utils
  4. """
  5. import os
  6. import warnings
  7. from pathlib import Path
  8. import pkg_resources as pkg
  9. import torch
  10. from utils.general import LOGGER, colorstr, cv2
  11. from utils.loggers.clearml.clearml_utils import ClearmlLogger
  12. from utils.loggers.wandb.wandb_utils import WandbLogger
  13. from utils.plots import plot_images, plot_labels, plot_results
  14. from utils.torch_utils import de_parallel
  15. LOGGERS = ('csv', 'tb', 'wandb', 'clearml', 'comet') # *.csv, TensorBoard, Weights & Biases, ClearML
  16. RANK = int(os.getenv('RANK', -1))
  17. try:
  18. from torch.utils.tensorboard import SummaryWriter
  19. except ImportError:
  20. SummaryWriter = lambda *args: None # None = SummaryWriter(str)
  21. try:
  22. import wandb
  23. assert hasattr(wandb, '__version__') # verify package import not local dir
  24. if pkg.parse_version(wandb.__version__) >= pkg.parse_version('0.12.2') and RANK in {0, -1}:
  25. try:
  26. wandb_login_success = wandb.login(timeout=30)
  27. except wandb.errors.UsageError: # known non-TTY terminal issue
  28. wandb_login_success = False
  29. if not wandb_login_success:
  30. wandb = None
  31. except (ImportError, AssertionError):
  32. wandb = None
  33. try:
  34. import clearml
  35. assert hasattr(clearml, '__version__') # verify package import not local dir
  36. except (ImportError, AssertionError):
  37. clearml = None
  38. try:
  39. if RANK in {0, -1}:
  40. import comet_ml
  41. assert hasattr(comet_ml, '__version__') # verify package import not local dir
  42. from utils.loggers.comet import CometLogger
  43. else:
  44. comet_ml = None
  45. except (ImportError, AssertionError):
  46. comet_ml = None
  47. class Loggers():
  48. # YOLOv5 Loggers class
  49. def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
  50. self.save_dir = save_dir
  51. self.weights = weights
  52. self.opt = opt
  53. self.hyp = hyp
  54. self.plots = not opt.noplots # plot results
  55. self.logger = logger # for printing results to console
  56. self.include = include
  57. self.keys = [
  58. 'train/box_loss',
  59. 'train/obj_loss',
  60. 'train/cls_loss', # train loss
  61. 'metrics/precision',
  62. 'metrics/recall',
  63. 'metrics/mAP_0.5',
  64. 'metrics/mAP_0.5:0.95', # metrics
  65. 'val/box_loss',
  66. 'val/obj_loss',
  67. 'val/cls_loss', # val loss
  68. 'x/lr0',
  69. 'x/lr1',
  70. 'x/lr2'] # params
  71. self.best_keys = ['best/epoch', 'best/precision', 'best/recall', 'best/mAP_0.5', 'best/mAP_0.5:0.95']
  72. for k in LOGGERS:
  73. setattr(self, k, None) # init empty logger dictionary
  74. self.csv = True # always log to csv
  75. # Messages
  76. if not comet_ml:
  77. prefix = colorstr('Comet: ')
  78. s = f"{prefix}run 'pip install comet_ml' to automatically track and visualize YOLOv5 🚀 runs in Comet"
  79. self.logger.info(s)
  80. # TensorBoard
  81. s = self.save_dir
  82. if 'tb' in self.include and not self.opt.evolve:
  83. prefix = colorstr('TensorBoard: ')
  84. self.logger.info(f"{prefix}Start with 'tensorboard --logdir {s.parent}', view at http://localhost:6006/")
  85. self.tb = SummaryWriter(str(s))
  86. # W&B
  87. if wandb and 'wandb' in self.include:
  88. self.opt.hyp = self.hyp # add hyperparameters
  89. self.wandb = WandbLogger(self.opt)
  90. else:
  91. self.wandb = None
  92. # ClearML
  93. if clearml and 'clearml' in self.include:
  94. try:
  95. self.clearml = ClearmlLogger(self.opt, self.hyp)
  96. except Exception:
  97. self.clearml = None
  98. prefix = colorstr('ClearML: ')
  99. LOGGER.warning(f'{prefix}WARNING ⚠️ ClearML is installed but not configured, skipping ClearML logging.'
  100. f' See https://docs.ultralytics.com/yolov5/tutorials/clearml_logging_integration#readme')
  101. else:
  102. self.clearml = None
  103. # Comet
  104. if comet_ml and 'comet' in self.include:
  105. if isinstance(self.opt.resume, str) and self.opt.resume.startswith('comet://'):
  106. run_id = self.opt.resume.split('/')[-1]
  107. self.comet_logger = CometLogger(self.opt, self.hyp, run_id=run_id)
  108. else:
  109. self.comet_logger = CometLogger(self.opt, self.hyp)
  110. else:
  111. self.comet_logger = None
  112. @property
  113. def remote_dataset(self):
  114. # Get data_dict if custom dataset artifact link is provided
  115. data_dict = None
  116. if self.clearml:
  117. data_dict = self.clearml.data_dict
  118. if self.wandb:
  119. data_dict = self.wandb.data_dict
  120. if self.comet_logger:
  121. data_dict = self.comet_logger.data_dict
  122. return data_dict
  123. def on_train_start(self):
  124. if self.comet_logger:
  125. self.comet_logger.on_train_start()
  126. def on_pretrain_routine_start(self):
  127. if self.comet_logger:
  128. self.comet_logger.on_pretrain_routine_start()
  129. def on_pretrain_routine_end(self, labels, names):
  130. # Callback runs on pre-train routine end
  131. if self.plots:
  132. plot_labels(labels, names, self.save_dir)
  133. paths = self.save_dir.glob('*labels*.jpg') # training labels
  134. if self.wandb:
  135. self.wandb.log({'Labels': [wandb.Image(str(x), caption=x.name) for x in paths]})
  136. # if self.clearml:
  137. # pass # ClearML saves these images automatically using hooks
  138. if self.comet_logger:
  139. self.comet_logger.on_pretrain_routine_end(paths)
  140. def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
  141. log_dict = dict(zip(self.keys[:3], vals))
  142. # Callback runs on train batch end
  143. # ni: number integrated batches (since train start)
  144. if self.plots:
  145. if ni < 3:
  146. f = self.save_dir / f'train_batch{ni}.jpg' # filename
  147. plot_images(imgs, targets, paths, f)
  148. if ni == 0 and self.tb and not self.opt.sync_bn:
  149. log_tensorboard_graph(self.tb, model, imgsz=(self.opt.imgsz, self.opt.imgsz))
  150. if ni == 10 and (self.wandb or self.clearml):
  151. files = sorted(self.save_dir.glob('train*.jpg'))
  152. if self.wandb:
  153. self.wandb.log({'Mosaics': [wandb.Image(str(f), caption=f.name) for f in files if f.exists()]})
  154. if self.clearml:
  155. self.clearml.log_debug_samples(files, title='Mosaics')
  156. if self.comet_logger:
  157. self.comet_logger.on_train_batch_end(log_dict, step=ni)
  158. def on_train_epoch_end(self, epoch):
  159. # Callback runs on train epoch end
  160. if self.wandb:
  161. self.wandb.current_epoch = epoch + 1
  162. if self.comet_logger:
  163. self.comet_logger.on_train_epoch_end(epoch)
  164. def on_val_start(self):
  165. if self.comet_logger:
  166. self.comet_logger.on_val_start()
  167. def on_val_image_end(self, pred, predn, path, names, im):
  168. # Callback runs on val image end
  169. if self.wandb:
  170. self.wandb.val_one_image(pred, predn, path, names, im)
  171. if self.clearml:
  172. self.clearml.log_image_with_boxes(path, pred, names, im)
  173. def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
  174. if self.comet_logger:
  175. self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
  176. def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
  177. # Callback runs on val end
  178. if self.wandb or self.clearml:
  179. files = sorted(self.save_dir.glob('val*.jpg'))
  180. if self.wandb:
  181. self.wandb.log({'Validation': [wandb.Image(str(f), caption=f.name) for f in files]})
  182. if self.clearml:
  183. self.clearml.log_debug_samples(files, title='Validation')
  184. if self.comet_logger:
  185. self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
  186. def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
  187. # Callback runs at the end of each fit (train+val) epoch
  188. x = dict(zip(self.keys, vals))
  189. if self.csv:
  190. file = self.save_dir / 'results.csv'
  191. n = len(x) + 1 # number of cols
  192. s = '' if file.exists() else (('%20s,' * n % tuple(['epoch'] + self.keys)).rstrip(',') + '\n') # add header
  193. with open(file, 'a') as f:
  194. f.write(s + ('%20.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
  195. if self.tb:
  196. for k, v in x.items():
  197. self.tb.add_scalar(k, v, epoch)
  198. elif self.clearml: # log to ClearML if TensorBoard not used
  199. for k, v in x.items():
  200. title, series = k.split('/')
  201. self.clearml.task.get_logger().report_scalar(title, series, v, epoch)
  202. if self.wandb:
  203. if best_fitness == fi:
  204. best_results = [epoch] + vals[3:7]
  205. for i, name in enumerate(self.best_keys):
  206. self.wandb.wandb_run.summary[name] = best_results[i] # log best results in the summary
  207. self.wandb.log(x)
  208. self.wandb.end_epoch()
  209. if self.clearml:
  210. self.clearml.current_epoch_logged_images = set() # reset epoch image limit
  211. self.clearml.current_epoch += 1
  212. if self.comet_logger:
  213. self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
  214. def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
  215. # Callback runs on model save event
  216. if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
  217. if self.wandb:
  218. self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
  219. if self.clearml:
  220. self.clearml.task.update_output_model(model_path=str(last),
  221. model_name='Latest Model',
  222. auto_delete_file=False)
  223. if self.comet_logger:
  224. self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
  225. def on_train_end(self, last, best, epoch, results):
  226. # Callback runs on training end, i.e. saving best model
  227. if self.plots:
  228. plot_results(file=self.save_dir / 'results.csv') # save results.png
  229. files = ['results.png', 'confusion_matrix.png', *(f'{x}_curve.png' for x in ('F1', 'PR', 'P', 'R'))]
  230. files = [(self.save_dir / f) for f in files if (self.save_dir / f).exists()] # filter
  231. self.logger.info(f"Results saved to {colorstr('bold', self.save_dir)}")
  232. if self.tb and not self.clearml: # These images are already captured by ClearML by now, we don't want doubles
  233. for f in files:
  234. self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
  235. if self.wandb:
  236. self.wandb.log(dict(zip(self.keys[3:10], results)))
  237. self.wandb.log({'Results': [wandb.Image(str(f), caption=f.name) for f in files]})
  238. # Calling wandb.log. TODO: Refactor this into WandbLogger.log_model
  239. if not self.opt.evolve:
  240. wandb.log_artifact(str(best if best.exists() else last),
  241. type='model',
  242. name=f'run_{self.wandb.wandb_run.id}_model',
  243. aliases=['latest', 'best', 'stripped'])
  244. self.wandb.finish_run()
  245. if self.clearml and not self.opt.evolve:
  246. self.clearml.task.update_output_model(model_path=str(best if best.exists() else last),
  247. name='Best Model',
  248. auto_delete_file=False)
  249. if self.comet_logger:
  250. final_results = dict(zip(self.keys[3:10], results))
  251. self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
  252. def on_params_update(self, params: dict):
  253. # Update hyperparams or configs of the experiment
  254. if self.wandb:
  255. self.wandb.wandb_run.config.update(params, allow_val_change=True)
  256. if self.comet_logger:
  257. self.comet_logger.on_params_update(params)
  258. class GenericLogger:
  259. """
  260. YOLOv5 General purpose logger for non-task specific logging
  261. Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
  262. Arguments
  263. opt: Run arguments
  264. console_logger: Console logger
  265. include: loggers to include
  266. """
  267. def __init__(self, opt, console_logger, include=('tb', 'wandb')):
  268. # init default loggers
  269. self.save_dir = Path(opt.save_dir)
  270. self.include = include
  271. self.console_logger = console_logger
  272. self.csv = self.save_dir / 'results.csv' # CSV logger
  273. if 'tb' in self.include:
  274. prefix = colorstr('TensorBoard: ')
  275. self.console_logger.info(
  276. f"{prefix}Start with 'tensorboard --logdir {self.save_dir.parent}', view at http://localhost:6006/")
  277. self.tb = SummaryWriter(str(self.save_dir))
  278. if wandb and 'wandb' in self.include:
  279. self.wandb = wandb.init(project=web_project_name(str(opt.project)),
  280. name=None if opt.name == 'exp' else opt.name,
  281. config=opt)
  282. else:
  283. self.wandb = None
  284. def log_metrics(self, metrics, epoch):
  285. # Log metrics dictionary to all loggers
  286. if self.csv:
  287. keys, vals = list(metrics.keys()), list(metrics.values())
  288. n = len(metrics) + 1 # number of cols
  289. s = '' if self.csv.exists() else (('%23s,' * n % tuple(['epoch'] + keys)).rstrip(',') + '\n') # header
  290. with open(self.csv, 'a') as f:
  291. f.write(s + ('%23.5g,' * n % tuple([epoch] + vals)).rstrip(',') + '\n')
  292. if self.tb:
  293. for k, v in metrics.items():
  294. self.tb.add_scalar(k, v, epoch)
  295. if self.wandb:
  296. self.wandb.log(metrics, step=epoch)
  297. def log_images(self, files, name='Images', epoch=0):
  298. # Log images to all loggers
  299. files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path
  300. files = [f for f in files if f.exists()] # filter by exists
  301. if self.tb:
  302. for f in files:
  303. self.tb.add_image(f.stem, cv2.imread(str(f))[..., ::-1], epoch, dataformats='HWC')
  304. if self.wandb:
  305. self.wandb.log({name: [wandb.Image(str(f), caption=f.name) for f in files]}, step=epoch)
  306. def log_graph(self, model, imgsz=(640, 640)):
  307. # Log model graph to all loggers
  308. if self.tb:
  309. log_tensorboard_graph(self.tb, model, imgsz)
  310. def log_model(self, model_path, epoch=0, metadata={}):
  311. # Log model to all loggers
  312. if self.wandb:
  313. art = wandb.Artifact(name=f'run_{wandb.run.id}_model', type='model', metadata=metadata)
  314. art.add_file(str(model_path))
  315. wandb.log_artifact(art)
  316. def update_params(self, params):
  317. # Update the parameters logged
  318. if self.wandb:
  319. wandb.run.config.update(params, allow_val_change=True)
  320. def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
  321. # Log model graph to TensorBoard
  322. try:
  323. p = next(model.parameters()) # for device, type
  324. imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand
  325. im = torch.zeros((1, 3, *imgsz)).to(p.device).type_as(p) # input image (WARNING: must be zeros, not empty)
  326. with warnings.catch_warnings():
  327. warnings.simplefilter('ignore') # suppress jit trace warning
  328. tb.add_graph(torch.jit.trace(de_parallel(model), im, strict=False), [])
  329. except Exception as e:
  330. LOGGER.warning(f'WARNING ⚠️ TensorBoard graph visualization failure {e}')
  331. def web_project_name(project):
  332. # Convert local project name to web project name
  333. if not project.startswith('runs/train'):
  334. return project
  335. suffix = '-Classify' if project.endswith('-cls') else '-Segment' if project.endswith('-seg') else ''
  336. return f'YOLOv5{suffix}'