# clearml_utils.py
  1. """Main Logger class for ClearML experiment tracking."""
  2. import glob
  3. import re
  4. from pathlib import Path
  5. import numpy as np
  6. import yaml
  7. from utils.plots import Annotator, colors
  8. try:
  9. import clearml
  10. from clearml import Dataset, Task
  11. assert hasattr(clearml, '__version__') # verify package import not local dir
  12. except (ImportError, AssertionError):
  13. clearml = None
  14. def construct_dataset(clearml_info_string):
  15. """Load in a clearml dataset and fill the internal data_dict with its contents.
  16. """
  17. dataset_id = clearml_info_string.replace('clearml://', '')
  18. dataset = Dataset.get(dataset_id=dataset_id)
  19. dataset_root_path = Path(dataset.get_local_copy())
  20. # We'll search for the yaml file definition in the dataset
  21. yaml_filenames = list(glob.glob(str(dataset_root_path / '*.yaml')) + glob.glob(str(dataset_root_path / '*.yml')))
  22. if len(yaml_filenames) > 1:
  23. raise ValueError('More than one yaml file was found in the dataset root, cannot determine which one contains '
  24. 'the dataset definition this way.')
  25. elif len(yaml_filenames) == 0:
  26. raise ValueError('No yaml definition found in dataset root path, check that there is a correct yaml file '
  27. 'inside the dataset root path.')
  28. with open(yaml_filenames[0]) as f:
  29. dataset_definition = yaml.safe_load(f)
  30. assert set(dataset_definition.keys()).issuperset(
  31. {'train', 'test', 'val', 'nc', 'names'}
  32. ), "The right keys were not found in the yaml file, make sure it at least has the following keys: ('train', 'test', 'val', 'nc', 'names')"
  33. data_dict = dict()
  34. data_dict['train'] = str(
  35. (dataset_root_path / dataset_definition['train']).resolve()) if dataset_definition['train'] else None
  36. data_dict['test'] = str(
  37. (dataset_root_path / dataset_definition['test']).resolve()) if dataset_definition['test'] else None
  38. data_dict['val'] = str(
  39. (dataset_root_path / dataset_definition['val']).resolve()) if dataset_definition['val'] else None
  40. data_dict['nc'] = dataset_definition['nc']
  41. data_dict['names'] = dataset_definition['names']
  42. return data_dict
  43. class ClearmlLogger:
  44. """Log training runs, datasets, models, and predictions to ClearML.
  45. This logger sends information to ClearML at app.clear.ml or to your own hosted server. By default,
  46. this information includes hyperparameters, system configuration and metrics, model metrics, code information and
  47. basic data metrics and analyses.
  48. By providing additional command line arguments to train.py, datasets,
  49. models and predictions can also be logged.
  50. """
  51. def __init__(self, opt, hyp):
  52. """
  53. - Initialize ClearML Task, this object will capture the experiment
  54. - Upload dataset version to ClearML Data if opt.upload_dataset is True
  55. arguments:
  56. opt (namespace) -- Commandline arguments for this run
  57. hyp (dict) -- Hyperparameters for this run
  58. """
  59. self.current_epoch = 0
  60. # Keep tracked of amount of logged images to enforce a limit
  61. self.current_epoch_logged_images = set()
  62. # Maximum number of images to log to clearML per epoch
  63. self.max_imgs_to_log_per_epoch = 16
  64. # Get the interval of epochs when bounding box images should be logged
  65. self.bbox_interval = opt.bbox_interval
  66. self.clearml = clearml
  67. self.task = None
  68. self.data_dict = None
  69. if self.clearml:
  70. self.task = Task.init(
  71. project_name=opt.project if opt.project != 'runs/train' else 'YOLOv5',
  72. task_name=opt.name if opt.name != 'exp' else 'Training',
  73. tags=['YOLOv5'],
  74. output_uri=True,
  75. reuse_last_task_id=opt.exist_ok,
  76. auto_connect_frameworks={'pytorch': False}
  77. # We disconnect pytorch auto-detection, because we added manual model save points in the code
  78. )
  79. # ClearML's hooks will already grab all general parameters
  80. # Only the hyperparameters coming from the yaml config file
  81. # will have to be added manually!
  82. self.task.connect(hyp, name='Hyperparameters')
  83. self.task.connect(opt, name='Args')
  84. # Make sure the code is easily remotely runnable by setting the docker image to use by the remote agent
  85. self.task.set_base_docker('ultralytics/yolov5:latest',
  86. docker_arguments='--ipc=host -e="CLEARML_AGENT_SKIP_PYTHON_ENV_INSTALL=1"',
  87. docker_setup_bash_script='pip install clearml')
  88. # Get ClearML Dataset Version if requested
  89. if opt.data.startswith('clearml://'):
  90. # data_dict should have the following keys:
  91. # names, nc (number of classes), test, train, val (all three relative paths to ../datasets)
  92. self.data_dict = construct_dataset(opt.data)
  93. # Set data to data_dict because wandb will crash without this information and opt is the best way
  94. # to give it to them
  95. opt.data = self.data_dict
  96. def log_debug_samples(self, files, title='Debug Samples'):
  97. """
  98. Log files (images) as debug samples in the ClearML task.
  99. arguments:
  100. files (List(PosixPath)) a list of file paths in PosixPath format
  101. title (str) A title that groups together images with the same values
  102. """
  103. for f in files:
  104. if f.exists():
  105. it = re.search(r'_batch(\d+)', f.name)
  106. iteration = int(it.groups()[0]) if it else 0
  107. self.task.get_logger().report_image(title=title,
  108. series=f.name.replace(it.group(), ''),
  109. local_path=str(f),
  110. iteration=iteration)
  111. def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_threshold=0.25):
  112. """
  113. Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
  114. arguments:
  115. image_path (PosixPath) the path the original image file
  116. boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
  117. class_names (dict): dict containing mapping of class int to class name
  118. image (Tensor): A torch tensor containing the actual image data
  119. """
  120. if len(self.current_epoch_logged_images) < self.max_imgs_to_log_per_epoch and self.current_epoch >= 0:
  121. # Log every bbox_interval times and deduplicate for any intermittend extra eval runs
  122. if self.current_epoch % self.bbox_interval == 0 and image_path not in self.current_epoch_logged_images:
  123. im = np.ascontiguousarray(np.moveaxis(image.mul(255).clamp(0, 255).byte().cpu().numpy(), 0, 2))
  124. annotator = Annotator(im=im, pil=True)
  125. for i, (conf, class_nr, box) in enumerate(zip(boxes[:, 4], boxes[:, 5], boxes[:, :4])):
  126. color = colors(i)
  127. class_name = class_names[int(class_nr)]
  128. confidence_percentage = round(float(conf) * 100, 2)
  129. label = f'{class_name}: {confidence_percentage}%'
  130. if conf > conf_threshold:
  131. annotator.rectangle(box.cpu().numpy(), outline=color)
  132. annotator.box_label(box.cpu().numpy(), label=label, color=color)
  133. annotated_image = annotator.result()
  134. self.task.get_logger().report_image(title='Bounding Boxes',
  135. series=image_path.name,
  136. iteration=self.current_epoch,
  137. image=annotated_image)
  138. self.current_epoch_logged_images.add(image_path)