downloads.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
  2. """
  3. Download utils
  4. """
  5. import logging
  6. import subprocess
  7. import urllib
  8. from pathlib import Path
  9. import requests
  10. import torch
  11. def is_url(url, check=True):
  12. # Check if string is URL and check if URL exists
  13. try:
  14. url = str(url)
  15. result = urllib.parse.urlparse(url)
  16. assert all([result.scheme, result.netloc]) # check if is url
  17. return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
  18. except (AssertionError, urllib.request.HTTPError):
  19. return False
  20. def gsutil_getsize(url=''):
  21. # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
  22. output = subprocess.check_output(['gsutil', 'du', url], shell=True, encoding='utf-8')
  23. if output:
  24. return int(output.split()[0])
  25. return 0
  26. def url_getsize(url='https://ultralytics.com/images/bus.jpg'):
  27. # Return downloadable file size in bytes
  28. response = requests.head(url, allow_redirects=True)
  29. return int(response.headers.get('content-length', -1))
  30. def curl_download(url, filename, *, silent: bool = False) -> bool:
  31. """
  32. Download a file from a url to a filename using curl.
  33. """
  34. silent_option = 'sS' if silent else '' # silent
  35. proc = subprocess.run([
  36. 'curl',
  37. '-#',
  38. f'-{silent_option}L',
  39. url,
  40. '--output',
  41. filename,
  42. '--retry',
  43. '9',
  44. '-C',
  45. '-', ])
  46. return proc.returncode == 0
  47. def safe_download(file, url, url2=None, min_bytes=1E0, error_msg=''):
  48. # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
  49. from utils.general import LOGGER
  50. file = Path(file)
  51. assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
  52. try: # url1
  53. LOGGER.info(f'Downloading {url} to {file}...')
  54. torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
  55. assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
  56. except Exception as e: # url2
  57. if file.exists():
  58. file.unlink() # remove partial downloads
  59. LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
  60. # curl download, retry and resume on fail
  61. curl_download(url2 or url, file)
  62. finally:
  63. if not file.exists() or file.stat().st_size < min_bytes: # check
  64. if file.exists():
  65. file.unlink() # remove partial downloads
  66. LOGGER.info(f'ERROR: {assert_msg}\n{error_msg}')
  67. LOGGER.info('')
  68. def attempt_download(file, repo='ultralytics/yolov5', release='v7.0'):
  69. # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc.
  70. from utils.general import LOGGER
  71. def github_assets(repository, version='latest'):
  72. # Return GitHub repo tag (i.e. 'v7.0') and assets (i.e. ['yolov5s.pt', 'yolov5m.pt', ...])
  73. if version != 'latest':
  74. version = f'tags/{version}' # i.e. tags/v7.0
  75. response = requests.get(f'https://api.github.com/repos/{repository}/releases/{version}').json() # github api
  76. return response['tag_name'], [x['name'] for x in response['assets']] # tag, assets
  77. file = Path(str(file).strip().replace("'", ''))
  78. if not file.exists():
  79. # URL specified
  80. name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
  81. if str(file).startswith(('http:/', 'https:/')): # download
  82. url = str(file).replace(':/', '://') # Pathlib turns :// -> :/
  83. file = name.split('?')[0] # parse authentication https://url.com/file.txt?auth...
  84. if Path(file).is_file():
  85. LOGGER.info(f'Found {url} locally at {file}') # file already exists
  86. else:
  87. safe_download(file=file, url=url, min_bytes=1E5)
  88. return file
  89. # GitHub assets
  90. assets = [f'yolov5{size}{suffix}.pt' for size in 'nsmlx' for suffix in ('', '6', '-cls', '-seg')] # default
  91. try:
  92. tag, assets = github_assets(repo, release)
  93. except Exception:
  94. try:
  95. tag, assets = github_assets(repo) # latest release
  96. except Exception:
  97. try:
  98. tag = subprocess.check_output('git tag', shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
  99. except Exception:
  100. tag = release
  101. if name in assets:
  102. file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
  103. safe_download(file,
  104. url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
  105. min_bytes=1E5,
  106. error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')
  107. return str(file)