autobatch.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. # YOLOv5 🚀 by Ultralytics, GPL-3.0 license
  2. """
  3. Auto-batch utils
  4. """
  5. from copy import deepcopy
  6. import numpy as np
  7. import torch
  8. from torch.cuda import amp
  9. from utils.general import LOGGER, colorstr
  10. from utils.torch_utils import profile
  11. def check_train_batch_size(model, imgsz=640):
  12. # Check YOLOv5 training batch size
  13. with amp.autocast():
  14. return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
  15. def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
  16. # Automatically estimate best batch size to use `fraction` of available CUDA memory
  17. # Usage:
  18. # import torch
  19. # from utils.autobatch import autobatch
  20. # model = torch.hub.load('ultralytics/yolov5', 'yolov5s', autoshape=False)
  21. # print(autobatch(model))
  22. prefix = colorstr('AutoBatch: ')
  23. LOGGER.info(f'{prefix}Computing optimal batch size for --imgsz {imgsz}')
  24. device = next(model.parameters()).device # get model device
  25. if device.type == 'cpu':
  26. LOGGER.info(f'{prefix}CUDA not detected, using default CPU batch-size {batch_size}')
  27. return batch_size
  28. gb = 1 << 30 # bytes to GiB (1024 ** 3)
  29. d = str(device).upper() # 'CUDA:0'
  30. properties = torch.cuda.get_device_properties(device) # device properties
  31. t = properties.total_memory / gb # (GiB)
  32. r = torch.cuda.memory_reserved(device) / gb # (GiB)
  33. a = torch.cuda.memory_allocated(device) / gb # (GiB)
  34. f = t - (r + a) # free inside reserved
  35. LOGGER.info(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')
  36. batch_sizes = [1, 2, 4, 8, 16]
  37. try:
  38. img = [torch.zeros(b, 3, imgsz, imgsz) for b in batch_sizes]
  39. y = profile(img, model, n=3, device=device)
  40. except Exception as e:
  41. LOGGER.warning(f'{prefix}{e}')
  42. y = [x[2] for x in y if x] # memory [2]
  43. batch_sizes = batch_sizes[:len(y)]
  44. p = np.polyfit(batch_sizes, y, deg=1) # first degree polynomial fit
  45. b = int((f * fraction - p[1]) / p[0]) # y intercept (optimal batch size)
  46. LOGGER.info(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%)')
  47. return b