Clean the Docker Registry by removing untagged repositories https://github.com/ricardobranco777/clean_registry
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

clean_registry.py 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. #!/usr/bin/env python3
  2. #
  3. # This script purges untagged repositories and runs the garbage collector in Docker Registry >= 2.4.0.
  4. # It works on the whole registry or the specified repositories.
  5. # The optional -x flag may be used to completely remove the specified repositories or tagged images.
  6. #
  7. # NOTES:
  8. # - This script stops the Registry container during cleanup to prevent corruption,
  9. # making it temporarily unavailable to clients.
  10. # - This script assumes local storage (the filesystem storage driver).
  11. # - This script may run stand-alone (on local setups) or dockerized (which supports remote Docker setups).
  12. # - This script is Python 3 only.
  13. #
  14. # v1.2 by Ricardo Branco
  15. #
  16. # MIT License
  17. #
  18. import os
  19. import re
  20. import sys
  21. import tarfile
  22. import subprocess
  23. from argparse import ArgumentParser
  24. from distutils.version import LooseVersion
  25. from glob import iglob
  26. from io import BytesIO
  27. from shutil import rmtree
  28. from requests import exceptions
  29. from docker.errors import APIError, NotFound, TLSParameterError
  30. try:
  31. import docker
  32. except ImportError:
  33. error("Please install docker-py with: pip3 install docker")
  34. try:
  35. import yaml
  36. except ImportError:
  37. error("Please install PyYaml with: pip3 install pyyaml")
  38. VERSION = "1.2"
  39. def dockerized():
  40. '''Returns True if we're inside a Docker container, False otherwise.'''
  41. return os.path.isfile("/.dockerenv")
  42. def error(msg, Exit=True):
  43. '''Prints an error message and optionally exit with a status code of 1'''
  44. print("ERROR: " + str(msg), file=sys.stderr)
  45. if Exit:
  46. sys.exit(1)
  47. def remove(path):
  48. '''Run rmtree() in verbose mode'''
  49. rmtree(path)
  50. if not args.quiet:
  51. print("removed directory " + path)
  52. def clean_revisions(repo):
  53. '''Remove the revision manifests that are not present in the tags directory'''
  54. revisions = set(os.listdir(repo + "/_manifests/revisions/sha256/"))
  55. manifests = set(map(os.path.basename, iglob(repo + "/_manifests/tags/*/*/sha256/*")))
  56. revisions.difference_update(manifests)
  57. for revision in revisions:
  58. remove(repo + "/_manifests/revisions/sha256/" + revision)
  59. def clean_tag(repo, tag):
  60. '''Clean a specific repo:tag'''
  61. link = repo + "/_manifests/tags/" + tag + "/current/link"
  62. if not os.path.isfile(link):
  63. error("No such tag: %s in repository %s" % (tag, repo), Exit=False)
  64. return False
  65. if args.remove:
  66. remove(repo + "/_manifests/tags/" + tag)
  67. else:
  68. with open(link) as f:
  69. current = f.read()[len("sha256:"):]
  70. path = repo + "/_manifests/tags/" + tag + "/index/sha256/"
  71. for index in os.listdir(path):
  72. if index == current:
  73. continue
  74. remove(path + index)
  75. clean_revisions(repo)
  76. return True
  77. def clean_repo(image):
  78. '''Clean all tags (or a specific one, if specified) from a specific repository'''
  79. repo, tag = image.split(":", 1) if ":" in image else (image, "")
  80. if not os.path.isdir(repo):
  81. error("No such repository: " + repo, Exit=False)
  82. return False
  83. if args.remove:
  84. tags = os.listdir(repo + "/_manifests/tags/")
  85. if not tag or len(tags) == 1 and tag in tags:
  86. remove(repo)
  87. return True
  88. if tag:
  89. return clean_tag(repo, tag)
  90. currents = set()
  91. for link in iglob(repo + "/_manifests/tags/*/current/link"):
  92. with open(link) as f:
  93. currents.add(f.read()[len("sha256:"):])
  94. for index in iglob(repo + "/_manifests/tags/*/index/sha256/*"):
  95. if os.path.basename(index) not in currents:
  96. remove(index)
  97. clean_revisions(repo)
  98. return True
  99. def check_name(image):
  100. '''Checks the whole repository:tag name'''
  101. repo, tag = image.split(":", 1) if ":" in image else (image, "latest")
  102. # From https://github.com/moby/moby/blob/master/image/spec/v1.2.md
  103. # Tag values are limited to the set of characters [a-zA-Z0-9_.-], except they may not start with a . or - character.
  104. # Tags are limited to 128 characters.
  105. #
  106. # From https://github.com/docker/distribution/blob/master/docs/spec/api.md
  107. # 1. A repository name is broken up into path components. A component of a repository name must be at least
  108. # one lowercase, alpha-numeric characters, optionally separated by periods, dashes or underscores.
  109. # More strictly, it must match the regular expression [a-z0-9]+(?:[._-][a-z0-9]+)*
  110. # 2. If a repository name has two or more path components, they must be separated by a forward slash ("/").
  111. # 3. The total length of a repository name, including slashes, must be less than 256 characters.
  112. # Note: Internally, distribution permits multiple dashes and up to 2 underscores as separators.
  113. # See https://github.com/docker/distribution/blob/master/reference/regexp.go
  114. return len(image) < 256 and len(tag) < 129 and re.match('[a-zA-Z0-9_][a-zA-Z0-9_.-]*$', tag) and \
  115. all(re.match('[a-z0-9]+(?:(?:[._]|__|[-]*)[a-z0-9]+)*$', path) for path in repo.split("/"))
  116. class RegistryCleaner():
  117. '''Simple callable class for Docker Registry cleaning duties'''
  118. def __init__(self, container=None, volume=None):
  119. try:
  120. self.docker = docker.from_env()
  121. except TLSParameterError as err:
  122. error(err)
  123. if container is None:
  124. self.container = None
  125. try:
  126. self.volume = self.docker.volumes.get(volume)
  127. self.registry_dir = self.volume.attrs['Mountpoint']
  128. except (APIError, exceptions.ConnectionError) as err:
  129. error(err)
  130. if dockerized():
  131. if not os.getenv("REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY"):
  132. os.environ['REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY'] = "/var/lib/registry"
  133. self.registry_dir = os.environ['REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY']
  134. return
  135. try:
  136. self.info = self.docker.api.inspect_container(container)
  137. self.container = self.info['Id']
  138. except (APIError, exceptions.ConnectionError) as err:
  139. error(err)
  140. if self.info['Config']['Image'] != "registry:2":
  141. error("The container %s is not running the registry:2 image" % (container))
  142. if LooseVersion(self.get_image_version()) < LooseVersion("v2.4.0"):
  143. error("You're not running Docker Registry 2.4.0+")
  144. self.registry_dir = self.get_registry_dir()
  145. if dockerized() and not os.getenv("REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY"):
  146. os.environ['REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY'] = self.registry_dir
  147. def __call__(self):
  148. try:
  149. os.chdir(self.registry_dir + "/docker/registry/v2/repositories")
  150. except FileNotFoundError as err:
  151. error(err)
  152. if self.container is not None:
  153. self.docker.api.stop(self.container)
  154. images = args.images if args.images else map(os.path.dirname, iglob("**/_manifests", recursive=True))
  155. rc = 0
  156. for image in images:
  157. if not clean_repo(image):
  158. rc = 1
  159. if not self.garbage_collect():
  160. rc = 1
  161. if self.container is not None:
  162. self.docker.api.start(self.container)
  163. return rc
  164. def get_file(self, filename):
  165. '''Returns the contents of the specified file from the container'''
  166. try:
  167. with self.docker.api.get_archive(self.container, filename)[0] as tar_stream:
  168. with BytesIO(tar_stream.data) as buf:
  169. with tarfile.open(fileobj=buf) as tarf:
  170. with tarf.extractfile(os.path.basename(filename)) as f:
  171. data = f.read()
  172. except NotFound as err:
  173. error(err)
  174. return data
  175. def get_registry_dir(self):
  176. '''Gets the Registry directory'''
  177. registry_dir = os.getenv("REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY")
  178. if registry_dir:
  179. return registry_dir
  180. registry_dir = ""
  181. for env in self.info['Config']['Env']:
  182. var, value = env.split("=", 1)
  183. if var == "REGISTRY_STORAGE_FILESYSTEM_ROOTDIRECTORY":
  184. registry_dir = value
  185. break
  186. if not registry_dir:
  187. config_yml = self.info['Args'][0]
  188. data = yaml.load(self.get_file(config_yml))
  189. try:
  190. registry_dir = data['storage']['filesystem']['rootdirectory']
  191. except KeyError:
  192. error("Unsupported storage driver")
  193. if dockerized():
  194. return registry_dir
  195. for item in self.info['Mounts']:
  196. if item['Destination'] == registry_dir:
  197. return item['Source']
  198. def get_image_version(self):
  199. '''Gets the Docker distribution version running on the container'''
  200. if self.info['State']['Running']:
  201. data = self.docker.containers.get(self.container).exec_run("/bin/registry --version").decode('utf-8')
  202. else:
  203. data = self.docker.containers.run(self.info["Image"], command="--version", remove=True).decode('utf-8')
  204. return data.split()[2]
  205. def garbage_collect(self):
  206. '''Runs garbage-collect'''
  207. command = "garbage-collect " + "/etc/docker/registry/config.yml"
  208. if dockerized():
  209. command = "/bin/registry " + command
  210. with subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) as proc:
  211. if not args.quiet:
  212. print(proc.stdout.read().decode('utf-8'))
  213. status = proc.wait()
  214. else:
  215. cli = self.docker.containers.run("registry:2", command=command, detach=True, stderr=True,
  216. volumes={self.registry_dir: {'bind': "/var/lib/registry", 'mode': "rw"}})
  217. if not args.quiet:
  218. for line in cli.logs(stream=True):
  219. print(line.decode('utf-8'), end="")
  220. status = True if cli.wait() == 0 else False
  221. cli.remove()
  222. return status
  223. def main():
  224. '''Main function'''
  225. progname = os.path.basename(sys.argv[0])
  226. usage = "\rUsage: " + progname + " [OPTIONS] VOLUME|CONTAINER [REPOSITORY[:TAG]]..." + """
  227. Options:
  228. -x, --remove Remove the specified images or repositories.
  229. -v, --volume Specify a volume instead of container.
  230. -q, --quiet Supress non-error messages.
  231. -V, --version Show version and exit."""
  232. parser = ArgumentParser(usage=usage, add_help=False)
  233. parser.add_argument('-h', '--help', action='store_true')
  234. parser.add_argument('-q', '--quiet', action='store_true')
  235. parser.add_argument('-x', '--remove', action='store_true')
  236. parser.add_argument('-v', '--volume', action='store_true')
  237. parser.add_argument('-V', '--version', action='store_true')
  238. parser.add_argument('container_or_volume', nargs='?')
  239. parser.add_argument('images', nargs='*')
  240. global args
  241. args = parser.parse_args()
  242. if args.help or not args.container_or_volume:
  243. print('usage: ' + usage)
  244. sys.exit(0 if args.help else 1)
  245. elif args.version:
  246. print(progname + " " + VERSION)
  247. sys.exit(0)
  248. for image in args.images:
  249. if not check_name(image):
  250. error("Invalid Docker repository/tag: " + image)
  251. if args.remove and not args.images:
  252. error("The -x option requires that you specify at least one repository...")
  253. if args.volume:
  254. rc = RegistryCleaner(volume=args.container_or_volume)
  255. else:
  256. rc = RegistryCleaner(container=args.container_or_volume)
  257. sys.exit(rc())
  258. if __name__ == "__main__":
  259. try:
  260. main()
  261. except KeyboardInterrupt:
  262. sys.exit(1)