compare.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. """
  2. Utilities for comparing image results.
  3. """
  4. import atexit
  5. import hashlib
  6. import os
  7. from pathlib import Path
  8. import re
  9. import shutil
  10. import subprocess
  11. import sys
  12. from tempfile import TemporaryDirectory, TemporaryFile
  13. import numpy as np
  14. from PIL import Image
  15. import matplotlib as mpl
  16. from matplotlib import cbook
  17. from matplotlib.testing.exceptions import ImageComparisonFailure
  18. __all__ = ['compare_images', 'comparable_formats']
  19. def make_test_filename(fname, purpose):
  20. """
  21. Make a new filename by inserting *purpose* before the file's extension.
  22. """
  23. base, ext = os.path.splitext(fname)
  24. return '%s-%s%s' % (base, purpose, ext)
  25. def get_cache_dir():
  26. cache_dir = Path(mpl.get_cachedir(), 'test_cache')
  27. cache_dir.mkdir(parents=True, exist_ok=True)
  28. return str(cache_dir)
  29. def get_file_hash(path, block_size=2 ** 20):
  30. md5 = hashlib.md5()
  31. with open(path, 'rb') as fd:
  32. while True:
  33. data = fd.read(block_size)
  34. if not data:
  35. break
  36. md5.update(data)
  37. if Path(path).suffix == '.pdf':
  38. md5.update(str(mpl._get_executable_info("gs").version)
  39. .encode('utf-8'))
  40. elif Path(path).suffix == '.svg':
  41. md5.update(str(mpl._get_executable_info("inkscape").version)
  42. .encode('utf-8'))
  43. return md5.hexdigest()
  44. @cbook.deprecated("3.3")
  45. def make_external_conversion_command(cmd):
  46. def convert(old, new):
  47. cmdline = cmd(old, new)
  48. pipe = subprocess.Popen(cmdline, universal_newlines=True,
  49. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  50. stdout, stderr = pipe.communicate()
  51. errcode = pipe.wait()
  52. if not os.path.exists(new) or errcode:
  53. msg = "Conversion command failed:\n%s\n" % ' '.join(cmdline)
  54. if stdout:
  55. msg += "Standard output:\n%s\n" % stdout
  56. if stderr:
  57. msg += "Standard error:\n%s\n" % stderr
  58. raise IOError(msg)
  59. return convert
  60. # Modified from https://bugs.python.org/issue25567.
  61. _find_unsafe_bytes = re.compile(br'[^a-zA-Z0-9_@%+=:,./-]').search
  62. def _shlex_quote_bytes(b):
  63. return (b if _find_unsafe_bytes(b) is None
  64. else b"'" + b.replace(b"'", b"'\"'\"'") + b"'")
  65. class _ConverterError(Exception):
  66. pass
  67. class _Converter:
  68. def __init__(self):
  69. self._proc = None
  70. # Explicitly register deletion from an atexit handler because if we
  71. # wait until the object is GC'd (which occurs later), then some module
  72. # globals (e.g. signal.SIGKILL) has already been set to None, and
  73. # kill() doesn't work anymore...
  74. atexit.register(self.__del__)
  75. def __del__(self):
  76. if self._proc:
  77. self._proc.kill()
  78. self._proc.wait()
  79. for stream in filter(None, [self._proc.stdin,
  80. self._proc.stdout,
  81. self._proc.stderr]):
  82. stream.close()
  83. self._proc = None
  84. def _read_until(self, terminator):
  85. """Read until the prompt is reached."""
  86. buf = bytearray()
  87. while True:
  88. c = self._proc.stdout.read(1)
  89. if not c:
  90. raise _ConverterError
  91. buf.extend(c)
  92. if buf.endswith(terminator):
  93. return bytes(buf[:-len(terminator)])
class _GSConverter(_Converter):
    """
    Converter that drives an interactive Ghostscript process with the
    ``png16m`` output device.

    The process is started on the first call and kept in ``self._proc`` for
    subsequent calls.
    """

    def __call__(self, orig, dest):
        """
        Render the file *orig* to the file *dest* through Ghostscript.

        Raises
        ------
        OSError
            If the Ghostscript prompt cannot be read right after startup.
        ImageComparisonFailure
            If Ghostscript reports a non-empty operand stack after running
            *orig*, or if *dest* does not exist afterwards.  The message is
            Ghostscript's output for this command.
        """
        if not self._proc:
            self._proc = subprocess.Popen(
                [mpl._get_executable_info("gs").executable,
                 "-dNOSAFER", "-dNOPAUSE", "-sDEVICE=png16m"],
                # As far as I can see, ghostscript never outputs to stderr.
                stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            try:
                # Consume the startup output up to the first prompt.
                self._read_until(b"\nGS")
            except _ConverterError as err:
                raise OSError("Failed to start Ghostscript") from err

        def encode_and_escape(name):
            # Encode the filename to bytes and backslash-escape the
            # characters that are special inside a PostScript "(...)" string.
            return (os.fsencode(name)
                    .replace(b"\\", b"\\\\")
                    .replace(b"(", br"\(")
                    .replace(b")", br"\)"))

        # Set the output file, then run the input file.
        self._proc.stdin.write(
            b"<< /OutputFile ("
            + encode_and_escape(dest)
            + b") >> setpagedevice ("
            + encode_and_escape(orig)
            + b") run flush\n")
        self._proc.stdin.flush()
        # GS> if nothing left on the stack; GS<n> if n items left on the stack.
        err = self._read_until(b"GS")
        stack = self._read_until(b">")
        if stack or not os.path.exists(dest):
            # ``stack`` looks like b"<n"; drop the leading "<" to get n.
            stack_size = int(stack[1:]) if stack else 0
            # Queue one "pop" per leftover stack item.
            # NOTE(review): these are written but not flushed here, and the
            # prompts they produce are not read back -- confirm this is
            # intended.
            self._proc.stdin.write(b"pop\n" * stack_size)
            # Using the systemencoding should at least get the filenames right.
            raise ImageComparisonFailure(
                (err + b"GS" + stack + b">")
                .decode(sys.getfilesystemencoding(), "replace"))
class _SVGConverter(_Converter):
    """
    Converter that drives Inkscape in ``--shell`` mode.

    The Inkscape process is kept in ``self._proc`` and is (re)started
    whenever it is missing or has terminated.  Conversions go through fixed
    filenames inside a private temporary directory (``self._tmpdir``).
    """

    def __call__(self, orig, dest):
        """
        Convert the svg file *orig* to the png file *dest* through Inkscape.

        Raises
        ------
        OSError
            If the Inkscape prompt cannot be read right after startup.
        ImageComparisonFailure
            If Inkscape's output ends before the next prompt while
            converting; the message is the content of Inkscape's stderr.
        """
        # Inkscape < 1 and >= 1 use different command-line flags, shell
        # commands and prompts.
        # NOTE(review): this compares ``version`` against the str "1";
        # confirm the ordering semantics of the version object.
        old_inkscape = mpl._get_executable_info("inkscape").version < "1"
        terminator = b"\n>" if old_inkscape else b"> "
        if not hasattr(self, "_tmpdir"):
            self._tmpdir = TemporaryDirectory()
        if (not self._proc  # First run.
                or self._proc.poll() is not None):  # Inkscape terminated.
            env = {
                **os.environ,
                # If one passes e.g. a png file to Inkscape, it will try to
                # query the user for conversion options via a GUI (even with
                # `--without-gui`). Unsetting `DISPLAY` prevents this (and
                # causes GTK to crash and Inkscape to terminate, but that'll
                # just be reported as a regular exception below).
                "DISPLAY": "",
                # Do not load any user options.
                "INKSCAPE_PROFILE_DIR": os.devnull,
            }
            # Old versions of Inkscape (e.g. 0.48.3.1) seem to sometimes
            # deadlock when stderr is redirected to a pipe, so we redirect it
            # to a temporary file instead. This is not necessary anymore as of
            # Inkscape 0.92.1.
            stderr = TemporaryFile()
            self._proc = subprocess.Popen(
                ["inkscape", "--without-gui", "--shell"] if old_inkscape else
                ["inkscape", "--shell"],
                stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=stderr,
                env=env, cwd=self._tmpdir.name)
            # Slight abuse, but makes shutdown handling easier.
            self._proc.stderr = stderr
            try:
                # Consume the startup output up to the first prompt.
                self._read_until(terminator)
            except _ConverterError as err:
                raise OSError("Failed to start Inkscape in interactive "
                              "mode") from err

        # Inkscape's shell mode does not support escaping metacharacters in the
        # filename ("\n", and ":;" for inkscape>=1). Avoid any problems by
        # running from a temporary directory and using fixed filenames.
        inkscape_orig = Path(self._tmpdir.name, os.fsdecode(b"f.svg"))
        inkscape_dest = Path(self._tmpdir.name, os.fsdecode(b"f.png"))
        try:
            inkscape_orig.symlink_to(Path(orig).resolve())
        except OSError:
            # Symlinking failed; fall back to copying the input file.
            shutil.copyfile(orig, inkscape_orig)
        self._proc.stdin.write(
            b"f.svg --export-png=f.png\n" if old_inkscape else
            b"file-open:f.svg;export-filename:f.png;export-do;file-close\n")
        self._proc.stdin.flush()
        try:
            # Wait for the next prompt.
            self._read_until(terminator)
        except _ConverterError as err:
            # Inkscape's output is not localized but gtk's is, so the output
            # stream probably has a mixed encoding. Using the filesystem
            # encoding should at least get the filenames right...
            self._proc.stderr.seek(0)
            raise ImageComparisonFailure(
                self._proc.stderr.read().decode(
                    sys.getfilesystemencoding(), "replace")) from err
        # Remove the temporary input and move the result to its destination.
        os.remove(inkscape_orig)
        shutil.move(inkscape_dest, dest)

    def __del__(self):
        """Shut down the subprocess, then remove the temporary directory."""
        super().__del__()
        if hasattr(self, "_tmpdir"):
            self._tmpdir.cleanup()
  193. def _update_converter():
  194. try:
  195. mpl._get_executable_info("gs")
  196. except mpl.ExecutableNotFoundError:
  197. pass
  198. else:
  199. converter['pdf'] = converter['eps'] = _GSConverter()
  200. try:
  201. mpl._get_executable_info("inkscape")
  202. except mpl.ExecutableNotFoundError:
  203. pass
  204. else:
  205. converter['svg'] = _SVGConverter()
#: A dictionary that maps filename extensions (without the leading dot, e.g.
#: ``'pdf'``) to callables.  Each callable is invoked as
#: ``converter[ext](old, new)`` and converts the file *old* to the png
#: file *new*.
converter = {}
# Fill in the converters whose external tools are available on this system.
_update_converter()
  212. def comparable_formats():
  213. """
  214. Return the list of file formats that `.compare_images` can compare
  215. on this system.
  216. Returns
  217. -------
  218. list of str
  219. E.g. ``['png', 'pdf', 'svg', 'eps']``.
  220. """
  221. return ['png', *converter]
  222. def convert(filename, cache):
  223. """
  224. Convert the named file to png; return the name of the created file.
  225. If *cache* is True, the result of the conversion is cached in
  226. `matplotlib.get_cachedir() + '/test_cache/'`. The caching is based on a
  227. hash of the exact contents of the input file. There is no limit on the
  228. size of the cache, so it may need to be manually cleared periodically.
  229. """
  230. path = Path(filename)
  231. if not path.exists():
  232. raise IOError(f"{path} does not exist")
  233. if path.suffix[1:] not in converter:
  234. import pytest
  235. pytest.skip(f"Don't know how to convert {path.suffix} files to png")
  236. newpath = path.parent / f"{path.stem}_{path.suffix[1:]}.png"
  237. # Only convert the file if the destination doesn't already exist or
  238. # is out of date.
  239. if not newpath.exists() or newpath.stat().st_mtime < path.stat().st_mtime:
  240. cache_dir = Path(get_cache_dir()) if cache else None
  241. if cache_dir is not None:
  242. hash_value = get_file_hash(path)
  243. cached_path = cache_dir / (hash_value + newpath.suffix)
  244. if cached_path.exists():
  245. shutil.copyfile(cached_path, newpath)
  246. return str(newpath)
  247. converter[path.suffix[1:]](path, newpath)
  248. if cache_dir is not None:
  249. shutil.copyfile(newpath, cached_path)
  250. return str(newpath)
  251. def crop_to_same(actual_path, actual_image, expected_path, expected_image):
  252. # clip the images to the same size -- this is useful only when
  253. # comparing eps to pdf
  254. if actual_path[-7:-4] == 'eps' and expected_path[-7:-4] == 'pdf':
  255. aw, ah, ad = actual_image.shape
  256. ew, eh, ed = expected_image.shape
  257. actual_image = actual_image[int(aw / 2 - ew / 2):int(
  258. aw / 2 + ew / 2), int(ah / 2 - eh / 2):int(ah / 2 + eh / 2)]
  259. return actual_image, expected_image
  260. def calculate_rms(expected_image, actual_image):
  261. """
  262. Calculate the per-pixel errors, then compute the root mean square error.
  263. """
  264. if expected_image.shape != actual_image.shape:
  265. raise ImageComparisonFailure(
  266. "Image sizes do not match expected size: {} "
  267. "actual size {}".format(expected_image.shape, actual_image.shape))
  268. # Convert to float to avoid overflowing finite integer types.
  269. return np.sqrt(((expected_image - actual_image).astype(float) ** 2).mean())
  270. # NOTE: compare_image and save_diff_image assume that the image does not have
  271. # 16-bit depth, as Pillow converts these to RGB incorrectly.
  272. def compare_images(expected, actual, tol, in_decorator=False):
  273. """
  274. Compare two "image" files checking differences within a tolerance.
  275. The two given filenames may point to files which are convertible to
  276. PNG via the `.converter` dictionary. The underlying RMS is calculated
  277. with the `.calculate_rms` function.
  278. Parameters
  279. ----------
  280. expected : str
  281. The filename of the expected image.
  282. actual : str
  283. The filename of the actual image.
  284. tol : float
  285. The tolerance (a color value difference, where 255 is the
  286. maximal difference). The test fails if the average pixel
  287. difference is greater than this value.
  288. in_decorator : bool
  289. Determines the output format. If called from image_comparison
  290. decorator, this should be True. (default=False)
  291. Returns
  292. -------
  293. None or dict or str
  294. Return *None* if the images are equal within the given tolerance.
  295. If the images differ, the return value depends on *in_decorator*.
  296. If *in_decorator* is true, a dict with the following entries is
  297. returned:
  298. - *rms*: The RMS of the image difference.
  299. - *expected*: The filename of the expected image.
  300. - *actual*: The filename of the actual image.
  301. - *diff_image*: The filename of the difference image.
  302. - *tol*: The comparison tolerance.
  303. Otherwise, a human-readable multi-line string representation of this
  304. information is returned.
  305. Examples
  306. --------
  307. ::
  308. img1 = "./baseline/plot.png"
  309. img2 = "./output/plot.png"
  310. compare_images(img1, img2, 0.001)
  311. """
  312. actual = os.fspath(actual)
  313. if not os.path.exists(actual):
  314. raise Exception("Output image %s does not exist." % actual)
  315. if os.stat(actual).st_size == 0:
  316. raise Exception("Output image file %s is empty." % actual)
  317. # Convert the image to png
  318. expected = os.fspath(expected)
  319. if not os.path.exists(expected):
  320. raise IOError('Baseline image %r does not exist.' % expected)
  321. extension = expected.split('.')[-1]
  322. if extension != 'png':
  323. actual = convert(actual, cache=False)
  324. expected = convert(expected, cache=True)
  325. # open the image files and remove the alpha channel (if it exists)
  326. expected_image = np.asarray(Image.open(expected).convert("RGB"))
  327. actual_image = np.asarray(Image.open(actual).convert("RGB"))
  328. actual_image, expected_image = crop_to_same(
  329. actual, actual_image, expected, expected_image)
  330. diff_image = make_test_filename(actual, 'failed-diff')
  331. if tol <= 0:
  332. if np.array_equal(expected_image, actual_image):
  333. return None
  334. # convert to signed integers, so that the images can be subtracted without
  335. # overflow
  336. expected_image = expected_image.astype(np.int16)
  337. actual_image = actual_image.astype(np.int16)
  338. rms = calculate_rms(expected_image, actual_image)
  339. if rms <= tol:
  340. return None
  341. save_diff_image(expected, actual, diff_image)
  342. results = dict(rms=rms, expected=str(expected),
  343. actual=str(actual), diff=str(diff_image), tol=tol)
  344. if not in_decorator:
  345. # Then the results should be a string suitable for stdout.
  346. template = ['Error: Image files did not match.',
  347. 'RMS Value: {rms}',
  348. 'Expected: \n {expected}',
  349. 'Actual: \n {actual}',
  350. 'Difference:\n {diff}',
  351. 'Tolerance: \n {tol}', ]
  352. results = '\n '.join([line.format(**results) for line in template])
  353. return results
  354. def save_diff_image(expected, actual, output):
  355. """
  356. Parameters
  357. ----------
  358. expected : str
  359. File path of expected image.
  360. actual : str
  361. File path of actual image.
  362. output : str
  363. File path to save difference image to.
  364. """
  365. # Drop alpha channels, similarly to compare_images.
  366. expected_image = np.asarray(Image.open(expected).convert("RGB"))
  367. actual_image = np.asarray(Image.open(actual).convert("RGB"))
  368. actual_image, expected_image = crop_to_same(
  369. actual, actual_image, expected, expected_image)
  370. expected_image = np.array(expected_image).astype(float)
  371. actual_image = np.array(actual_image).astype(float)
  372. if expected_image.shape != actual_image.shape:
  373. raise ImageComparisonFailure(
  374. "Image sizes do not match expected size: {} "
  375. "actual size {}".format(expected_image.shape, actual_image.shape))
  376. abs_diff_image = np.abs(expected_image - actual_image)
  377. # expand differences in luminance domain
  378. abs_diff_image *= 255 * 10
  379. save_image_np = np.clip(abs_diff_image, 0, 255).astype(np.uint8)
  380. height, width, depth = save_image_np.shape
  381. # The PDF renderer doesn't produce an alpha channel, but the
  382. # matplotlib PNG writer requires one, so expand the array
  383. if depth == 3:
  384. with_alpha = np.empty((height, width, 4), dtype=np.uint8)
  385. with_alpha[:, :, 0:3] = save_image_np
  386. save_image_np = with_alpha
  387. # Hard-code the alpha channel to fully solid
  388. save_image_np[:, :, 3] = 255
  389. Image.fromarray(save_image_np).save(output, format="png")