1. import datetime
    
  2. import inspect
    
  3. import sys
    
  4. import unittest
    
  5. from pathlib import Path
    
  6. from unittest import mock
    
  7. from urllib.parse import quote, quote_plus
    
  8. 
    
  9. from django.test import SimpleTestCase
    
  10. from django.utils.encoding import (
    
  11.     DjangoUnicodeDecodeError,
    
  12.     escape_uri_path,
    
  13.     filepath_to_uri,
    
  14.     force_bytes,
    
  15.     force_str,
    
  16.     get_system_encoding,
    
  17.     iri_to_uri,
    
  18.     repercent_broken_unicode,
    
  19.     smart_bytes,
    
  20.     smart_str,
    
  21.     uri_to_iri,
    
  22. )
    
  23. from django.utils.functional import SimpleLazyObject
    
  24. from django.utils.translation import gettext_lazy
    
  25. 
    
  26. 
    
  27. class TestEncodingUtils(SimpleTestCase):
    
  28.     def test_force_str_exception(self):
    
  29.         """
    
  30.         Broken __str__ actually raises an error.
    
  31.         """
    
  32. 
    
  33.         class MyString:
    
  34.             def __str__(self):
    
  35.                 return b"\xc3\xb6\xc3\xa4\xc3\xbc"
    
  36. 
    
  37.         # str(s) raises a TypeError if the result is not a text type.
    
  38.         with self.assertRaises(TypeError):
    
  39.             force_str(MyString())
    
  40. 
    
  41.     def test_force_str_lazy(self):
    
  42.         s = SimpleLazyObject(lambda: "x")
    
  43.         self.assertIs(type(force_str(s)), str)
    
  44. 
    
  45.     def test_force_str_DjangoUnicodeDecodeError(self):
    
  46.         msg = (
    
  47.             "'utf-8' codec can't decode byte 0xff in position 0: invalid "
    
  48.             "start byte. You passed in b'\\xff' (<class 'bytes'>)"
    
  49.         )
    
  50.         with self.assertRaisesMessage(DjangoUnicodeDecodeError, msg):
    
  51.             force_str(b"\xff")
    
  52. 
    
  53.     def test_force_bytes_exception(self):
    
  54.         """
    
  55.         force_bytes knows how to convert to bytes an exception
    
  56.         containing non-ASCII characters in its args.
    
  57.         """
    
  58.         error_msg = "This is an exception, voilà"
    
  59.         exc = ValueError(error_msg)
    
  60.         self.assertEqual(force_bytes(exc), error_msg.encode())
    
  61.         self.assertEqual(
    
  62.             force_bytes(exc, encoding="ascii", errors="ignore"),
    
  63.             b"This is an exception, voil",
    
  64.         )
    
  65. 
    
  66.     def test_force_bytes_strings_only(self):
    
  67.         today = datetime.date.today()
    
  68.         self.assertEqual(force_bytes(today, strings_only=True), today)
    
  69. 
    
  70.     def test_force_bytes_encoding(self):
    
  71.         error_msg = "This is an exception, voilà".encode()
    
  72.         result = force_bytes(error_msg, encoding="ascii", errors="ignore")
    
  73.         self.assertEqual(result, b"This is an exception, voil")
    
  74. 
    
  75.     def test_force_bytes_memory_view(self):
    
  76.         data = b"abc"
    
  77.         result = force_bytes(memoryview(data))
    
  78.         # Type check is needed because memoryview(bytes) == bytes.
    
  79.         self.assertIs(type(result), bytes)
    
  80.         self.assertEqual(result, data)
    
  81. 
    
  82.     def test_smart_bytes(self):
    
  83.         class Test:
    
  84.             def __str__(self):
    
  85.                 return "ŠĐĆŽćžšđ"
    
  86. 
    
  87.         lazy_func = gettext_lazy("x")
    
  88.         self.assertIs(smart_bytes(lazy_func), lazy_func)
    
  89.         self.assertEqual(
    
  90.             smart_bytes(Test()),
    
  91.             b"\xc5\xa0\xc4\x90\xc4\x86\xc5\xbd\xc4\x87\xc5\xbe\xc5\xa1\xc4\x91",
    
  92.         )
    
  93.         self.assertEqual(smart_bytes(1), b"1")
    
  94.         self.assertEqual(smart_bytes("foo"), b"foo")
    
  95. 
    
  96.     def test_smart_str(self):
    
  97.         class Test:
    
  98.             def __str__(self):
    
  99.                 return "ŠĐĆŽćžšđ"
    
  100. 
    
  101.         lazy_func = gettext_lazy("x")
    
  102.         self.assertIs(smart_str(lazy_func), lazy_func)
    
  103.         self.assertEqual(
    
  104.             smart_str(Test()), "\u0160\u0110\u0106\u017d\u0107\u017e\u0161\u0111"
    
  105.         )
    
  106.         self.assertEqual(smart_str(1), "1")
    
  107.         self.assertEqual(smart_str("foo"), "foo")
    
  108. 
    
  109.     def test_get_default_encoding(self):
    
  110.         with mock.patch("locale.getlocale", side_effect=Exception):
    
  111.             self.assertEqual(get_system_encoding(), "ascii")
    
  112. 
    
  113.     def test_repercent_broken_unicode_recursion_error(self):
    
  114.         # Prepare a string long enough to force a recursion error if the tested
    
  115.         # function uses recursion.
    
  116.         data = b"\xfc" * sys.getrecursionlimit()
    
  117.         try:
    
  118.             self.assertEqual(
    
  119.                 repercent_broken_unicode(data), b"%FC" * sys.getrecursionlimit()
    
  120.             )
    
  121.         except RecursionError:
    
  122.             self.fail("Unexpected RecursionError raised.")
    
  123. 
    
  124.     def test_repercent_broken_unicode_small_fragments(self):
    
  125.         data = b"test\xfctest\xfctest\xfc"
    
  126.         decoded_paths = []
    
  127. 
    
  128.         def mock_quote(*args, **kwargs):
    
  129.             # The second frame is the call to repercent_broken_unicode().
    
  130.             decoded_paths.append(inspect.currentframe().f_back.f_locals["path"])
    
  131.             return quote(*args, **kwargs)
    
  132. 
    
  133.         with mock.patch("django.utils.encoding.quote", mock_quote):
    
  134.             self.assertEqual(repercent_broken_unicode(data), b"test%FCtest%FCtest%FC")
    
  135. 
    
  136.         # decode() is called on smaller fragment of the path each time.
    
  137.         self.assertEqual(
    
  138.             decoded_paths,
    
  139.             [b"test\xfctest\xfctest\xfc", b"test\xfctest\xfc", b"test\xfc"],
    
  140.         )
    
  141. 
    
  142. 
    
  143. class TestRFC3987IEncodingUtils(unittest.TestCase):
    
  144.     def test_filepath_to_uri(self):
    
  145.         self.assertIsNone(filepath_to_uri(None))
    
  146.         self.assertEqual(
    
  147.             filepath_to_uri("upload\\чубака.mp4"),
    
  148.             "upload/%D1%87%D1%83%D0%B1%D0%B0%D0%BA%D0%B0.mp4",
    
  149.         )
    
  150.         self.assertEqual(filepath_to_uri(Path("upload/test.png")), "upload/test.png")
    
  151.         self.assertEqual(filepath_to_uri(Path("upload\\test.png")), "upload/test.png")
    
  152. 
    
  153.     def test_iri_to_uri(self):
    
  154.         cases = [
    
  155.             # Valid UTF-8 sequences are encoded.
    
  156.             ("red%09rosé#red", "red%09ros%C3%A9#red"),
    
  157.             ("/blog/for/Jürgen Münster/", "/blog/for/J%C3%BCrgen%20M%C3%BCnster/"),
    
  158.             (
    
  159.                 "locations/%s" % quote_plus("Paris & Orléans"),
    
  160.                 "locations/Paris+%26+Orl%C3%A9ans",
    
  161.             ),
    
  162.             # Reserved chars remain unescaped.
    
  163.             ("%&", "%&"),
    
  164.             ("red&♥ros%#red", "red&%E2%99%A5ros%#red"),
    
  165.             (gettext_lazy("red&♥ros%#red"), "red&%E2%99%A5ros%#red"),
    
  166.         ]
    
  167. 
    
  168.         for iri, uri in cases:
    
  169.             with self.subTest(iri):
    
  170.                 self.assertEqual(iri_to_uri(iri), uri)
    
  171. 
    
  172.                 # Test idempotency.
    
  173.                 self.assertEqual(iri_to_uri(iri_to_uri(iri)), uri)
    
  174. 
    
  175.     def test_uri_to_iri(self):
    
  176.         cases = [
    
  177.             (None, None),
    
  178.             # Valid UTF-8 sequences are decoded.
    
  179.             ("/%e2%89%Ab%E2%99%a5%E2%89%aB/", "/≫♥≫/"),
    
  180.             ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
    
  181.             ("/%41%5a%6B/", "/AZk/"),
    
  182.             # Reserved and non-URL valid ASCII chars are not decoded.
    
  183.             ("/%25%20%02%41%7b/", "/%25%20%02A%7b/"),
    
  184.             # Broken UTF-8 sequences remain escaped.
    
  185.             ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
    
  186.             ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
    
  187.             ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
    
  188.             ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
    
  189.             (
    
  190.                 "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
    
  191.                 "/♥♥/?utf8=%9C%93✓%9C%93",
    
  192.             ),
    
  193.         ]
    
  194. 
    
  195.         for uri, iri in cases:
    
  196.             with self.subTest(uri):
    
  197.                 self.assertEqual(uri_to_iri(uri), iri)
    
  198. 
    
  199.                 # Test idempotency.
    
  200.                 self.assertEqual(uri_to_iri(uri_to_iri(uri)), iri)
    
  201. 
    
  202.     def test_complementarity(self):
    
  203.         cases = [
    
  204.             (
    
  205.                 "/blog/for/J%C3%BCrgen%20M%C3%BCnster/",
    
  206.                 "/blog/for/J\xfcrgen%20M\xfcnster/",
    
  207.             ),
    
  208.             ("%&", "%&"),
    
  209.             ("red&%E2%99%A5ros%#red", "red&♥ros%#red"),
    
  210.             ("/%E2%99%A5%E2%99%A5/", "/♥♥/"),
    
  211.             ("/%E2%99%A5%E2%99%A5/?utf8=%E2%9C%93", "/♥♥/?utf8=✓"),
    
  212.             ("/%25%20%02%7b/", "/%25%20%02%7b/"),
    
  213.             ("/%AAd%AAj%AAa%AAn%AAg%AAo%AA/", "/%AAd%AAj%AAa%AAn%AAg%AAo%AA/"),
    
  214.             ("/%E2%99%A5%E2%E2%99%A5/", "/♥%E2♥/"),
    
  215.             ("/%E2%99%A5%E2%99%E2%99%A5/", "/♥%E2%99♥/"),
    
  216.             ("/%E2%E2%99%A5%E2%99%A5%99/", "/%E2♥♥%99/"),
    
  217.             (
    
  218.                 "/%E2%99%A5%E2%99%A5/?utf8=%9C%93%E2%9C%93%9C%93",
    
  219.                 "/♥♥/?utf8=%9C%93✓%9C%93",
    
  220.             ),
    
  221.         ]
    
  222. 
    
  223.         for uri, iri in cases:
    
  224.             with self.subTest(uri):
    
  225.                 self.assertEqual(iri_to_uri(uri_to_iri(uri)), uri)
    
  226.                 self.assertEqual(uri_to_iri(iri_to_uri(iri)), iri)
    
  227. 
    
  228.     def test_escape_uri_path(self):
    
  229.         cases = [
    
  230.             (
    
  231.                 "/;some/=awful/?path/:with/@lots/&of/+awful/chars",
    
  232.                 "/%3Bsome/%3Dawful/%3Fpath/:with/@lots/&of/+awful/chars",
    
  233.             ),
    
  234.             ("/foo#bar", "/foo%23bar"),
    
  235.             ("/foo?bar", "/foo%3Fbar"),
    
  236.         ]
    
  237.         for uri, expected in cases:
    
  238.             with self.subTest(uri):
    
  239.                 self.assertEqual(escape_uri_path(uri), expected)