Skip to content

Commit 4e999e0

Browse files
committed
Set a library-specific user agent when automatically retrieving $refs.
This behavior is (already) deprecated, but oddly there are "real world" webservers which seem to accept the `requests` default user agent (i.e. they respond correctly) but not the vague `urllib` default, `Python-urllib/3.x` (i.e. they respond with a 403 or similar). Since it seems polite to set this anyhow, we may as well fix the behavior for anyone who happens to be encountering such a webserver.
1 parent 4fdc365 commit 4e999e0

File tree

3 files changed

+58
-5
lines changed

3 files changed

+58
-5
lines changed

Diff for: CHANGELOG.rst

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
v4.18.6
2+
=======
3+
4+
* Set a ``jsonschema``-specific user agent when automatically retrieving remote references (which is deprecated).
5+
16
v4.18.5
27
=======
38

Diff for: jsonschema/tests/test_deprecations.py

+49-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1-
from unittest import TestCase
2-
import importlib
1+
from contextlib import contextmanager
2+
from io import BytesIO
3+
from unittest import TestCase, mock
4+
import importlib.metadata
5+
import json
36
import subprocess
47
import sys
8+
import urllib.request
59

610
import referencing.exceptions
711

@@ -16,8 +20,9 @@ def test_version(self):
1620

1721
message = "Accessing jsonschema.__version__ is deprecated"
1822
with self.assertWarnsRegex(DeprecationWarning, message) as w:
19-
from jsonschema import __version__ # noqa: F401
23+
from jsonschema import __version__
2024

25+
self.assertEqual(__version__, importlib.metadata.version("jsonschema"))
2126
self.assertEqual(w.filename, __file__)
2227

2328
def test_validators_ErrorTree(self):
@@ -357,3 +362,44 @@ def test_cli(self):
357362
capture_output=True,
358363
)
359364
self.assertIn(b"The jsonschema CLI is deprecated ", process.stderr)
365+
366+
def test_automatic_remote_retrieval(self):
367+
"""
368+
Automatic retrieval of remote references is deprecated as of v4.18.0.
369+
"""
370+
ref = "http://bar#/$defs/baz"
371+
schema = {
372+
"$schema": "https://json-schema.org/draft/2020-12/schema",
373+
"$defs": {"baz": {"type": "integer"}},
374+
}
375+
376+
if "requests" in sys.modules: # pragma: no cover
377+
self.addCleanup(
378+
sys.modules.__setitem__, "requests", sys.modules["requests"],
379+
)
380+
sys.modules["requests"] = None
381+
382+
@contextmanager
383+
def fake_urlopen(request):
384+
self.assertIsInstance(request, urllib.request.Request)
385+
self.assertEqual(request.full_url, "http://bar")
386+
387+
# Ha ha urllib.request.Request "normalizes" header names and
388+
# Request.get_header does not also normalize them...
389+
(header, value), = request.header_items()
390+
self.assertEqual(header.lower(), "user-agent")
391+
self.assertEqual(
392+
value, "python-jsonschema (deprecated $ref resolution)",
393+
)
394+
yield BytesIO(json.dumps(schema).encode("utf8"))
395+
396+
validator = validators.Draft202012Validator({"$ref": ref})
397+
398+
message = "Automatically retrieving remote references "
399+
patch = mock.patch.object(urllib.request, "urlopen", new=fake_urlopen)
400+
401+
with patch, self.assertWarnsRegex(DeprecationWarning, message):
402+
self.assertEqual(
403+
(validator.is_valid({}), validator.is_valid(37)),
404+
(False, True),
405+
)

Diff for: jsonschema/validators.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,10 @@ def _validates(cls):
103103

104104

105105
def _warn_for_remote_retrieve(uri: str):
106-
from urllib.request import urlopen
107-
with urlopen(uri) as response:
106+
from urllib.request import Request, urlopen
107+
headers = {"User-Agent": "python-jsonschema (deprecated $ref resolution)"}
108+
request = Request(uri, headers=headers)
109+
with urlopen(request) as response:
108110
warnings.warn(
109111
"Automatically retrieving remote references can be a security "
110112
"vulnerability and is discouraged by the JSON Schema "

0 commit comments

Comments
 (0)