Skip to content

PYTHON-5306 - Fix use of public MongoClient attributes before connection #2285

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Apr 21, 2025
41 changes: 33 additions & 8 deletions pymongo/asynchronous/mongo_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@
)
from pymongo.read_preferences import ReadPreference, _ServerMode
from pymongo.results import ClientBulkWriteResult
from pymongo.server_description import ServerDescription
from pymongo.server_selectors import writable_server_selector
from pymongo.server_type import SERVER_TYPE
from pymongo.topology_description import TOPOLOGY_TYPE, TopologyDescription
Expand Down Expand Up @@ -767,6 +768,7 @@ def __init__(
self._timeout: float | None = None
self._topology_settings: TopologySettings = None # type: ignore[assignment]
self._event_listeners: _EventListeners | None = None
self._initial_topology_id: Optional[ObjectId] = None

# _pool_class, _monitor_class, and _condition_class are for deep
# customization of PyMongo, e.g. Motor.
Expand All @@ -779,7 +781,7 @@ def __init__(
keyword_opts["document_class"] = doc_class
self._resolve_srv_info: dict[str, Any] = {"keyword_opts": keyword_opts}

seeds = set()
self._seeds = set()
is_srv = False
username = None
password = None
Expand All @@ -804,18 +806,18 @@ def __init__(
srv_max_hosts=srv_max_hosts,
)
is_srv = entity.startswith(SRV_SCHEME)
seeds.update(res["nodelist"])
self._seeds.update(res["nodelist"])
username = res["username"] or username
password = res["password"] or password
dbase = res["database"] or dbase
opts = res["options"]
fqdn = res["fqdn"]
else:
seeds.update(split_hosts(entity, self._port))
if not seeds:
self._seeds.update(split_hosts(entity, self._port))
if not self._seeds:
raise ConfigurationError("need to specify at least one host")

for hostname in [node[0] for node in seeds]:
for hostname in [node[0] for node in self._seeds]:
if _detect_external_db(hostname):
break

Expand All @@ -838,7 +840,7 @@ def __init__(
srv_service_name = opts.get("srvServiceName", common.SRV_SERVICE_NAME)

srv_max_hosts = srv_max_hosts or opts.get("srvmaxhosts")
opts = self._normalize_and_validate_options(opts, seeds)
opts = self._normalize_and_validate_options(opts, self._seeds)

# Username and password passed as kwargs override user info in URI.
username = opts.get("username", username)
Expand All @@ -857,7 +859,7 @@ def __init__(
"username": username,
"password": password,
"dbase": dbase,
"seeds": seeds,
"seeds": self._seeds,
"fqdn": fqdn,
"srv_service_name": srv_service_name,
"pool_class": pool_class,
Expand All @@ -874,7 +876,7 @@ def __init__(
)

if not is_srv:
self._init_based_on_options(seeds, srv_max_hosts, srv_service_name)
self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name)

self._opened = False
self._closed = False
Expand Down Expand Up @@ -975,6 +977,7 @@ def _init_based_on_options(
srv_service_name=srv_service_name,
srv_max_hosts=srv_max_hosts,
server_monitoring_mode=self._options.server_monitoring_mode,
topology_id=self._initial_topology_id,
)
if self._options.auto_encryption_opts:
from pymongo.asynchronous.encryption import _Encrypter
Expand Down Expand Up @@ -1205,6 +1208,18 @@ def topology_description(self) -> TopologyDescription:

.. versionadded:: 4.0
"""
if self._topology is None:
Copy link
Contributor Author

@NoahStapp NoahStapp Apr 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the client has not connected yet, return a TopologyDescription with placeholder servers derived from the seeds passed to the client's constructor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should make sure the returned TopologyDescription has the same _topology_id. Otherwise it could be confusing because _topology_id will change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also can you give an example of what topology_description looks like in the connect=False srv case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<TopologyDescription id: 67ffb68a6e12ca176991f521, topology_type: Unknown, servers: [<ServerDescription (('test1.test.build.10gen.cc', None), 27017) server_type: Unknown, rtt: None>]>

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please include an example of this new/old behavior in the Jira ticket when closing.

servers = {(host, port): ServerDescription((host, port)) for host, port in self._seeds}
td = TopologyDescription(
TOPOLOGY_TYPE.Unknown,
servers,
None,
None,
None,
TopologySettings(),
)
self._initial_topology_id = td._topology_settings._topology_id
return td
return self._topology.description

@property
Expand All @@ -1218,6 +1233,8 @@ def nodes(self) -> FrozenSet[_Address]:
to any servers, or a network partition causes it to lose connection
to all servers.
"""
if self._topology is None:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the docstring defines, we expect nodes to return an empty set if the client has not yet connected.

return frozenset()
description = self._topology.description
return frozenset(s.address for s in description.known_servers)

Expand Down Expand Up @@ -1576,6 +1593,8 @@ async def address(self) -> Optional[tuple[str, int]]:

.. versionadded:: 3.0
"""
if self._topology is None:
await self._get_topology()
topology_type = self._topology._description.topology_type
if (
topology_type == TOPOLOGY_TYPE.Sharded
Expand All @@ -1598,6 +1617,8 @@ async def primary(self) -> Optional[tuple[str, int]]:
.. versionadded:: 3.0
AsyncMongoClient gained this property in version 3.0.
"""
if self._topology is None:
await self._get_topology()
return await self._topology.get_primary() # type: ignore[return-value]

@property
Expand All @@ -1611,6 +1632,8 @@ async def secondaries(self) -> set[_Address]:
.. versionadded:: 3.0
AsyncMongoClient gained this property in version 3.0.
"""
if self._topology is None:
await self._get_topology()
return await self._topology.get_secondaries()

@property
Expand All @@ -1621,6 +1644,8 @@ async def arbiters(self) -> set[_Address]:
connected to a replica set, there are no arbiters, or this client was
created without the `replicaSet` option.
"""
if self._topology is None:
await self._get_topology()
return await self._topology.get_arbiters()

@property
Expand Down
7 changes: 5 additions & 2 deletions pymongo/asynchronous/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(
srv_service_name: str = common.SRV_SERVICE_NAME,
srv_max_hosts: int = 0,
server_monitoring_mode: str = common.SERVER_MONITORING_MODE,
topology_id: Optional[ObjectId] = None,
):
"""Represent MongoClient's configuration.

Expand Down Expand Up @@ -78,8 +79,10 @@ def __init__(
self._srv_service_name = srv_service_name
self._srv_max_hosts = srv_max_hosts or 0
self._server_monitoring_mode = server_monitoring_mode

self._topology_id = ObjectId()
if topology_id is not None:
self._topology_id = topology_id
else:
self._topology_id = ObjectId()
# Store the allocation traceback to catch unclosed clients in the
# test suite.
self._stack = "".join(traceback.format_stack()[:-2])
Expand Down
41 changes: 33 additions & 8 deletions pymongo/synchronous/mongo_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
)
from pymongo.read_preferences import ReadPreference, _ServerMode
from pymongo.results import ClientBulkWriteResult
from pymongo.server_description import ServerDescription
from pymongo.server_selectors import writable_server_selector
from pymongo.server_type import SERVER_TYPE
from pymongo.synchronous import client_session, database, uri_parser
Expand Down Expand Up @@ -765,6 +766,7 @@ def __init__(
self._timeout: float | None = None
self._topology_settings: TopologySettings = None # type: ignore[assignment]
self._event_listeners: _EventListeners | None = None
self._initial_topology_id: Optional[ObjectId] = None

# _pool_class, _monitor_class, and _condition_class are for deep
# customization of PyMongo, e.g. Motor.
Expand All @@ -777,7 +779,7 @@ def __init__(
keyword_opts["document_class"] = doc_class
self._resolve_srv_info: dict[str, Any] = {"keyword_opts": keyword_opts}

seeds = set()
self._seeds = set()
is_srv = False
username = None
password = None
Expand All @@ -802,18 +804,18 @@ def __init__(
srv_max_hosts=srv_max_hosts,
)
is_srv = entity.startswith(SRV_SCHEME)
seeds.update(res["nodelist"])
self._seeds.update(res["nodelist"])
username = res["username"] or username
password = res["password"] or password
dbase = res["database"] or dbase
opts = res["options"]
fqdn = res["fqdn"]
else:
seeds.update(split_hosts(entity, self._port))
if not seeds:
self._seeds.update(split_hosts(entity, self._port))
if not self._seeds:
raise ConfigurationError("need to specify at least one host")

for hostname in [node[0] for node in seeds]:
for hostname in [node[0] for node in self._seeds]:
if _detect_external_db(hostname):
break

Expand All @@ -836,7 +838,7 @@ def __init__(
srv_service_name = opts.get("srvServiceName", common.SRV_SERVICE_NAME)

srv_max_hosts = srv_max_hosts or opts.get("srvmaxhosts")
opts = self._normalize_and_validate_options(opts, seeds)
opts = self._normalize_and_validate_options(opts, self._seeds)

# Username and password passed as kwargs override user info in URI.
username = opts.get("username", username)
Expand All @@ -855,7 +857,7 @@ def __init__(
"username": username,
"password": password,
"dbase": dbase,
"seeds": seeds,
"seeds": self._seeds,
"fqdn": fqdn,
"srv_service_name": srv_service_name,
"pool_class": pool_class,
Expand All @@ -872,7 +874,7 @@ def __init__(
)

if not is_srv:
self._init_based_on_options(seeds, srv_max_hosts, srv_service_name)
self._init_based_on_options(self._seeds, srv_max_hosts, srv_service_name)

self._opened = False
self._closed = False
Expand Down Expand Up @@ -973,6 +975,7 @@ def _init_based_on_options(
srv_service_name=srv_service_name,
srv_max_hosts=srv_max_hosts,
server_monitoring_mode=self._options.server_monitoring_mode,
topology_id=self._initial_topology_id,
)
if self._options.auto_encryption_opts:
from pymongo.synchronous.encryption import _Encrypter
Expand Down Expand Up @@ -1203,6 +1206,18 @@ def topology_description(self) -> TopologyDescription:

.. versionadded:: 4.0
"""
if self._topology is None:
servers = {(host, port): ServerDescription((host, port)) for host, port in self._seeds}
td = TopologyDescription(
TOPOLOGY_TYPE.Unknown,
servers,
None,
None,
None,
TopologySettings(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is missing certain settings that are publicly accessible, like TopologyDescription.heartbeat_frequency.

In the ticket I suggested making the MongoClient constructor always create the Topology instance, and then mutate it later once the SRV info is resolved. Did you consider that approach? It would avoid the problem of attempting to mock out these attributes and avoid the type-ignores since everything would always be non-None.

(It could be that this approach is a better fit, I just want to know if an alternative was considered.)

Copy link
Contributor Author

@NoahStapp NoahStapp Apr 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I started with the approach you suggested, but switched to the current implementation for a few reasons:

  1. topology_description is the only public attribute that requires changes of meaningful size. The rest either already do IO or are nodes, which has a trivial workaround.
  2. We don't currently mutate Topology instances or TopologySettings instances directly. To do so, we'd need to make more code changes and be confident that we haven't overlooked any problematic edge cases.
  3. I foresee more potential bugs/confusion caused by introducing a new state specifically for SRV Topologies where we know that the Topology we currently have is not actually correct, and will be mutated upon SRV resolution.

This approach has fewer changes that could cause errors, especially since it isolates changes to resolving the specific issue at hand.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, could you update TopologySettings() here to be more accurate then?

Copy link
Contributor Author

@NoahStapp NoahStapp Apr 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TopologySettings sets defaults for each attribute, is passing no arguments to it invalid?

Copy link
Member

@ShaneHarvey ShaneHarvey Apr 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like I said above, client.topology_description.heartbeat_frequency is one example of an attribute we need to set here to avoid heartbeat_frequency appearing to change from one value to another. There may be others.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For example:

>>> client = MongoClient(..., connect=False, heartbeatFrequencyMS=99999)
>>> client.topology_description.heartbeat_frequency
10
>>> client.admin.command("ping")
...
>>> client.topology_description.heartbeat_frequency
99.999

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I misunderstood what you meant. The example makes it clear, thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we always initialize a TopologySettings in the constructor. If SRV is enabled, we create a new one after resolution but keep the existing topology id.

)
self._initial_topology_id = td._topology_settings._topology_id
return td
return self._topology.description

@property
Expand All @@ -1216,6 +1231,8 @@ def nodes(self) -> FrozenSet[_Address]:
to any servers, or a network partition causes it to lose connection
to all servers.
"""
if self._topology is None:
return frozenset()
description = self._topology.description
return frozenset(s.address for s in description.known_servers)

Expand Down Expand Up @@ -1570,6 +1587,8 @@ def address(self) -> Optional[tuple[str, int]]:

.. versionadded:: 3.0
"""
if self._topology is None:
self._get_topology()
topology_type = self._topology._description.topology_type
if (
topology_type == TOPOLOGY_TYPE.Sharded
Expand All @@ -1592,6 +1611,8 @@ def primary(self) -> Optional[tuple[str, int]]:
.. versionadded:: 3.0
MongoClient gained this property in version 3.0.
"""
if self._topology is None:
self._get_topology()
return self._topology.get_primary() # type: ignore[return-value]

@property
Expand All @@ -1605,6 +1626,8 @@ def secondaries(self) -> set[_Address]:
.. versionadded:: 3.0
MongoClient gained this property in version 3.0.
"""
if self._topology is None:
self._get_topology()
return self._topology.get_secondaries()

@property
Expand All @@ -1615,6 +1638,8 @@ def arbiters(self) -> set[_Address]:
connected to a replica set, there are no arbiters, or this client was
created without the `replicaSet` option.
"""
if self._topology is None:
self._get_topology()
return self._topology.get_arbiters()

@property
Expand Down
7 changes: 5 additions & 2 deletions pymongo/synchronous/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def __init__(
srv_service_name: str = common.SRV_SERVICE_NAME,
srv_max_hosts: int = 0,
server_monitoring_mode: str = common.SERVER_MONITORING_MODE,
topology_id: Optional[ObjectId] = None,
):
"""Represent MongoClient's configuration.

Expand Down Expand Up @@ -78,8 +79,10 @@ def __init__(
self._srv_service_name = srv_service_name
self._srv_max_hosts = srv_max_hosts or 0
self._server_monitoring_mode = server_monitoring_mode

self._topology_id = ObjectId()
if topology_id is not None:
self._topology_id = topology_id
else:
self._topology_id = ObjectId()
# Store the allocation traceback to catch unclosed clients in the
# test suite.
self._stack = "".join(traceback.format_stack()[:-2])
Expand Down
48 changes: 48 additions & 0 deletions test/asynchronous/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,54 @@ async def test_init_disconnected_with_auth(self):
with self.assertRaises(ConnectionFailure):
await c.pymongo_test.test.find_one()

@async_client_context.require_replica_set
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we can remove require_replica_set.

Copy link
Contributor Author

@NoahStapp NoahStapp Apr 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see. "mongodb+srv://test1.test.build.10gen.cc" returns 2 hosts so it can't connect to standalone. It can connect to Mongos though so we should run this test on both replica set and sharded.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

require_no_standalone, then?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Load balancers probably dont work here so it would need to be @require_no_standalone+@require_no_load_balancer+@require_no_load_balancer

@async_client_context.require_tls
async def test_init_disconnected_with_srv(self):
c = await self.async_rs_or_single_client(
"mongodb+srv://test1.test.build.10gen.cc", connect=False, tlsInsecure=True
)
# nodes returns an empty set if not connected
self.assertEqual(c.nodes, frozenset())
# topology_description returns the initial seed description if not connected
topology_description = c.topology_description
self.assertEqual(topology_description.topology_type, TOPOLOGY_TYPE.Unknown)
self.assertEqual(
{
("test1.test.build.10gen.cc", None): ServerDescription(
("test1.test.build.10gen.cc", None)
)
},
topology_description.server_descriptions(),
)

# address causes client to block until connected
self.assertIsNotNone(await c.address)
# Initial seed topology and connected topology have the same ID
self.assertEqual(
c._topology._topology_id, topology_description._topology_settings._topology_id
)

c = await self.async_rs_or_single_client(
"mongodb+srv://test1.test.build.10gen.cc", connect=False, tlsInsecure=True
)
# primary causes client to block until connected
await c.primary
self.assertIsNotNone(c._topology)

c = await self.async_rs_or_single_client(
"mongodb+srv://test1.test.build.10gen.cc", connect=False, tlsInsecure=True
)
# secondaries causes client to block until connected
await c.secondaries
self.assertIsNotNone(c._topology)

c = await self.async_rs_or_single_client(
"mongodb+srv://test1.test.build.10gen.cc", connect=False, tlsInsecure=True
)
# arbiters causes client to block until connected
await c.arbiters
self.assertIsNotNone(c._topology)

async def test_equality(self):
seed = "{}:{}".format(*list(self.client._topology_settings.seeds)[0])
c = await self.async_rs_or_single_client(seed, connect=False)
Expand Down
Loading
Loading