Skip to content

GODRIVER-3542 use pointers to optimize equalTopologies func #2014

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 12 additions & 9 deletions x/mongo/driver/topology/topology.go
Original file line number Diff line number Diff line change
Expand Up @@ -809,24 +809,27 @@ func equalTopologies(topo1, topo2 description.Topology) bool {
return false
}

topoServers := make(map[string]description.Server, len(topo1.Servers))
for _, s := range topo1.Servers {
topoServers[s.Addr.String()] = s
topoServers := make(map[string]*description.Server, len(topo1.Servers))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

equalTopologies really isn't on a per-operation hot path. From what I can tell, it's only run in processSRVResults and apply. So under normal conditions this function gets called when heartbeat comes back (default 10s) and when an SRV record is rescanned (default 60s). If allocations are a legitimate concern I suggest we consider opting for a 0-allocation O(n^2) solution:

func equalTopologies(t1, t2 description.Topology) bool {
	if t1.Kind != t2.Kind {
		return false
	}

	if len(t1.Servers) != len(t2.Servers) {
		return false
	}

	for _, s1 := range t1.Servers {
		found := false
		for _, s2 := range t2.Servers {
			if s1.Addr == s2.Addr && driverutil.EqualServers(s1, s2) {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}

	return true
}

This should lower future maintenance burden concerning pointers, I'm specifically thinking of slice reallocations. What are your thoughts?

Consider updating the benchmark to iterate over three different sizes: 10, 100, 1000 when comparing this solution to the pointer solution.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method you suggested has a huge optimization

	if len(t1.Servers) != len(t2.Servers) {
		return false
	}

which is applicable to any other implementation, but breaks these now passing tests:

	assert.True(t, equalTopologies(
		description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}, {
				Addr: "127.0.0.1:27017",
			}},
		}, description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}},
		},
	))
	assert.True(t, equalTopologies(
		description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1",
			}, {
				Addr: "127.0.0.1:27017",
			}},
		}, description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}},
		},
	))

Also your method does no canonical addresses via String() that seems to be the most expensive operation in this method.
So this test fails:

	assert.True(t, equalTopologies(
		description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1",
			}},
		}, description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}},
		},
	))

With all of the above your method gives me on the current benchmark:

BenchmarkEqualTopologies
BenchmarkEqualTopologies-32    	  605614	      1888 ns/op	       0 B/op	       0 allocs/op

For 100 distinct servers in two equal topologies.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If zero-allocation effect should be tested it can be done this way:

func BenchmarkEqualTopologies(b *testing.B) {
	for _, bsize := range []int{10, 100, 1000} {
		b.Run(strconv.Itoa(bsize), benchmarkEqualTopologiesWithSize(bsize))
	}
}

func benchmarkEqualTopologiesWithSize(size int) func(b *testing.B) {
	return func(b *testing.B) {
		servers := make([]description.Server, size)
		for i := 0; i < len(servers); i++ {
			servers[i] = description.Server{
				Addr:              address.Address("127.0.0." + strconv.Itoa(i) + ":27017"),
				HeartbeatInterval: time.Duration(10) * time.Second,
				LastWriteTime:     time.Date(2017, 2, 11, 14, 0, 0, 0, time.UTC),
				LastUpdateTime:    time.Date(2017, 2, 11, 14, 0, 2, 0, time.UTC),
				Kind:              description.ServerKindMongos,
				WireVersion:       &description.VersionRange{Min: 6, Max: 21},
			}
		}
		desc := description.Topology{
			Servers: servers,
		}

		b.ResetTimer()
		b.RunParallel(func(p *testing.PB) {
			b.ReportAllocs()
			for p.Next() {
				_ = equalTopologies(desc, desc)
			}
		})
	}
}

func TestEqualTopologiesZeroAllocations(t *testing.T) {
	result := testing.Benchmark(benchmarkEqualTopologiesWithSize(10))
	assert.EqualValues(t, 0, result.AllocsPerOp())
}

Copy link
Member

@prestonvasquez prestonvasquez Apr 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch, I think the O(n^2) solution is still valid. Here is a polished version:

func equalTopologies(t1, t2 description.Topology) bool {
	if t1.Kind != t2.Kind {
		return false
	}

	// Every server in t1 has to appear in t2:
	for _, s1 := range t1.Servers {
		addr1 := s1.Addr.String()
		found := false
		for _, s2 := range t2.Servers {
			if addr1 == s2.Addr.String() && driverutil.EqualServers(s1, s2) {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}

	// Every server in t2 has to appear in t1:
	for _, s2 := range t2.Servers {
		addr2 := s2.Addr.String()
		found := false
		for _, s1 := range t1.Servers {
			if s1.Addr.String() == addr2 && driverutil.EqualServers(s1, s2) {
				found = true
				break
			}
		}
		if !found {
			return false
		}
	}

	return true
}

We are over 10x the ns/op in the benchmark as a trade-off for 0 allocations:

BenchmarkEqualTopologies-10        21643             55066 ns/op               3 B/op          0 allocs/op
PASS
ok      go.mongodb.org/mongo-driver/v2/x/mongo/driver/topology  32.328s

@qingyang-hu , @isopov What are your thoughts? Again, I only expect this function to be called when a heartbeat comes back (default 10s) and when an SRV record is rescanned (default 60s).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that we don't have to be aggressive because this function is not on a hot path.

As a tradeoff, we can reduce the allocations by half from @isopov's solution once we determine the number of duplicates in topo2.
For example:

func equalTopologies(topo1, topo2 description.Topology) bool {
	if topo1.Kind != topo2.Kind {
		return false
	}

	topoServers := make(map[string]uint, len(topo1.Servers)) // Use a unit map value for index in case it exceeds the limit with offset.
	for i := range topo1.Servers {
		topoServers[topo1.Servers[i].Addr.String()] = uint(i)
	}

	dupCnt := 0
	offset := uint(len(topo1.Servers))
	for i := 0; i < len(topo2.Servers); i++ {
		addr := topo2.Servers[i].Addr.String()
		serverIdx, ok := topoServers[addr]
		if !ok {
			return false
		}

		if serverIdx >= offset {
			dupCnt++
		} else {
			topoServers[addr] += offset // Add an offset to mark a duplicate.
			if !driverutil.EqualServers(topo1.Servers[serverIdx], topo2.Servers[i]) {
				return false
			}
		}
	}
	return len(topoServers) == len(topo2.Servers)-dupCnt
}

Copy link
Member

@prestonvasquez prestonvasquez Apr 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@isopov? O(n^2) or uint are the team preferences. Do you have thoughts? Here are the current results:

Pointer:

BenchmarkEqualTopologies/10-10   2732804               412.5 ns/op           840 B/op          2 allocs/op
BenchmarkEqualTopologies/100-10                   308890              4178 ns/op            8362 B/op          4 allocs/op
BenchmarkEqualTopologies/1000-10                   28926             41944 ns/op          114739 B/op          4 allocs/op
PASS
ok      go.mongodb.org/mongo-driver/v2/x/mongo/driver/topology  35.483s

O(n^2):

BenchmarkEqualTopologies/10-10   1641102               730.9 ns/op             0 B/op          0 allocs/op
BenchmarkEqualTopologies/100-10                    21530             54448 ns/op               2 B/op          0 allocs/op
BenchmarkEqualTopologies/1000-10                     193           5718903 ns/op             304 B/op          2 allocs/op
PASS
ok      go.mongodb.org/mongo-driver/v2/x/mongo/driver/topology  34.935s

uint:

BenchmarkEqualTopologies/10-10   4112641               291.5 ns/op           420 B/op          1 allocs/op
BenchmarkEqualTopologies/100-10                   412269              2842 ns/op            4181 B/op          2 allocs/op
BenchmarkEqualTopologies/1000-10                   40996             29112 ns/op           57369 B/op          2 allocs/op
PASS
ok      go.mongodb.org/mongo-driver/v2/x/mongo/driver/topology  24.280s

for i := range topo1.Servers {
topoServers[topo1.Servers[i].Addr.String()] = &topo1.Servers[i]
}

otherServers := make(map[string]description.Server, len(topo2.Servers))
for _, s := range topo2.Servers {
otherServers[s.Addr.String()] = s
otherServers := make(map[string]*description.Server, len(topo2.Servers))
for i := range topo2.Servers {
otherServers[topo2.Servers[i].Addr.String()] = &topo2.Servers[i]
}

if len(topoServers) != len(otherServers) {
return false
}

for _, server := range topoServers {
otherServer := otherServers[server.Addr.String()]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've understood that we can reuse already computed canonical string for the address from the map key.

Benchmarking (different machine from previous results) produce slight improvement:

  • Before
BenchmarkEqualTopologies
BenchmarkEqualTopologies-10    	  281943	      4715 ns/op	    6992 B/op	       6 allocs/op
PASS
  • After
BenchmarkEqualTopologies
BenchmarkEqualTopologies-10    	  317918	      4228 ns/op	    6992 B/op	       6 allocs/op
PASS

And with non-canonical address (address.Address("127.0.0." + strconv.Itoa(i)))

  • Before
BenchmarkEqualTopologies
BenchmarkEqualTopologies-10    	   66252	     17721 ns/op	   35793 B/op	     906 allocs/op
PASS
  • After
BenchmarkEqualTopologies
BenchmarkEqualTopologies-10    	   91142	     12960 ns/op	   26193 B/op	     606 allocs/op
PASS

So we have pretty big additional improvement for free

for addr, server := range topoServers {
otherServer, ok := otherServers[addr]
if !ok {
return false
}

if !driverutil.EqualServers(server, otherServer) {
if !driverutil.EqualServers(*server, *otherServer) {
return false
}
}
Expand Down
51 changes: 51 additions & 0 deletions x/mongo/driver/topology/topology_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"fmt"
"io/ioutil"
"path"
"strconv"
"sync/atomic"
"testing"
"time"
Expand Down Expand Up @@ -1040,3 +1041,53 @@ func BenchmarkSelectServerFromDescription(b *testing.B) {
})
}
}

func TestEqualTopologies(t *testing.T) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should extend this test with the tests noted in the comment above:

	assert.True(t, equalTopologies(
		description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}, {
				Addr: "127.0.0.1:27017",
			}},
		}, description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}},
		},
	))
	assert.True(t, equalTopologies(
		description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1",
			}, {
				Addr: "127.0.0.1:27017",
			}},
		}, description.Topology{
			Servers: []description.Server{{
				Addr: "127.0.0.1:27017",
			}},
		},
	))

Optionally, we should parameterize this test:

func TestEqualTopologies(t *testing.T) {
	tests := []struct{
		name string 
		t1, t2 description.Topology
		wantEqual bool
	}{
		// TODO
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			assert.Equal(t, test.wantEqual, equalTopologies(test.t1, test.t2))
		})
	}
}

assert.True(t, equalTopologies(
description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}},
}, description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}},
},
))
assert.False(t, equalTopologies(
description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}},
}, description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.2:27017",
}},
},
))
}

func BenchmarkEqualTopologies(b *testing.B) {
servers := make([]description.Server, 100)
for i := 0; i < len(servers); i++ {
servers[i] = description.Server{
Addr: address.Address("127.0.0." + strconv.Itoa(i) + ":27017"),
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, changing this string to

Addr:              address.Address("127.0.0." + strconv.Itoa(i)),

turns benchmark results into

BenchmarkEqualTopologies
BenchmarkEqualTopologies-32    	  133854	      8716 ns/op	   35793 B/op	     906 allocs/op
PASS

HeartbeatInterval: time.Duration(10) * time.Second,
LastWriteTime: time.Date(2017, 2, 11, 14, 0, 0, 0, time.UTC),
LastUpdateTime: time.Date(2017, 2, 11, 14, 0, 2, 0, time.UTC),
Kind: description.ServerKindMongos,
WireVersion: &description.VersionRange{Min: 6, Max: 21},
}
}
desc := description.Topology{
Servers: servers,
}

b.ResetTimer()
b.RunParallel(func(p *testing.PB) {
b.ReportAllocs()
for p.Next() {
_ = equalTopologies(desc, desc)
}
})
}