-
Notifications
You must be signed in to change notification settings - Fork 904
GODRIVER-3542 use pointers to optimize equalTopologies func #2014
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -809,24 +809,27 @@ func equalTopologies(topo1, topo2 description.Topology) bool { | |
return false | ||
} | ||
|
||
topoServers := make(map[string]description.Server, len(topo1.Servers)) | ||
for _, s := range topo1.Servers { | ||
topoServers[s.Addr.String()] = s | ||
topoServers := make(map[string]*description.Server, len(topo1.Servers)) | ||
for i := range topo1.Servers { | ||
topoServers[topo1.Servers[i].Addr.String()] = &topo1.Servers[i] | ||
} | ||
|
||
otherServers := make(map[string]description.Server, len(topo2.Servers)) | ||
for _, s := range topo2.Servers { | ||
otherServers[s.Addr.String()] = s | ||
otherServers := make(map[string]*description.Server, len(topo2.Servers)) | ||
for i := range topo2.Servers { | ||
otherServers[topo2.Servers[i].Addr.String()] = &topo2.Servers[i] | ||
} | ||
|
||
if len(topoServers) != len(otherServers) { | ||
return false | ||
} | ||
|
||
for _, server := range topoServers { | ||
otherServer := otherServers[server.Addr.String()] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've understood that we can reuse already computed canonical string for the address from the map key. Benchmarking (different machine from previous results) produce slight improvement:
And with non-canonical address (
So we have pretty big additional improvement for free |
||
for addr, server := range topoServers { | ||
otherServer, ok := otherServers[addr] | ||
if !ok { | ||
return false | ||
} | ||
|
||
if !driverutil.EqualServers(server, otherServer) { | ||
if !driverutil.EqualServers(*server, *otherServer) { | ||
return false | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ import ( | |
"fmt" | ||
"io/ioutil" | ||
"path" | ||
"strconv" | ||
"sync/atomic" | ||
"testing" | ||
"time" | ||
|
@@ -1040,3 +1041,53 @@ func BenchmarkSelectServerFromDescription(b *testing.B) { | |
}) | ||
} | ||
} | ||
|
||
func TestEqualTopologies(t *testing.T) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should extend this test with the tests noted in the comment above: assert.True(t, equalTopologies(
description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}, {
Addr: "127.0.0.1:27017",
}},
}, description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}},
},
))
assert.True(t, equalTopologies(
description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1",
}, {
Addr: "127.0.0.1:27017",
}},
}, description.Topology{
Servers: []description.Server{{
Addr: "127.0.0.1:27017",
}},
},
)) Optionally, we should parameterize this test: func TestEqualTopologies(t *testing.T) {
tests := []struct{
name string
t1, t2 description.Topology
wantEqual bool
}{
// TODO
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.wantEqual, equalTopologies(test.t1, test.t2))
})
}
} |
||
assert.True(t, equalTopologies( | ||
description.Topology{ | ||
Servers: []description.Server{{ | ||
Addr: "127.0.0.1:27017", | ||
}}, | ||
}, description.Topology{ | ||
Servers: []description.Server{{ | ||
Addr: "127.0.0.1:27017", | ||
}}, | ||
}, | ||
)) | ||
assert.False(t, equalTopologies( | ||
description.Topology{ | ||
Servers: []description.Server{{ | ||
Addr: "127.0.0.1:27017", | ||
}}, | ||
}, description.Topology{ | ||
Servers: []description.Server{{ | ||
Addr: "127.0.0.2:27017", | ||
}}, | ||
}, | ||
)) | ||
} | ||
|
||
func BenchmarkEqualTopologies(b *testing.B) { | ||
servers := make([]description.Server, 100) | ||
for i := 0; i < len(servers); i++ { | ||
servers[i] = description.Server{ | ||
Addr: address.Address("127.0.0." + strconv.Itoa(i) + ":27017"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW, changing this string to
turns benchmark results into
|
||
HeartbeatInterval: time.Duration(10) * time.Second, | ||
LastWriteTime: time.Date(2017, 2, 11, 14, 0, 0, 0, time.UTC), | ||
LastUpdateTime: time.Date(2017, 2, 11, 14, 0, 2, 0, time.UTC), | ||
Kind: description.ServerKindMongos, | ||
WireVersion: &description.VersionRange{Min: 6, Max: 21}, | ||
} | ||
} | ||
desc := description.Topology{ | ||
Servers: servers, | ||
} | ||
|
||
b.ResetTimer() | ||
b.RunParallel(func(p *testing.PB) { | ||
b.ReportAllocs() | ||
for p.Next() { | ||
_ = equalTopologies(desc, desc) | ||
} | ||
}) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
equalTopologies really isn't on a per-operation hot path. From what I can tell, it's only run in
processSRVResults
andapply
. So under normal conditions this function gets called when heartbeat comes back (default 10s) and when an SRV record is rescanned (default 60s). If allocations are a legitimate concern I suggest we consider opting for a 0-allocation O(n^2) solution:This should lower future maintenance burden concerning pointers, I'm specifically thinking of slice reallocations. What are your thoughts?
Consider updating the benchmark to iterate over three different sizes: 10, 100, 1000 when comparing this solution to the pointer solution.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This method you suggested has a huge optimization
which is applicable to any other implementation, but breaks these now passing tests:
Also your method does no canonical addresses via
String()
that seems to be the most expensive operation in this method.So this test fails:
With all of the above your method gives me on the current benchmark:
For 100 distinct servers in two equal topologies.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If zero-allocation effect should be tested it can be done this way:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, I think the O(n^2) solution is still valid. Here is a polished version:
We are over 10x the ns/op in the benchmark as a trade-off for 0 allocations:
@qingyang-hu , @isopov What are your thoughts? Again, I only expect this function to be called when a heartbeat comes back (default 10s) and when an SRV record is rescanned (default 60s).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree that we don't have to be aggressive because this function is not on a hot path.
As a tradeoff, we can reduce the allocations by half from @isopov's solution once we determine the number of duplicates in
topo2
.For example:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@isopov? O(n^2) or uint are the team preferences. Do you have thoughts? Here are the current results:
Pointer:
O(n^2):
uint: