Skip to content

Commit 29f1ea3

Browse files
committed
eof: Implement block deduplicator for EOF. Add simple unit tests.
1 parent 46ead2e commit 29f1ea3

File tree

9 files changed

+466
-85
lines changed

9 files changed

+466
-85
lines changed

Diff for: libevmasm/Assembly.cpp

+12-7
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,14 @@
2222

2323
#include <libevmasm/Assembly.h>
2424

25+
#include <libevmasm/BlockDeduplicator.h>
26+
#include <libevmasm/BlockSorter.h>
2527
#include <libevmasm/CommonSubexpressionEliminator.h>
28+
#include <libevmasm/ConstantOptimiser.h>
2629
#include <libevmasm/ControlFlowGraph.h>
27-
#include <libevmasm/PeepholeOptimiser.h>
2830
#include <libevmasm/Inliner.h>
2931
#include <libevmasm/JumpdestRemover.h>
30-
#include <libevmasm/BlockDeduplicator.h>
31-
#include <libevmasm/ConstantOptimiser.h>
32+
#include <libevmasm/PeepholeOptimiser.h>
3233

3334
#include <liblangutil/CharStream.h>
3435
#include <liblangutil/Exceptions.h>
@@ -796,6 +797,10 @@ std::map<u256, u256> const& Assembly::optimiseInternal(
796797
if (m_tagReplacements)
797798
return *m_tagReplacements;
798799

800+
if (m_eofVersion.has_value())
801+
for (auto& codeSection: m_codeSections)
802+
BlockSorter{codeSection.items}.sort();
803+
799804
// Run optimisation for sub-assemblies.
800805
// TODO: verify and double-check this for EOF.
801806
for (size_t subId = 0; subId < m_subs.size(); ++subId)
@@ -856,12 +861,12 @@ std::map<u256, u256> const& Assembly::optimiseInternal(
856861
}
857862
}
858863

859-
// This only modifies PushTags, we have to run again to actually remove code.
860-
// TODO: implement for EOF.
861-
if (_settings.runDeduplicate && !m_eofVersion.has_value())
864+
// For legacy this only modifies PushTags, we have to run again to actually remove code.
865+
// For EOF it modifies RJUMP and RJUMPI tags.
866+
if (_settings.runDeduplicate)
862867
for (auto& section: m_codeSections)
863868
{
864-
BlockDeduplicator deduplicator{section.items};
869+
BlockDeduplicator deduplicator{section.items, m_eofVersion};
865870
if (deduplicator.deduplicate())
866871
{
867872
for (auto const& replacement: deduplicator.replacedTags())

Diff for: libevmasm/BlockDeduplicator.cpp

+120-39
Original file line numberDiff line numberDiff line change
@@ -41,55 +41,130 @@ bool BlockDeduplicator::deduplicate()
4141

4242
// Virtual tag that signifies "the current block" and which is used to optimise loops.
4343
// We abort if this virtual tag actually exists.
44-
AssemblyItem pushSelf{PushTag, u256(-4)};
45-
if (
46-
std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()) ||
47-
std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag())
48-
)
44+
auto const virtualTagData = u256(-4);
45+
AssemblyItem pushSelf{PushTag, u256(virtualTagData)};
46+
47+
// There is no PushTag in EOF context, but relative jumps have their destination stored in AssemblyItem data.
48+
// We need to virtually replace the destinations of these relative jumps when they point to the current block's own Tag.
49+
AssemblyItem rjumpSelf{RelativeJump, Instruction::RJUMP, virtualTagData};
50+
AssemblyItem rjumpiSelf{ConditionalRelativeJump, Instruction::RJUMPI, virtualTagData};
51+
52+
if (std::count(m_items.cbegin(), m_items.cend(), pushSelf.tag()))
4953
return false;
5054

51-
std::function<bool(size_t, size_t)> comparator = [&](size_t _i, size_t _j)
55+
if (!m_eofVersion.has_value())
56+
{
57+
if (std::count(m_items.cbegin(), m_items.cend(), pushSelf.pushTag()))
58+
return false;
59+
}
60+
else
5261
{
53-
if (_i == _j)
62+
if (
63+
std::count(m_items.cbegin(), m_items.cend(), rjumpSelf) ||
64+
std::count(m_items.cbegin(), m_items.cend(), rjumpiSelf)
65+
)
5466
return false;
67+
}
68+
69+
std::function<bool(size_t, size_t)> comparator;
70+
71+
if (!m_eofVersion.has_value())
72+
{
73+
comparator = [&](size_t _i, size_t _j)
74+
{
75+
if (_i == _j)
76+
return false;
77+
78+
using diff_type = BlockIterator::difference_type;
79+
80+
// To compare recursive loops, we have to already unify PushTag opcodes of the
81+
// block's own tag.
82+
AssemblyItem pushFirstTag{pushSelf};
83+
AssemblyItem pushSecondTag{pushSelf};
84+
85+
if (_i < m_items.size() && m_items.at(_i).type() == Tag)
86+
pushFirstTag = m_items.at(_i).pushTag();
87+
if (_j < m_items.size() && m_items.at(_j).type() == Tag)
88+
pushSecondTag = m_items.at(_j).pushTag();
89+
90+
BlockIterator first{m_items.begin() + diff_type(_i), m_items.end(), {{pushFirstTag, pushSelf}}};
91+
BlockIterator second{m_items.begin() + diff_type(_j), m_items.end(), {{pushSecondTag, pushSelf}}};
92+
BlockIterator end{m_items.end(), m_items.end(), {}};
93+
94+
if (first != end && (*first).type() == Tag)
95+
++first;
96+
if (second != end && (*second).type() == Tag)
97+
++second;
98+
99+
return std::lexicographical_compare(first, end, second, end);
100+
};
101+
}
102+
else
103+
{
104+
comparator = [&](size_t _i, size_t _j)
105+
{
106+
if (_i == _j)
107+
return false;
55108

56-
// To compare recursive loops, we have to already unify PushTag opcodes of the
57-
// block's own tag.
58-
AssemblyItem pushFirstTag{pushSelf};
59-
AssemblyItem pushSecondTag{pushSelf};
109+
using diff_type = BlockIterator::difference_type;
60110

61-
if (_i < m_items.size() && m_items.at(_i).type() == Tag)
62-
pushFirstTag = m_items.at(_i).pushTag();
63-
if (_j < m_items.size() && m_items.at(_j).type() == Tag)
64-
pushSecondTag = m_items.at(_j).pushTag();
111+
std::map<AssemblyItem const, AssemblyItem const> replacmentMapFirst;
112+
std::map<AssemblyItem const, AssemblyItem const> replacmentMapSecond;
113+
114+
if (_i < m_items.size() && m_items.at(_i).type() == Tag)
115+
{
116+
replacmentMapFirst.emplace(AssemblyItem::relativeJumpTo(m_items.at(_i)), rjumpSelf);
117+
replacmentMapFirst.emplace(AssemblyItem::conditionalRelativeJumpTo(m_items.at(_i)), rjumpiSelf);
118+
}
119+
if (_j < m_items.size() && m_items.at(_j).type() == Tag)
120+
{
121+
replacmentMapSecond.emplace(AssemblyItem::relativeJumpTo(m_items.at(_j)), rjumpSelf);
122+
replacmentMapSecond.emplace(AssemblyItem::conditionalRelativeJumpTo(m_items.at(_j)), rjumpiSelf);
123+
}
65124

66-
using diff_type = BlockIterator::difference_type;
67-
BlockIterator first{m_items.begin() + diff_type(_i), m_items.end(), &pushFirstTag, &pushSelf};
68-
BlockIterator second{m_items.begin() + diff_type(_j), m_items.end(), &pushSecondTag, &pushSelf};
69-
BlockIterator end{m_items.end(), m_items.end()};
125+
BlockIterator first{m_items.begin() + diff_type(_i), m_items.end(), std::move(replacmentMapFirst)};
126+
BlockIterator second{m_items.begin() + diff_type(_j), m_items.end(), std::move(replacmentMapSecond)};
127+
BlockIterator end{m_items.end(), m_items.end(), {}};
70128

71-
if (first != end && (*first).type() == Tag)
72-
++first;
73-
if (second != end && (*second).type() == Tag)
74-
++second;
129+
if (first != end && (*first).type() == Tag)
130+
++first;
131+
if (second != end && (*second).type() == Tag)
132+
++second;
75133

76-
return std::lexicographical_compare(first, end, second, end);
77-
};
134+
return std::lexicographical_compare(first, end, second, end);
135+
};
136+
}
78137

79138
size_t iterations = 0;
80139
for (; ; ++iterations)
81140
{
82-
//@todo this should probably be optimized.
83141
std::set<size_t, std::function<bool(size_t, size_t)>> blocksSeen(comparator);
84-
for (size_t i = 0; i < m_items.size(); ++i)
142+
if (!m_eofVersion.has_value())
85143
{
86-
if (m_items.at(i).type() != Tag)
87-
continue;
88-
auto it = blocksSeen.find(i);
89-
if (it == blocksSeen.end())
90-
blocksSeen.insert(i);
91-
else
92-
m_replacedTags[m_items.at(i).data()] = m_items.at(*it).data();
144+
//@todo this should probably be optimized.
145+
for (size_t i = 0; i < m_items.size(); ++i)
146+
{
147+
if (m_items.at(i).type() != Tag)
148+
continue;
149+
auto it = blocksSeen.find(i);
150+
if (it == blocksSeen.end())
151+
blocksSeen.insert(i);
152+
else
153+
m_replacedTags[m_items.at(i).data()] = m_items.at(*it).data();
154+
}
155+
}
156+
else
157+
{
158+
for (size_t i = m_items.size(); i > 0; --i)
159+
{
160+
if (m_items.at(i - 1).type() != Tag)
161+
continue;
162+
auto it = blocksSeen.find(i - 1);
163+
if (it == blocksSeen.end())
164+
blocksSeen.insert(i - 1);
165+
else
166+
m_replacedTags[m_items.at(i - 1).data()] = m_items.at(*it).data();
167+
}
93168
}
94169

95170
if (!applyTagReplacement(m_items, m_replacedTags))
@@ -131,7 +206,12 @@ BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++()
131206
{
132207
if (it == end)
133208
return *this;
134-
if (SemanticInformation::altersControlFlow(*it) && *it != AssemblyItem{Instruction::JUMPI} && it->type() != ConditionalRelativeJump)
209+
if (
210+
SemanticInformation::altersControlFlow(*it) &&
211+
*it != AssemblyItem{Instruction::JUMPI} &&
212+
it->type() != ConditionalRelativeJump &&
213+
it->type() != CallF
214+
)
135215
it = end;
136216
else
137217
{
@@ -144,8 +224,9 @@ BlockDeduplicator::BlockIterator& BlockDeduplicator::BlockIterator::operator++()
144224

145225
AssemblyItem const& BlockDeduplicator::BlockIterator::operator*() const
146226
{
147-
if (replaceItem && replaceWith && *it == *replaceItem)
148-
return *replaceWith;
149-
else
150-
return *it;
227+
auto const rmIt = m_replaceMap.find(*it);
228+
229+
if (rmIt != m_replaceMap.end())
230+
return rmIt->second;
231+
return *it;
151232
}

Diff for: libevmasm/BlockDeduplicator.h

+6-6
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ using AssemblyItems = std::vector<AssemblyItem>;
4646
class BlockDeduplicator
4747
{
4848
public:
49-
explicit BlockDeduplicator(AssemblyItems& _items): m_items(_items) {}
49+
explicit BlockDeduplicator(AssemblyItems& _items, std::optional<uint8_t> _eofVersion):
50+
m_items(_items), m_eofVersion(_eofVersion) {}
5051
/// @returns true if something was changed
5152
bool deduplicate();
5253
/// @returns the tags that were replaced.
@@ -77,22 +78,21 @@ class BlockDeduplicator
7778
BlockIterator(
7879
AssemblyItems::const_iterator _it,
7980
AssemblyItems::const_iterator _end,
80-
AssemblyItem const* _replaceItem = nullptr,
81-
AssemblyItem const* _replaceWith = nullptr
81+
std::map<AssemblyItem const, AssemblyItem const>&& replaceMap
8282
):
83-
it(_it), end(_end), replaceItem(_replaceItem), replaceWith(_replaceWith) {}
83+
it(_it), end(_end), m_replaceMap(replaceMap) {}
8484
BlockIterator& operator++();
8585
bool operator==(BlockIterator const& _other) const { return it == _other.it; }
8686
bool operator!=(BlockIterator const& _other) const { return it != _other.it; }
8787
AssemblyItem const& operator*() const;
8888
AssemblyItems::const_iterator it;
8989
AssemblyItems::const_iterator end;
90-
AssemblyItem const* replaceItem;
91-
AssemblyItem const* replaceWith;
90+
std::map<AssemblyItem const, AssemblyItem const> m_replaceMap;
9291
};
9392

9493
std::map<u256, u256> m_replacedTags;
9594
AssemblyItems& m_items;
95+
std::optional<uint8_t> m_eofVersion;
9696
};
9797

9898
}

0 commit comments

Comments
 (0)