Skip to content

Commit 1fd7f7d

Browse files
committed
Support human-readable assembly in EVM assembly test case
1 parent f928c71 commit 1fd7f7d

13 files changed

+493
-5
lines changed

Diff for: test/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ set(libevmasm_sources
5454
libevmasm/EVMAssemblyTest.cpp
5555
libevmasm/EVMAssemblyTest.h
5656
libevmasm/Optimiser.cpp
57+
libevmasm/PlainAssemblyParser.cpp
58+
libevmasm/PlainAssemblyParser.h
5759
)
5860
detect_stray_source_files("${libevmasm_sources}" "libevmasm/")
5961

Diff for: test/TestCase.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ bool TestCase::isTestFilename(boost::filesystem::path const& _filename)
5252
{
5353
std::string extension = _filename.extension().string();
5454
// NOTE: .asmjson rather than .json because JSON files that do not represent test cases exist in some test dirs.
55-
return (extension == ".sol" || extension == ".yul" || extension == ".asmjson" || extension == ".stack") &&
55+
return (extension == ".sol" || extension == ".yul" || extension == ".asm" || extension == ".asmjson" || extension == ".stack") &&
5656
!boost::starts_with(_filename.string(), "~") &&
5757
!boost::starts_with(_filename.string(), ".");
5858
}

Diff for: test/libevmasm/EVMAssemblyTest.cpp

+26-3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
#include <test/libevmasm/EVMAssemblyTest.h>
1919

20+
#include <test/libevmasm/PlainAssemblyParser.h>
21+
2022
#include <test/Common.h>
2123

2224
#include <libevmasm/Disassemble.h>
@@ -39,6 +41,7 @@ using namespace solidity::langutil;
3941
using namespace solidity::util;
4042

4143
std::vector<std::string> const EVMAssemblyTest::c_outputLabels = {
44+
"InputAssemblyJSON",
4245
"Assembly",
4346
"Bytecode",
4447
"Opcodes",
@@ -56,8 +59,12 @@ EVMAssemblyTest::EVMAssemblyTest(std::string const& _filename):
5659
m_source = m_reader.source();
5760
m_expectation = m_reader.simpleExpectations();
5861

59-
if (!boost::algorithm::ends_with(_filename, ".asmjson"))
60-
BOOST_THROW_EXCEPTION(std::runtime_error("Not an assembly test: \"" + _filename + "\". Allowed extensions: .asmjson."));
62+
if (boost::algorithm::ends_with(_filename, ".asmjson"))
63+
m_assemblyFormat = AssemblyFormat::JSON;
64+
else if (boost::algorithm::ends_with(_filename, ".asm"))
65+
m_assemblyFormat = AssemblyFormat::Plain;
66+
else
67+
BOOST_THROW_EXCEPTION(std::runtime_error("Not an assembly test: \"" + _filename + "\". Allowed extensions: .asm, .asmjson."));
6168

6269
m_selectedOutputs = m_reader.stringSetting("outputs", "Assembly,Bytecode,Opcodes,SourceMappings");
6370
OptimisationPreset optimizationPreset = m_reader.enumSetting<OptimisationPreset>(
@@ -101,9 +108,23 @@ TestCase::TestResult EVMAssemblyTest::run(std::ostream& _stream, std::string con
101108

102109
evmAssemblyStack.selectDebugInfo(DebugInfoSelection::AllExceptExperimental());
103110

111+
std::string assemblyJSON;
112+
switch (m_assemblyFormat)
113+
{
114+
case AssemblyFormat::JSON:
115+
assemblyJSON = m_source;
116+
break;
117+
case AssemblyFormat::Plain:
118+
assemblyJSON = jsonPrint(
119+
PlainAssemblyParser{}.parse(m_reader.fileName().filename().string(), m_source),
120+
{JsonFormat::Pretty, 4}
121+
);
122+
break;
123+
}
124+
104125
try
105126
{
106-
evmAssemblyStack.parseAndAnalyze(m_reader.fileName().filename().string(), m_source);
127+
evmAssemblyStack.parseAndAnalyze(m_reader.fileName().filename().string(), assemblyJSON);
107128
}
108129
catch (AssemblyImportException const& _exception)
109130
{
@@ -125,6 +146,8 @@ TestCase::TestResult EVMAssemblyTest::run(std::ostream& _stream, std::string con
125146
soltestAssert(evmAssemblyStack.compilationSuccessful());
126147

127148
auto const produceOutput = [&](std::string const& _output) {
149+
if (_output == "InputAssemblyJSON")
150+
return assemblyJSON;
128151
if (_output == "Assembly")
129152
return evmAssemblyStack.assemblyString({{m_reader.fileName().filename().string(), m_source}});
130153
if (_output == "Bytecode")

Diff for: test/libevmasm/EVMAssemblyTest.h

+7
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,15 @@ class EVMAssemblyTest: public frontend::test::EVMVersionRestrictedTestCase
4242
TestResult run(std::ostream& _stream, std::string const& _linePrefix = "", bool const _formatted = false) override;
4343

4444
private:
45+
enum class AssemblyFormat
46+
{
47+
JSON,
48+
Plain,
49+
};
50+
4551
static std::vector<std::string> const c_outputLabels;
4652

53+
AssemblyFormat m_assemblyFormat{};
4754
std::string m_selectedOutputs;
4855
evmasm::Assembly::OptimiserSettings m_optimizerSettings;
4956
};

Diff for: test/libevmasm/PlainAssemblyParser.cpp

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
This file is part of solidity.
3+
4+
solidity is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
solidity is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with solidity. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
18+
#include <test/libevmasm/PlainAssemblyParser.h>
19+
20+
#include <test/Common.h>
21+
#include <test/libsolidity/util/SoltestErrors.h>
22+
23+
#include <libevmasm/Instruction.h>
24+
25+
#include <liblangutil/Common.h>
26+
27+
#include <boost/algorithm/string/find.hpp>
28+
29+
#include <fmt/format.h>
30+
31+
#include <sstream>
32+
33+
using namespace std::string_literals;
34+
using namespace solidity;
35+
using namespace solidity::test;
36+
using namespace solidity::evmasm;
37+
using namespace solidity::evmasm::test;
38+
using namespace solidity::langutil;
39+
40+
/// Parses @a _source, written in the plain assembly format, and returns the equivalent assembly JSON:
/// an object whose only key, ".code", holds an array with one entry per assembly item.
///
/// Item mapping:
/// - A bare instruction name becomes {"name": <instruction>}.
/// - `PUSH [tag] <tagID>` becomes {"name": "PUSH [tag]", "value": <tagID>}.
/// - `PUSH 0x<hex>` becomes {"name": "PUSH", "value": <hex>} (the "0x" prefix is stripped).
/// - `tag <tagID>` becomes a "tag" item immediately followed by a "JUMPDEST" item.
///
/// @param _sourceName Name of the source, used only in error messages.
/// @param _source The whole plain assembly text. Lines that contain no tokens are skipped.
/// @throws std::runtime_error with a message produced by formatError() on unknown items,
///     missing or superfluous arguments and PUSH immediates that do not start with "0x".
Json PlainAssemblyParser::parse(std::string _sourceName, std::string const& _source)
{
	m_sourceName = std::move(_sourceName);
	// Restart line counting so that error messages show correct line numbers even when
	// the same parser instance is used to parse more than one source.
	m_lineNumber = 0;

	Json codeJSON = Json::array();
	std::istringstream sourceStream(_source);
	// Read into a local buffer; advanceLine() copies it into m_line. Reading straight into m_line
	// would make advanceLine() assign the string from a view of its own contents.
	std::string line;
	while (getline(sourceStream, line))
	{
		advanceLine(line);
		if (m_lineTokens.empty())
			continue;

		if (c_instructions.contains(currentToken().value))
		{
			expectNoMoreArguments();
			codeJSON.push_back({{"name", currentToken().value}});
		}
		else if (currentToken().value == "PUSH")
		{
			if (hasMoreTokens() && nextToken().value == "[tag]")
			{
				// Step over "[tag]", which is a part of the item name rather than an argument.
				advanceToken();
				std::string_view tagID = expectArgument();
				expectNoMoreArguments();
				codeJSON.push_back({{"name", "PUSH [tag]"}, {"value", tagID}});
			}
			else
			{
				std::string_view immediateArgument = expectArgument();
				expectNoMoreArguments();

				if (!immediateArgument.starts_with("0x"))
					BOOST_THROW_EXCEPTION(std::runtime_error(formatError("The immediate argument to PUSH must be a hex number prefixed with '0x'.")));

				// The JSON value is emitted without the prefix.
				immediateArgument.remove_prefix("0x"s.size());
				codeJSON.push_back({{"name", "PUSH"}, {"value", immediateArgument}});
			}
		}
		else if (currentToken().value == "tag")
		{
			std::string_view tagID = expectArgument();
			expectNoMoreArguments();

			// A tag in plain assembly expands into two JSON items.
			codeJSON.push_back({{"name", "tag"}, {"value", tagID}});
			codeJSON.push_back({{"name", "JUMPDEST"}});
		}
		else
			BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Unknown instruction.")));
	}
	return {{".code", codeJSON}};
}
90+
91+
/// Returns the token of the current line that m_tokenIndex points at.
/// Asserts that the index is within the bounds of m_lineTokens.
PlainAssemblyParser::Token const& PlainAssemblyParser::currentToken() const
{
	size_t const index = m_tokenIndex;
	soltestAssert(index < m_lineTokens.size());
	return m_lineTokens[index];
}
96+
97+
/// Returns the token that directly follows the current one, without advancing.
/// Asserts that such a token exists on the current line.
PlainAssemblyParser::Token const& PlainAssemblyParser::nextToken() const
{
	size_t const lookaheadIndex = m_tokenIndex + 1;
	soltestAssert(lookaheadIndex < m_lineTokens.size());
	return m_lineTokens[lookaheadIndex];
}
102+
103+
bool PlainAssemblyParser::advanceToken()
104+
{
105+
if (!hasMoreTokens())
106+
return false;
107+
108+
++m_tokenIndex;
109+
return true;
110+
}
111+
112+
std::string_view PlainAssemblyParser::expectArgument()
113+
{
114+
bool hasArgument = advanceToken();
115+
if (!hasArgument)
116+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Missing argument(s).")));
117+
118+
return currentToken().value;
119+
}
120+
121+
void PlainAssemblyParser::expectNoMoreArguments()
122+
{
123+
bool hasArgument = advanceToken();
124+
if (hasArgument)
125+
BOOST_THROW_EXCEPTION(std::runtime_error(formatError("Too many arguments.")));
126+
}
127+
128+
void PlainAssemblyParser::advanceLine(std::string_view _line)
129+
{
130+
++m_lineNumber;
131+
m_line = _line;
132+
m_lineTokens = tokenizeLine(m_line);
133+
m_tokenIndex = 0;
134+
}
135+
136+
std::vector<PlainAssemblyParser::Token> PlainAssemblyParser::tokenizeLine(std::string_view _line)
137+
{
138+
auto const notWhiteSpace = [](char _c) { return !isWhiteSpace(_c); };
139+
140+
std::vector<Token> tokens;
141+
auto tokenLocation = boost::find_token(_line, notWhiteSpace, boost::token_compress_on);
142+
while (!tokenLocation.empty())
143+
{
144+
std::string_view value{tokenLocation.begin(), tokenLocation.end()};
145+
if (value.starts_with("//"))
146+
break;
147+
148+
tokens.push_back({
149+
.value = value,
150+
.position = static_cast<size_t>(std::distance(_line.begin(), tokenLocation.begin())),
151+
});
152+
soltestAssert(!value.empty());
153+
soltestAssert(tokens.back().position < _line.size());
154+
soltestAssert(tokens.back().position + value.size() <= _line.size());
155+
156+
std::string_view tail{tokenLocation.end(), _line.end()};
157+
tokenLocation = boost::find_token(tail, notWhiteSpace, boost::token_compress_on);
158+
}
159+
160+
return tokens;
161+
}
162+
163+
std::string PlainAssemblyParser::formatError(std::string_view _message) const
164+
{
165+
soltestAssert(currentToken().value.size() >= 1);
166+
167+
std::string lineNumberString = std::to_string(m_lineNumber);
168+
std::string padding(lineNumberString.size(), ' ');
169+
std::string underline = std::string(currentToken().position, ' ') + std::string(currentToken().value.size(), '^');
170+
return fmt::format(
171+
"Error while parsing plain assembly: {}\n"
172+
"{}--> {}\n"
173+
"{} | \n"
174+
"{} | {}\n"
175+
"{} | {}\n",
176+
_message,
177+
padding, m_sourceName,
178+
padding,
179+
m_lineNumber, m_line,
180+
padding, underline
181+
);
182+
}

Diff for: test/libevmasm/PlainAssemblyParser.h

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/*
2+
This file is part of solidity.
3+
4+
solidity is free software: you can redistribute it and/or modify
5+
it under the terms of the GNU General Public License as published by
6+
the Free Software Foundation, either version 3 of the License, or
7+
(at your option) any later version.
8+
9+
solidity is distributed in the hope that it will be useful,
10+
but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License
15+
along with solidity. If not, see <http://www.gnu.org/licenses/>.
16+
*/
17+
// SPDX-License-Identifier: GPL-3.0
18+
19+
#pragma once
20+
21+
#include <libsolutil/JSON.h>
22+
23+
#include <string>
24+
#include <string_view>
25+
#include <vector>
26+
27+
namespace solidity::evmasm::test
28+
{
29+
30+
/// Parser for the plain assembly format. The format is meant to be good enough for humans to read
31+
/// while being sstraightforward to map the assembly JSON format that solc can import.
32+
///
33+
/// Syntax:
34+
/// - Every line consists of zero or more whitespace-separated tokens.
35+
/// - A token that begins with `//` starts a comment, which extends to the end of the line.
36+
/// - A non-empty line represents a single assembly item.
37+
/// - The name of the item is the first thing on the line and may consist of one or more tokens.
38+
/// - One or more arguments follow the name.
39+
///
40+
/// Supported items:
41+
/// - All instruction names.
42+
/// - PUSH <hex value>
43+
/// - PUSH [tag] <tagID>
44+
/// - tag <tagID>
45+
class PlainAssemblyParser
46+
{
47+
public:
48+
/// Parses plain assembly format and returns the equivalent assembly JSON.
49+
/// Errors are reported by throwing runtime_error.
50+
Json parse(std::string _sourceName, std::string const& _source);
51+
52+
protected:
53+
struct Token
54+
{
55+
std::string_view value; ///< Substring of m_line that represents a complete token.
56+
size_t position; ///< Position of the first character of the token within m_line.
57+
};
58+
59+
Token const& currentToken() const;
60+
Token const& nextToken() const;
61+
bool hasMoreTokens() const { return m_tokenIndex + 1 < m_lineTokens.size(); }
62+
63+
bool advanceToken();
64+
std::string_view expectArgument();
65+
void expectNoMoreArguments();
66+
void advanceLine(std::string_view _line);
67+
68+
static std::vector<Token> tokenizeLine(std::string_view _line);
69+
std::string formatError(std::string_view _message) const;
70+
71+
private:
72+
std::string m_sourceName; ///< Name of the file the source comes from.
73+
size_t m_lineNumber = 0; ///< The number of the current line within the source, 1-based.
74+
std::string m_line; ///< The current line, unparsed.
75+
std::vector<Token> m_lineTokens; ///< Decomposition of the current line into tokens (does not include comments).
76+
size_t m_tokenIndex = 0; ///< Points at a token within m_lineTokens.
77+
};
78+
79+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//
2+
//// comment
3+
// comment
4+
CALLVALUE // 0xff
5+
CALLVALUE //0xff
6+
7+
PUSH 0xff // comment // //0xff
8+
//
9+
10+
//
11+
// ====
12+
// outputs: InputAssemblyJSON,Assembly,Bytecode,Opcodes,SourceMappings
13+
// ----
14+
// InputAssemblyJSON: {
15+
// ".code": [
16+
// {
17+
// "name": "CALLVALUE"
18+
// },
19+
// {
20+
// "name": "CALLVALUE"
21+
// },
22+
// {
23+
// "name": "PUSH",
24+
// "value": "ff"
25+
// }
26+
// ]
27+
// }
28+
// Assembly:
29+
// callvalue
30+
// callvalue
31+
// 0xff
32+
// Bytecode: 343460ff
33+
// Opcodes: CALLVALUE CALLVALUE PUSH1 0xFF
34+
// SourceMappings: :::-:0;;

0 commit comments

Comments
 (0)