Skip to content

More fixes towards v1.0 #52

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 15 additions & 19 deletions esp32_ulp/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
ESP32 ULP Co-Processor Assembler
"""

import re
from . import opcodes
from .nocomment import remove_comments as do_remove_comments
from .util import garbage_collect
Expand Down Expand Up @@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None):
self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
opcodes.symbols = self.symbols # XXX dirty hack

# regex for parsing assembly lines
# format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
# where [] means optional
# initialised here once, instead of compiling once per line
self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$')

def init(self, a_pass):
self.a_pass = a_pass
self.sections = dict(text=[], data=[])
Expand All @@ -108,25 +115,14 @@ def parse_line(self, line):
"""
if not line:
return
has_label = line[0] not in '\t .'
if has_label:
label_line = line.split(None, 1)
if len(label_line) == 2:
label, line = label_line
else: # 1
label, line = label_line[0], None
label = label.rstrip(':')
else:
label, line = None, line.lstrip()
if line is None:
opcode, args = None, ()
else:
opcode_args = line.split(None, 1)
if len(opcode_args) == 2:
opcode, args = opcode_args
args = tuple(arg.strip() for arg in args.split(','))
else: # 1
opcode, args = opcode_args[0], ()

matches = self.line_regex.match(line)
label, opcode, args = matches.group(2), matches.group(4), matches.group(5)

label = label if label else None # force empty strings to None
opcode = opcode if opcode else None # force empty strings to None
args = tuple(arg.strip() for arg in args.split(',')) if args else ()

return label, opcode, args

def split_statements(self, lines):
Expand Down
25 changes: 17 additions & 8 deletions esp32_ulp/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,9 @@ def get_rel(arg):
if arg.type == IMM:
if arg.value & 3 != 0: # bitwise version of: arg.value % 4 != 0
raise ValueError('Relative offset must be a multiple of 4')
return arg.value >> 2 # bitwise version of: arg.value // 4
return IMM, arg.value >> 2 # bitwise version of: arg.value // 4
if arg.type == SYM:
return symbols.resolve_relative(arg.value)
return SYM, symbols.resolve_relative(arg.value)
raise TypeError('wanted: immediate, got: %s' % arg.raw)


Expand Down Expand Up @@ -449,7 +449,7 @@ def i_tsens(reg_dest, delay):
return _tsens.all


def i_adc(reg_dest, adc_idx, mux):
def i_adc(reg_dest, adc_idx, mux, _not_used=None):
_adc.dreg = get_reg(reg_dest)
_adc.mux = get_imm(mux)
_adc.sar_sel = get_imm(adc_idx)
Expand Down Expand Up @@ -619,7 +619,8 @@ def i_jump(target, condition='--'):
raise ValueError("invalid flags condition")
if target.type == IMM or target.type == SYM:
_bx.dreg = 0
_bx.addr = get_abs(target)
# we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
_bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2 # bitwise version of "// 4"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't get this.

If it is a symbol, it directly uses its value (absolute address) else it uses an immediate offset // 4?

Considering everything else in the opcode is the same, how does the cpu know what to do? like whether it is an offset or an absolute address?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In both cases what the machine instruction gets is an absolute address (not relative offset). The absolute address is either the address of a label, or an absolute address specified as immediate value.

What is different in the two cases is that the assembler instruction must take an address given in bytes, while the machine instruction needs to contain the address in words. Because in py-esp32-ulp we already track label addresses in words internally, we do not need to divide those symbol addresses by 4. But for immediate values we need to (see: https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-guides/ulp_instruction_set.html#note-about-addressing).

Does that make sense, or did I miss what you asked?

However... I did just discover one more case for the JUMP instruction, where we don't properly divide by 4. So this current logic is not 100% perfect. It appears to be that it's not only IMM values that must be divided by 4, but also symbol values that we internally label with the ABS type (as opposed to REL). Those ABS type symbols are defined by .set. So maybe the logic here should really be: Is what was given to us a relocatable thing (label) or not. If yes, use as is, if not divide by 4.

This logic is starting to look valid to me, the more I think about it. It should give the same result for the cases currently handled correctly, but also for the case of symbols defined with .set. It would requires a bit of a bigger change though, because of expression evaluation, which potentially uses multiple symbols that could have different types. So I will try and see how binutils-esp32ulp handles mixing symbols types, and likely a valid solution could be to consider the result of an expression to be a REL type if any symbols in the expression were of that type, otherwise consider the result to be an ABS type. And then use ABS and REL here rather than SYM and IMM to decide whether to divide by 4.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for explaining, please add a comment above this like:

# we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could also have ABS1 (byte addresses), ABS4 (32bit word addrs), etc. so we better know what we have.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, my last part about the REL vs ABS, I think is not relevant anymore. I believe that "one more case" I talked about above, is actually a bug in binutils-esp32ulp

I have now tested quite a bit more and stepped through binutils-esp32ulp a bit and found the following: whether the value (that was defined with .set) gets divided by 4 depends on whether the symbol is marked as "global" or not. If marked as global, then the value will be taken as-is, when not marked as global, the value will be divided by 4. I don't see how this could be intended behaviour.

The reason I started to get skeptical is that binutils-esp32ulp behaves correctly for JUMPS and JUMPR cases, as per description in the documentation: https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-guides/ulp_instruction_set.html#note-about-addressing. The documentation shows an example, where values, which come from .set'ed symbols, should not be converted from bytes to words. So the fact that the JUMP instruction then tried to divide the value of the symbol by 4 (in the case I tried to fix) seems inconsistent.

I think the issues comes up in binutils-esp32ulp, depending on where the value is actually "filled-in" into the instruction. For non-global symbols, the value is already set during the assembly stage (as), while in the global case, the symbol value is only "filled in" during the linking stage (ld) - it's 0 in the instruction before that stage. The implementation of this logic in the assembler and linker differ.

So that means, I would not change anything in our implementation (other than adding the comment you suggested).

I might file a bug report to the binutils-esp32ulp project.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, sounds strange, please file a bug.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I filed a bug report now: espressif/binutils-esp32ulp#22

_bx.unused = 0
_bx.reg = 0
_bx.type = jump_type
Expand Down Expand Up @@ -652,7 +653,7 @@ def _jump_relr(threshold, cond, offset):


def i_jumpr(offset, threshold, condition):
offset = get_rel(offset)
offset_type, offset = get_rel(offset)
threshold = get_imm(threshold)
condition = get_cond(condition)
if condition == 'lt':
Expand All @@ -669,7 +670,11 @@ def i_jumpr(offset, threshold, condition):
# jump over next JUMPR
skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
# jump to target
offset -= 1 # adjust for the additional JUMPR instruction
if (offset_type == IMM and offset < 0) or offset_type == SYM:
# adjust for the additional JUMPR instruction
# for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
# for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
offset -= 1
jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
return (skip_ins, jump_ins)
else:
Expand All @@ -691,7 +696,7 @@ def _jump_rels(threshold, cond, offset):


def i_jumps(offset, threshold, condition):
offset = get_rel(offset)
offset_type, offset = get_rel(offset)
threshold = get_imm(threshold)
condition = get_cond(condition)
if condition == 'lt':
Expand All @@ -711,7 +716,11 @@ def i_jumps(offset, threshold, condition):
# jump over next JUMPS
skip_ins = _jump_rels(threshold, skip_cond, 2)
# jump to target
offset -= 1 # adjust for the additional JUMPS instruction
if (offset_type == IMM and offset < 0) or offset_type == SYM:
# adjust for the additional JUMPS instruction
# for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
# for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
offset -= 1
jump_ins = _jump_rels(threshold, jump_cond, offset)

return (skip_ins, jump_ins)
Expand Down
2 changes: 1 addition & 1 deletion tests/02_compat_rtc_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/g
test_name="${src_name##*/}"

# for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix)
for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do
for I in esp32ulp_jumpr esp32ulp_ranges; do
if [ "${test_name}" = "$I" ]; then
# these are old bugs, and not related to the RTC macro handling functionality
# they will still be great to fix over time
Expand Down
27 changes: 27 additions & 0 deletions tests/assemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,32 @@ def test_parse_line():
assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label


def test_parse_labels_correctly():
"""
description of what defines a label
https://sourceware.org/binutils/docs/as/Statements.html
https://sourceware.org/binutils/docs/as/Labels.html
"""
a = Assembler()
assert a.parse_line('') is None
assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
assert a.parse_line('label:') == ('label', None, ())
assert a.parse_line(' label:') == ('label', None, ())
assert a.parse_line(' label: ') == ('label', None, ())
assert a.parse_line('nop ') == (None, 'nop', ())
assert a.parse_line('.set c, 1 ') == (None, '.set', ('c', '1',))
assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',)) # no whitespace between label and colon
assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',)) # colon in string
assert a.parse_line('label::') == ('label', ':', ())
assert a.parse_line('label: :') == ('label', ':', ())
assert a.parse_line('a_label:') == ('a_label', None, ())
assert a.parse_line('$label:') == ('$label', None, ())
assert a.parse_line('.label:') == ('.label', None, ())
assert a.parse_line('&label:') == (None, '&label:', ()) # & not a valid char in a label


def test_parse():
a = Assembler()
lines = remove_comments(src)
Expand Down Expand Up @@ -260,6 +286,7 @@ def test_support_multiple_statements_per_line():


test_parse_line()
test_parse_labels_correctly()
test_parse()
test_assemble()
test_assemble_bss()
Expand Down
5 changes: 5 additions & 0 deletions tests/compat/fixes.S
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,9 @@ entry:
# interpret ; as statement separator - this results in 2 NOP machine instructions
nop; nop;

# adc supports an undocumented 4th argument, which should be entirely ignored
# binutils-esp32ulp also ignores this argument, if present, see:
# https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810
adc r1, 0, 1, 100

halt
12 changes: 12 additions & 0 deletions tests/compat/jumps.S
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
entry:
nop

# simple jumps
jump entry
jump later
jump 0x120, EQ
jump -288, EQ

# jumps with labels
jumps entry, 42, lt
jumps entry, 42, lt
Expand All @@ -20,12 +26,15 @@ entry:

# jumps with immediate offset (specified in bytes, but real instruction uses words)
jumps 0, 42, lt
jumps 0, 42, eq # dual-instruction condition

jumps 4, 42, lt
jumps 4, 42, eq # dual-instruction condition
jumps 8, 42, lt
jumps 32, 42, lt

jumps -4, 42, lt
jumps -4, 42, eq # dual-instruction condition
jumps -8, 42, lt
jumps -32, 42, lt

Expand All @@ -46,12 +55,15 @@ entry:

# jumpr with immediate offset (specified in bytes, but real instruction uses words)
jumpr 0, 42, lt
jumpr 0, 42, eq # dual-instruction condition

jumpr 4, 42, lt
jumpr 4, 42, eq # dual-instruction condition
jumpr 8, 42, lt
jumpr 32, 42, lt

jumpr -4, 42, lt
jumpr -4, 42, eq # dual-instruction condition
jumpr -8, 42, lt
jumpr -32, 42, lt

Expand Down