diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py index 9182b18..0ec11ec 100644 --- a/esp32_ulp/assemble.py +++ b/esp32_ulp/assemble.py @@ -2,6 +2,7 @@ ESP32 ULP Co-Processor Assembler """ +import re from . import opcodes from .nocomment import remove_comments as do_remove_comments from .util import garbage_collect @@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None): self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {}) opcodes.symbols = self.symbols # XXX dirty hack + # regex for parsing assembly lines + # format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]] + # where [] means optional + # initialised here once, instead of compiling once per line + self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$') + def init(self, a_pass): self.a_pass = a_pass self.sections = dict(text=[], data=[]) @@ -108,25 +115,14 @@ def parse_line(self, line): """ if not line: return - has_label = line[0] not in '\t .' - if has_label: - label_line = line.split(None, 1) - if len(label_line) == 2: - label, line = label_line - else: # 1 - label, line = label_line[0], None - label = label.rstrip(':') - else: - label, line = None, line.lstrip() - if line is None: - opcode, args = None, () - else: - opcode_args = line.split(None, 1) - if len(opcode_args) == 2: - opcode, args = opcode_args - args = tuple(arg.strip() for arg in args.split(',')) - else: # 1 - opcode, args = opcode_args[0], () + + matches = self.line_regex.match(line) + label, opcode, args = matches.group(2), matches.group(4), matches.group(5) + + label = label if label else None # force empty strings to None + opcode = opcode if opcode else None # force empty strings to None + args = tuple(arg.strip() for arg in args.split(',')) if args else () + return label, opcode, args def split_statements(self, lines): diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py index 12598ec..98d7284 100644 --- a/esp32_ulp/opcodes.py +++ b/esp32_ulp/opcodes.py @@ -342,9 +342,9 @@ def get_rel(arg): if arg.type == IMM: if arg.value & 3 != 0: # bitwise version of: arg.value % 4 != 0 raise ValueError('Relative offset must be a multiple of 4') - return arg.value >> 2 # bitwise version of: arg.value // 4 + return IMM, arg.value >> 2 # bitwise version of: arg.value // 4 if arg.type == SYM: - return symbols.resolve_relative(arg.value) + return SYM, symbols.resolve_relative(arg.value) raise TypeError('wanted: immediate, got: %s' % arg.raw) @@ -449,7 +449,7 @@ def i_tsens(reg_dest, delay): return _tsens.all -def i_adc(reg_dest, adc_idx, mux): +def i_adc(reg_dest, adc_idx, mux, _not_used=None): _adc.dreg = get_reg(reg_dest) _adc.mux = get_imm(mux) _adc.sar_sel = get_imm(adc_idx) @@ -619,7 +619,8 @@ def i_jump(target, condition='--'): raise ValueError("invalid flags condition") if target.type == IMM or target.type == SYM: _bx.dreg = 0 - _bx.addr = get_abs(target) + # we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4. + _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2 # bitwise version of "// 4" _bx.unused = 0 _bx.reg = 0 _bx.type = jump_type @@ -652,7 +653,7 @@ def _jump_relr(threshold, cond, offset): def i_jumpr(offset, threshold, condition): - offset = get_rel(offset) + offset_type, offset = get_rel(offset) threshold = get_imm(threshold) condition = get_cond(condition) if condition == 'lt': @@ -669,7 +670,11 @@ def i_jumpr(offset, threshold, condition): # jump over next JUMPR skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2) # jump to target - offset -= 1 # adjust for the additional JUMPR instruction + if (offset_type == IMM and offset < 0) or offset_type == SYM: + # adjust for the additional JUMPR instruction + # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting + # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting + offset -= 1 jump_ins = _jump_relr(threshold, BRCOND_GE, offset) return (skip_ins, jump_ins) else: @@ -691,7 +696,7 @@ def _jump_rels(threshold, cond, offset): def i_jumps(offset, threshold, condition): - offset = get_rel(offset) + offset_type, offset = get_rel(offset) threshold = get_imm(threshold) condition = get_cond(condition) if condition == 'lt': @@ -711,7 +716,11 @@ def i_jumps(offset, threshold, condition): # jump over next JUMPS skip_ins = _jump_rels(threshold, skip_cond, 2) # jump to target - offset -= 1 # adjust for the additional JUMPS instruction + if (offset_type == IMM and offset < 0) or offset_type == SYM: + # adjust for the additional JUMPS instruction + # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting + # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting + offset -= 1 jump_ins = _jump_rels(threshold, jump_cond, offset) return (skip_ins, jump_ins) diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh index 2904ee6..05a3ee2 100755 --- a/tests/02_compat_rtc_tests.sh +++ b/tests/02_compat_rtc_tests.sh @@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/g test_name="${src_name##*/}" # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix) - for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do + for I in esp32ulp_jumpr esp32ulp_ranges; do if [ "${test_name}" = "$I" ]; then # these are old bugs, and not related to the RTC macro handling functionality # they will still be great to fix over time diff --git a/tests/assemble.py b/tests/assemble.py index 7cb9265..c17bbce 100644 --- a/tests/assemble.py +++ b/tests/assemble.py @@ -53,6 +53,32 @@ def test_parse_line(): assert a.parse_line(next(lines)) == (None, '.data', ()) # test left-aligned directive is not treated as label +def test_parse_labels_correctly(): + """ + description of what defines a label + https://sourceware.org/binutils/docs/as/Statements.html + https://sourceware.org/binutils/docs/as/Labels.html + """ + a = Assembler() + assert a.parse_line('') is None + assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',)) + assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',)) + assert a.parse_line('label:') == ('label', None, ()) + assert a.parse_line(' label:') == ('label', None, ()) + assert a.parse_line(' label: ') == ('label', None, ()) + assert a.parse_line('nop ') == (None, 'nop', ()) + assert a.parse_line('.set c, 1 ') == (None, '.set', ('c', '1',)) + assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',)) # no whitespace between label and colon + assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',)) + assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',)) # colon in string + assert a.parse_line('label::') == ('label', ':', ()) + assert a.parse_line('label: :') == ('label', ':', ()) + assert a.parse_line('a_label:') == ('a_label', None, ()) + assert a.parse_line('$label:') == ('$label', None, ()) + assert a.parse_line('.label:') == ('.label', None, ()) + assert a.parse_line('&label:') == (None, '&label:', ()) # & not a valid char in a label + + def test_parse(): a = Assembler() lines = remove_comments(src) @@ -260,6 +286,7 @@ def test_support_multiple_statements_per_line(): test_parse_line() +test_parse_labels_correctly() test_parse() test_assemble() test_assemble_bss() diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S index dee6092..3b33a78 100644 --- a/tests/compat/fixes.S +++ b/tests/compat/fixes.S @@ -28,4 +28,9 @@ entry: # interpret ; as statement separator - this results in 2 NOP machine instructions nop; nop; + # adc supports an undocumented 4th argument, which should be entirely ignored + # binutils-esp32ulp also ignores this argument, if present, see: + # https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810 + adc r1, 0, 1, 100 + halt diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S index eb50885..588739b 100644 --- a/tests/compat/jumps.S +++ b/tests/compat/jumps.S @@ -5,6 +5,12 @@ entry: nop + # simple jumps + jump entry + jump later + jump 0x120, EQ + jump -288, EQ + # jumps with labels jumps entry, 42, lt jumps entry, 42, lt @@ -20,12 +26,15 @@ entry: # jumps with immediate offset (specified in bytes, but real instruction uses words) jumps 0, 42, lt + jumps 0, 42, eq # dual-instruction condition jumps 4, 42, lt + jumps 4, 42, eq # dual-instruction condition jumps 8, 42, lt jumps 32, 42, lt jumps -4, 42, lt + jumps -4, 42, eq # dual-instruction condition jumps -8, 42, lt jumps -32, 42, lt @@ -46,12 +55,15 @@ entry: # jumpr with immediate offset (specified in bytes, but real instruction uses words) jumpr 0, 42, lt + jumpr 0, 42, eq # dual-instruction condition jumpr 4, 42, lt + jumpr 4, 42, eq # dual-instruction condition jumpr 8, 42, lt jumpr 32, 42, lt jumpr -4, 42, lt + jumpr -4, 42, eq # dual-instruction condition jumpr -8, 42, lt jumpr -32, 42, lt