micropython · ThomasWaldmann · Oct 4, 2021 · Sep 28, 2021 · Sep 28, 2021 · Sep 28, 2021
diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py
@@ -2,6 +2,7 @@
 ESP32 ULP Co-Processor Assembler
 """
 
+import re
 from . import opcodes
 from .nocomment import remove_comments as do_remove_comments
 from .util import garbage_collect
@@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None):
         self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
         opcodes.symbols = self.symbols  # XXX dirty hack
 
+        # regex for parsing assembly lines
+        # format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
+        # where [] means optional
+        # initialised here once, instead of compiling once per line
+        self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$')
+
     def init(self, a_pass):
         self.a_pass = a_pass
         self.sections = dict(text=[], data=[])
@@ -108,25 +115,14 @@ def parse_line(self, line):
         """
         if not line:
             return
-        has_label = line[0] not in '\t .'
-        if has_label:
-            label_line = line.split(None, 1)
-            if len(label_line) == 2:
-                label, line = label_line
-            else:  # 1
-                label, line = label_line[0], None
-            label = label.rstrip(':')
-        else:
-            label, line = None, line.lstrip()
-        if line is None:
-            opcode, args = None, ()
-        else:
-            opcode_args = line.split(None, 1)
-            if len(opcode_args) == 2:
-                opcode, args = opcode_args
-                args = tuple(arg.strip() for arg in args.split(','))
-            else:  # 1
-                opcode, args = opcode_args[0], ()
+
+        matches = self.line_regex.match(line)
+        label, opcode, args = matches.group(2), matches.group(4), matches.group(5)
+
+        label = label if label else None  # force empty strings to None
+        opcode = opcode if opcode else None  # force empty strings to None
+        args = tuple(arg.strip() for arg in args.split(',')) if args else ()
+
         return label, opcode, args
 
     def split_statements(self, lines):

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
@@ -342,9 +342,9 @@ def get_rel(arg):
     if arg.type == IMM:
         if arg.value & 3 != 0:  # bitwise version of: arg.value % 4 != 0
             raise ValueError('Relative offset must be a multiple of 4')
-        return arg.value >> 2  # bitwise version of: arg.value // 4
+        return IMM, arg.value >> 2  # bitwise version of: arg.value // 4
     if arg.type == SYM:
-        return symbols.resolve_relative(arg.value)
+        return SYM, symbols.resolve_relative(arg.value)
     raise TypeError('wanted: immediate, got: %s' % arg.raw)
 
 
@@ -449,7 +449,7 @@ def i_tsens(reg_dest, delay):
     return _tsens.all
 
 
-def i_adc(reg_dest, adc_idx, mux):
+def i_adc(reg_dest, adc_idx, mux, _not_used=None):
     _adc.dreg = get_reg(reg_dest)
     _adc.mux = get_imm(mux)
     _adc.sar_sel = get_imm(adc_idx)
@@ -619,7 +619,8 @@ def i_jump(target, condition='--'):
         raise ValueError("invalid flags condition")
     if target.type == IMM or target.type == SYM:
         _bx.dreg = 0
-        _bx.addr = get_abs(target)
+        # label addresses are tracked in 32-bit words, but immediate values are given in bytes and must be divided by 4.
+        _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
         _bx.unused = 0
         _bx.reg = 0
         _bx.type = jump_type
@@ -652,7 +653,7 @@ def _jump_relr(threshold, cond, offset):
 
 
 def i_jumpr(offset, threshold, condition):
-    offset = get_rel(offset)
+    offset_type, offset = get_rel(offset)
     threshold = get_imm(threshold)
     condition = get_cond(condition)
     if condition == 'lt':
@@ -669,7 +670,11 @@ def i_jumpr(offset, threshold, condition):
         # jump over next JUMPR
         skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
         # jump to target
-        offset -= 1  # adjust for the additional JUMPR instruction
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPR instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
         jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
         return (skip_ins, jump_ins)
     else:
@@ -691,7 +696,7 @@ def _jump_rels(threshold, cond, offset):
 
 
 def i_jumps(offset, threshold, condition):
-    offset = get_rel(offset)
+    offset_type, offset = get_rel(offset)
     threshold = get_imm(threshold)
     condition = get_cond(condition)
     if condition == 'lt':
@@ -711,7 +716,11 @@ def i_jumps(offset, threshold, condition):
         # jump over next JUMPS
         skip_ins = _jump_rels(threshold, skip_cond, 2)
         # jump to target
-        offset -= 1  # adjust for the additional JUMPS instruction
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPS instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
         jump_ins = _jump_rels(threshold, jump_cond, offset)
 
         return (skip_ins, jump_ins)

diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh
@@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/g
     test_name="${src_name##*/}"
 
     # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix)
-    for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do
+    for I in esp32ulp_jumpr esp32ulp_ranges; do
         if [ "${test_name}" = "$I" ]; then
             # these are old bugs, and not related to the RTC macro handling functionality
             # they will still be great to fix over time

diff --git a/tests/assemble.py b/tests/assemble.py
@@ -53,6 +53,32 @@ def test_parse_line():
     assert a.parse_line(next(lines)) == (None, '.data', ())  # test left-aligned directive is not treated as label
 
 
+def test_parse_labels_correctly():
+    """
+    Check that parse_line splits lines into (label, opcode, args) per the definition of a label in:
+    https://sourceware.org/binutils/docs/as/Statements.html
+    https://sourceware.org/binutils/docs/as/Labels.html
+    """
+    a = Assembler()
+    assert a.parse_line('') is None  # an empty line yields no result at all
+    assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
+    assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))  # no whitespace needed after the colon
+    assert a.parse_line('label:') == ('label', None, ())
+    assert a.parse_line('    label:') == ('label', None, ())  # leading whitespace before a label is accepted
+    assert a.parse_line('    label:  ') == ('label', None, ())  # trailing whitespace after a label is ignored
+    assert a.parse_line('nop  ') == (None, 'nop', ())  # trailing whitespace after an opcode is ignored
+    assert a.parse_line('.set c, 1  ') == (None, '.set', ('c', '1',))
+    assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',))  # whitespace before the colon means this is not a label
+    assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
+    assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',))  # a colon inside a string does not make a label
+    assert a.parse_line('label::') == ('label', ':', ())  # only the first colon ends the label; the second is parsed as the opcode
+    assert a.parse_line('label: :') == ('label', ':', ())
+    assert a.parse_line('a_label:') == ('a_label', None, ())
+    assert a.parse_line('$label:') == ('$label', None, ())  # '$' is accepted in a label
+    assert a.parse_line('.label:') == ('.label', None, ())  # '.' is accepted in a label
+    assert a.parse_line('&label:') == (None, '&label:', ())  # '&' is not a valid character in a label
+
+
 def test_parse():
     a = Assembler()
     lines = remove_comments(src)
@@ -260,6 +286,7 @@ def test_support_multiple_statements_per_line():
 
 
 test_parse_line()
+test_parse_labels_correctly()
 test_parse()
 test_assemble()
 test_assemble_bss()

diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S
@@ -28,4 +28,9 @@ entry:
   # interpret ; as statement separator - this results in 2 NOP machine instructions
   nop; nop;
 
+  # adc supports an undocumented 4th argument, which should be entirely ignored
+  # binutils-esp32ulp also ignores this argument, if present, see:
+  # https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810
+  adc r1, 0, 1, 100
+
   halt
diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S
@@ -5,6 +5,12 @@
 entry:
   nop
 
+  # simple jumps
+  jump entry
+  jump later
+  jump 0x120, EQ
+  jump -288, EQ
+
   # jumps with labels
   jumps entry, 42, lt
   jumps entry, 42, lt
@@ -20,12 +26,15 @@ entry:
 
   # jumps with immediate offset (specified in bytes, but real instruction uses words)
   jumps 0, 42, lt
+  jumps 0, 42, eq  # dual-instruction condition
 
   jumps 4, 42, lt
+  jumps 4, 42, eq  # dual-instruction condition
   jumps 8, 42, lt
   jumps 32, 42, lt
 
   jumps -4, 42, lt
+  jumps -4, 42, eq  # dual-instruction condition
   jumps -8, 42, lt
   jumps -32, 42, lt
 
@@ -46,12 +55,15 @@ entry:
 
   # jumpr with immediate offset (specified in bytes, but real instruction uses words)
   jumpr 0, 42, lt
+  jumpr 0, 42, eq  # dual-instruction condition
 
   jumpr 4, 42, lt
+  jumpr 4, 42, eq  # dual-instruction condition
   jumpr 8, 42, lt
   jumpr 32, 42, lt
 
   jumpr -4, 42, lt
+  jumpr -4, 42, eq  # dual-instruction condition
   jumpr -8, 42, lt
   jumpr -32, 42, lt