From 1cb0db3067798000c1787a4c165a728a93ee086a Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 28 Sep 2021 08:53:05 +0300
Subject: [PATCH 1/8] fix JUMP with immediate offsets (should get converted to
 words)

Offsets specified as immediate values are given in bytes but the
underlying machine instruction needs a word offset. Thus, we need
to convert immediate values to words by dividing by 4.

This commit contributes to being able to eventually assemble the
esp32ulp_all.s test from binutils-esp32ulp. It addresses this line:
https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_all.s#L109
---
 esp32_ulp/opcodes.py | 2 +-
 tests/compat/jumps.S | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
index 12598ec..a7eee5a 100644
--- a/esp32_ulp/opcodes.py
+++ b/esp32_ulp/opcodes.py
@@ -619,7 +619,7 @@ def i_jump(target, condition='--'):
         raise ValueError("invalid flags condition")
     if target.type == IMM or target.type == SYM:
         _bx.dreg = 0
-        _bx.addr = get_abs(target)
+        _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
         _bx.unused = 0
         _bx.reg = 0
         _bx.type = jump_type
diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S
index eb50885..5d15eaa 100644
--- a/tests/compat/jumps.S
+++ b/tests/compat/jumps.S
@@ -5,6 +5,12 @@
 entry:
   nop
 
+  # simple jumps
+  jump entry
+  jump later
+  jump 0x120, EQ
+  jump -288, EQ
+
   # jumps with labels
   jumps entry, 42, lt
   jumps entry, 42, lt

From 5e8ab9238e3504bdffe7bc20f90d673945c5e034 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 28 Sep 2021 08:43:46 +0300
Subject: [PATCH 2/8] fix dual-instruction jumps with immediate values

This was a previously untested and incorrectly handled case. There
was already some offset manipulation to account for the extra
instructions for handling non-native conditions.

Because of the way symbol offsets are calculated by py-esp32-ulp,
namely that extra instructions are already included in the offsets,
the relative distance to a forward lying label is 1 instruction too
much. Or put another way, when expanding a jump into 2 instructions
the "current offset" refers to the first instruction, while it's
only the second instruction that jumps to the intended label, so
that difference must be accounted for.

Thus, for symbols we need to always subtract 1 from the offset. For
immediate values, we only subtract 1 for a backward jump.
---
 esp32_ulp/opcodes.py | 20 ++++++++++++++------
 tests/compat/jumps.S |  6 ++++++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
index a7eee5a..3684df5 100644
--- a/esp32_ulp/opcodes.py
+++ b/esp32_ulp/opcodes.py
@@ -342,9 +342,9 @@ def get_rel(arg):
     if arg.type == IMM:
         if arg.value & 3 != 0:  # bitwise version of: arg.value % 4 != 0
             raise ValueError('Relative offset must be a multiple of 4')
-        return arg.value >> 2  # bitwise version of: arg.value // 4
+        return IMM, arg.value >> 2  # bitwise version of: arg.value // 4
     if arg.type == SYM:
-        return symbols.resolve_relative(arg.value)
+        return SYM, symbols.resolve_relative(arg.value)
     raise TypeError('wanted: immediate, got: %s' % arg.raw)
 
 
@@ -652,7 +652,7 @@ def _jump_relr(threshold, cond, offset):
 
 
 def i_jumpr(offset, threshold, condition):
-    offset = get_rel(offset)
+    offset_type, offset = get_rel(offset)
     threshold = get_imm(threshold)
     condition = get_cond(condition)
     if condition == 'lt':
@@ -669,7 +669,11 @@ def i_jumpr(offset, threshold, condition):
         # jump over next JUMPR
         skip_ins = _jump_relr(threshold + 1, BRCOND_GE, 2)
         # jump to target
-        offset -= 1  # adjust for the additional JUMPR instruction
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPR instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
         jump_ins = _jump_relr(threshold, BRCOND_GE, offset)
         return (skip_ins, jump_ins)
     else:
@@ -691,7 +695,7 @@ def _jump_rels(threshold, cond, offset):
 
 
 def i_jumps(offset, threshold, condition):
-    offset = get_rel(offset)
+    offset_type, offset = get_rel(offset)
     threshold = get_imm(threshold)
     condition = get_cond(condition)
     if condition == 'lt':
@@ -711,7 +715,11 @@ def i_jumps(offset, threshold, condition):
         # jump over next JUMPS
         skip_ins = _jump_rels(threshold, skip_cond, 2)
         # jump to target
-        offset -= 1  # adjust for the additional JUMPS instruction
+        if (offset_type == IMM and offset < 0) or offset_type == SYM:
+            # adjust for the additional JUMPS instruction
+            # for IMM offsets, the offset is relative to the 2nd instruction, so only backwards jumps need adjusting
+            # for SYM offsets, label offsets already include the extra instruction, so both directions need adjusting
+            offset -= 1
         jump_ins = _jump_rels(threshold, jump_cond, offset)
 
         return (skip_ins, jump_ins)
diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S
index 5d15eaa..9ac1fa0 100644
--- a/tests/compat/jumps.S
+++ b/tests/compat/jumps.S
@@ -26,12 +26,15 @@ entry:
 
   # jumps with immediate offset (specified in bytes, but real instruction uses words)
   jumps 0, 42, lt
+  jumps 0, 42, eq  #dual-instruction condition
 
   jumps 4, 42, lt
+  jumps 4, 42, eq  #dual-instruction condition
   jumps 8, 42, lt
   jumps 32, 42, lt
 
   jumps -4, 42, lt
+  jumps -4, 42, eq  #dual-instruction condition
   jumps -8, 42, lt
   jumps -32, 42, lt
 
@@ -52,12 +55,15 @@ entry:
 
   # jumpr with immediate offset (specified in bytes, but real instruction uses words)
   jumpr 0, 42, lt
+  jumpr 0, 42, eq  #dual-instruction condition
 
   jumpr 4, 42, lt
+  jumpr 4, 42, eq  #dual-instruction condition
   jumpr 8, 42, lt
   jumpr 32, 42, lt
 
   jumpr -4, 42, lt
+  jumpr -4, 42, eq  #dual-instruction condition
   jumpr -8, 42, lt
   jumpr -32, 42, lt
 

From c9ff24dda41bd6b28cc4f6a3ace47a1b59e80090 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 28 Sep 2021 23:10:31 +0300
Subject: [PATCH 3/8] fix ADC instruction when used with undocumented parameter

binutil-esp32ulp accepts an undocumented 4th parameter for the ADC
instruction, even though this parameter is unused as per binutils'
current code.

This commit contributes to being able to eventually assemble the
esp32ulp_all.s test from binutils-esp32ulp. It addresses this line:
https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_all.s#L174
---
 esp32_ulp/opcodes.py | 2 +-
 tests/compat/fixes.S | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
index 3684df5..e9c3ba1 100644
--- a/esp32_ulp/opcodes.py
+++ b/esp32_ulp/opcodes.py
@@ -449,7 +449,7 @@ def i_tsens(reg_dest, delay):
     return _tsens.all
 
 
-def i_adc(reg_dest, adc_idx, mux):
+def i_adc(reg_dest, adc_idx, mux, _not_used=None):
     _adc.dreg = get_reg(reg_dest)
     _adc.mux = get_imm(mux)
     _adc.sar_sel = get_imm(adc_idx)
diff --git a/tests/compat/fixes.S b/tests/compat/fixes.S
index dee6092..3b33a78 100644
--- a/tests/compat/fixes.S
+++ b/tests/compat/fixes.S
@@ -28,4 +28,9 @@ entry:
   # interpret ; as statement separator - this results in 2 NOP machine instructions
   nop; nop;
 
+  # adc supports an undocumented 4th argument, which should be entirely ignored
+  # binutils-esp32ulp also ignores this argument, if present, see:
+  # https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/config/esp32ulp-parse.y#L810
+  adc r1, 0, 1, 100
+
   halt

From d90a0e8f83b337d65c527c327720f89c66322354 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Tue, 28 Sep 2021 23:47:30 +0300
Subject: [PATCH 4/8] improve line parsing

So far line parsing made many assumptions on the format of lines.
This commit makes line parsing more flexible and tolerable to
different formats.

The change is in how labels are identified. Now any symbol that
is first on the line (excluding whitespace) and that is followed
directly with a colon (:), will be identified as a label.

Labels allow all characters that are valid for symbol names,
including ._$.

This commit contributes to being able to eventually assemble the
esp32ulp_all.s test from binutils-esp32ulp. It addresses this line:
[esp32ulp_all.s:2](https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_all.s#L2) (no space between colon and next character)
and also this line: [esp32ulp_globals.s:92](https://github.com/espressif/binutils-esp32ulp/blob/249ec34cc2c9574a86f3f86bbb175a863f988bcf/gas/testsuite/gas/esp32ulp/esp32/esp32ulp_globals.s#L92) (label indented).
---
 esp32_ulp/assemble.py | 12 ++++++++----
 tests/assemble.py     | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py
index 9182b18..530f382 100644
--- a/esp32_ulp/assemble.py
+++ b/esp32_ulp/assemble.py
@@ -108,17 +108,21 @@ def parse_line(self, line):
         """
         if not line:
             return
-        has_label = line[0] not in '\t .'
+        has_label = ':' in line
         if has_label:
-            label_line = line.split(None, 1)
+            orig_line = line.strip()
+            label_line = orig_line.split(':', 1)
             if len(label_line) == 2:
                 label, line = label_line
             else:  # 1
                 label, line = label_line[0], None
-            label = label.rstrip(':')
+
+            if label.strip('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_$.'):  # if any chars remain
+                # if label contains other chars than allowed, it's not a label
+                label, line = None, orig_line
         else:
             label, line = None, line.lstrip()
-        if line is None:
+        if not line:
             opcode, args = None, ()
         else:
             opcode_args = line.split(None, 1)
diff --git a/tests/assemble.py b/tests/assemble.py
index 7cb9265..8f4e971 100644
--- a/tests/assemble.py
+++ b/tests/assemble.py
@@ -53,6 +53,30 @@ def test_parse_line():
     assert a.parse_line(next(lines)) == (None, '.data', ())  # test left-aligned directive is not treated as label
 
 
+def test_parse_labels_correctly():
+    """
+    description of what defines a label
+    https://sourceware.org/binutils/docs/as/Statements.html
+    https://sourceware.org/binutils/docs/as/Labels.html
+    """
+    a = Assembler()
+    assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
+    assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
+    assert a.parse_line('label:') == ('label', None, ())
+    assert a.parse_line('    label:') == ('label', None, ())
+    assert a.parse_line('    label:  ') == ('label', None, ())
+    assert a.parse_line('nop  ') == (None, 'nop', ())
+    assert a.parse_line('.set c, 1  ') == (None, '.set', ('c', '1',))
+    assert a.parse_line('invalid : nop') == (None, 'invalid', (': nop',))  # no whitespace between label and colon
+    assert a.parse_line('.string "hello world"') == (None, '.string', ('"hello world"',))
+    assert a.parse_line('.string "hello : world"') == (None, '.string', ('"hello : world"',))  # colon in string
+    assert a.parse_line('label::') == ('label', ':', ())
+    assert a.parse_line('label: :') == ('label', ':', ())
+    assert a.parse_line('a_label:') == ('a_label', None, ())
+    assert a.parse_line('$label:') == ('$label', None, ())
+    assert a.parse_line('.label:') == ('.label', None, ())
+
+
 def test_parse():
     a = Assembler()
     lines = remove_comments(src)
@@ -260,6 +284,7 @@ def test_support_multiple_statements_per_line():
 
 
 test_parse_line()
+test_parse_labels_correctly()
 test_parse()
 test_assemble()
 test_assemble_bss()

From ff7db422acc83314dae19c6d7bf4ae4067ca3361 Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Wed, 29 Sep 2021 21:13:17 +0300
Subject: [PATCH 5/8] enable more skipped compat tests

The last few commits/fixes allow these tests to now pass so we no
longer need to skip them.
---
 tests/02_compat_rtc_tests.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/02_compat_rtc_tests.sh b/tests/02_compat_rtc_tests.sh
index 2904ee6..05a3ee2 100755
--- a/tests/02_compat_rtc_tests.sh
+++ b/tests/02_compat_rtc_tests.sh
@@ -66,7 +66,7 @@ for src_file in ulptool/src/ulp_examples/*/*.s binutils-esp32ulp/gas/testsuite/g
     test_name="${src_name##*/}"
 
     # for now, skip files that contain known bugs in esp32_ulp (essentially a todo list of what to fix)
-    for I in esp32ulp_all esp32ulp_globals esp32ulp_jumpr esp32ulp_ranges test_reg; do
+    for I in esp32ulp_jumpr esp32ulp_ranges; do
         if [ "${test_name}" = "$I" ]; then
             # these are old bugs, and not related to the RTC macro handling functionality
             # they will still be great to fix over time

From 6fa631a026bf79d2a51d29ccf2b44183f9e58bcf Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 3 Oct 2021 13:39:36 +0300
Subject: [PATCH 6/8] fix comments syntax

---
 tests/compat/jumps.S | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/compat/jumps.S b/tests/compat/jumps.S
index 9ac1fa0..588739b 100644
--- a/tests/compat/jumps.S
+++ b/tests/compat/jumps.S
@@ -26,15 +26,15 @@ entry:
 
   # jumps with immediate offset (specified in bytes, but real instruction uses words)
   jumps 0, 42, lt
-  jumps 0, 42, eq  #dual-instruction condition
+  jumps 0, 42, eq  # dual-instruction condition
 
   jumps 4, 42, lt
-  jumps 4, 42, eq  #dual-instruction condition
+  jumps 4, 42, eq  # dual-instruction condition
   jumps 8, 42, lt
   jumps 32, 42, lt
 
   jumps -4, 42, lt
-  jumps -4, 42, eq  #dual-instruction condition
+  jumps -4, 42, eq  # dual-instruction condition
   jumps -8, 42, lt
   jumps -32, 42, lt
 
@@ -55,15 +55,15 @@ entry:
 
   # jumpr with immediate offset (specified in bytes, but real instruction uses words)
   jumpr 0, 42, lt
-  jumpr 0, 42, eq  #dual-instruction condition
+  jumpr 0, 42, eq  # dual-instruction condition
 
   jumpr 4, 42, lt
-  jumpr 4, 42, eq  #dual-instruction condition
+  jumpr 4, 42, eq  # dual-instruction condition
   jumpr 8, 42, lt
   jumpr 32, 42, lt
 
   jumpr -4, 42, lt
-  jumpr -4, 42, eq  #dual-instruction condition
+  jumpr -4, 42, eq  # dual-instruction condition
   jumpr -8, 42, lt
   jumpr -32, 42, lt
 

From b3ae44272a32d1837b82f8db468772d5395153ae Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 3 Oct 2021 13:43:05 +0300
Subject: [PATCH 7/8] improve line parsing, using regex to extract labels

Using a regex to parse each line makes the code easier to read
and reason about. It uses capture groups to extract the exact
substrings we need, removing the need for extra whitespace
trimming and string splitting.

Benchmarking showed that it even performs slightly better on
the ESP32, while raising the committed memory by only 64 extra
bytes of memory compared with the previous algorithm
(measured after garbage collect).
---
 esp32_ulp/assemble.py | 38 +++++++++++++++-----------------------
 tests/assemble.py     |  2 ++
 2 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/esp32_ulp/assemble.py b/esp32_ulp/assemble.py
index 530f382..0ec11ec 100644
--- a/esp32_ulp/assemble.py
+++ b/esp32_ulp/assemble.py
@@ -2,6 +2,7 @@
 ESP32 ULP Co-Processor Assembler
 """
 
+import re
 from . import opcodes
 from .nocomment import remove_comments as do_remove_comments
 from .util import garbage_collect
@@ -91,6 +92,12 @@ def __init__(self, symbols=None, bases=None, globals=None):
         self.symbols = SymbolTable(symbols or {}, bases or {}, globals or {})
         opcodes.symbols = self.symbols  # XXX dirty hack
 
+        # regex for parsing assembly lines
+        # format: [[whitespace]label:][whitespace][opcode[whitespace arg[,arg...]]]
+        # where [] means optional
+        # initialised here once, instead of compiling once per line
+        self.line_regex = re.compile(r'^(\s*([a-zA-Z0-9_$.]+):)?\s*((\S*)\s*(.*))$')
+
     def init(self, a_pass):
         self.a_pass = a_pass
         self.sections = dict(text=[], data=[])
@@ -108,29 +115,14 @@ def parse_line(self, line):
         """
         if not line:
             return
-        has_label = ':' in line
-        if has_label:
-            orig_line = line.strip()
-            label_line = orig_line.split(':', 1)
-            if len(label_line) == 2:
-                label, line = label_line
-            else:  # 1
-                label, line = label_line[0], None
-
-            if label.strip('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_$.'):  # if any chars remain
-                # if label contains other chars than allowed, it's not a label
-                label, line = None, orig_line
-        else:
-            label, line = None, line.lstrip()
-        if not line:
-            opcode, args = None, ()
-        else:
-            opcode_args = line.split(None, 1)
-            if len(opcode_args) == 2:
-                opcode, args = opcode_args
-                args = tuple(arg.strip() for arg in args.split(','))
-            else:  # 1
-                opcode, args = opcode_args[0], ()
+
+        matches = self.line_regex.match(line)
+        label, opcode, args = matches.group(2), matches.group(4), matches.group(5)
+
+        label = label if label else None  # force empty strings to None
+        opcode = opcode if opcode else None  # force empty strings to None
+        args = tuple(arg.strip() for arg in args.split(',')) if args else ()
+
         return label, opcode, args
 
     def split_statements(self, lines):
diff --git a/tests/assemble.py b/tests/assemble.py
index 8f4e971..c17bbce 100644
--- a/tests/assemble.py
+++ b/tests/assemble.py
@@ -60,6 +60,7 @@ def test_parse_labels_correctly():
     https://sourceware.org/binutils/docs/as/Labels.html
     """
     a = Assembler()
+    assert a.parse_line('') is None
     assert a.parse_line('label: .set const, 42') == ('label', '.set', ('const', '42',))
     assert a.parse_line('label:.set const, 42') == ('label', '.set', ('const', '42',))
     assert a.parse_line('label:') == ('label', None, ())
@@ -75,6 +76,7 @@ def test_parse_labels_correctly():
     assert a.parse_line('a_label:') == ('a_label', None, ())
     assert a.parse_line('$label:') == ('$label', None, ())
     assert a.parse_line('.label:') == ('.label', None, ())
+    assert a.parse_line('&label:') == (None, '&label:', ())  # & not a valid char in a label
 
 
 def test_parse():

From 78e9812f52bf1894853da718fdf835c69268d28e Mon Sep 17 00:00:00 2001
From: Wilko Nienhaus <wilko.nienhaus@gmail.com>
Date: Sun, 3 Oct 2021 15:13:56 +0300
Subject: [PATCH 8/8] Add comment about dividing by 4

The comment clarifies that because we track label addresses in
32bit words, but that immediate values are in bytes, the code
needs to divide immediate values by 4, while leaving symbols
(label) addresses as-is.
---
 esp32_ulp/opcodes.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/esp32_ulp/opcodes.py b/esp32_ulp/opcodes.py
index e9c3ba1..98d7284 100644
--- a/esp32_ulp/opcodes.py
+++ b/esp32_ulp/opcodes.py
@@ -619,6 +619,7 @@ def i_jump(target, condition='--'):
         raise ValueError("invalid flags condition")
     if target.type == IMM or target.type == SYM:
         _bx.dreg = 0
+        # we track label addresses in 32bit words, but immediate values are in bytes and need to get divided by 4.
         _bx.addr = get_abs(target) if target.type == SYM else get_abs(target) >> 2  # bitwise version of "// 4"
         _bx.unused = 0
         _bx.reg = 0