From c4de499db06c6343cd741d27f4ed4d6d878aad46 Mon Sep 17 00:00:00 2001
From: Alexis Roda <alexis.roda.villalonga@gmail.com>
Date: Fri, 26 Jul 2019 21:38:40 +0200
Subject: Optimize table lookup.

Instead of a flat table of regexes this implementation groups regexes
by mnemonic, making lookup faster.

The implementation is encaptulated in its own class "Parser".
---
 tests.py    |  42 +++++++++++++------
 z80count.py | 134 ++++++++++++++++++++++++++++++++++++------------------------
 2 files changed, 110 insertions(+), 66 deletions(-)

diff --git a/tests.py b/tests.py
index c6da92e..b235ba0 100644
--- a/tests.py
+++ b/tests.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python3
 
-from z80count import init_table
-from z80count import lookup
+import pytest
+
+from z80count import Parser
 
 
 data = (
@@ -905,17 +906,32 @@ data = (
 )
 
 
-def runtests():
-    table = init_table()
+@pytest.fixture(scope="module")
+def parser_table():
+    yield Parser()
+
+
+@pytest.mark.parametrize("instruction,cycles", data)
+def test_lookup(instruction, cycles, parser_table):
+    entry = parser_table.lookup(instruction)
+    assert entry is not None, "Not found: {}".format(instruction)
+    assert entry["cycles"] == cycles, "Failed: {} expected '{}' != found '{}'".format(instruction, cycles, entry["cycles"])
+
 
-    for instruction, cycles in data:
-        entry = lookup(instruction, table)
-        if entry is None:
-            print("Not found: {}".format(instruction))
-            continue
-        if entry["cycles"] != cycles:
-            print("Failed: {} expected '{}' != found '{}'".format(instruction, cycles, entry["cycles"]))
+@pytest.mark.parametrize("line,operator", (
+    ("foo: LD A, 1 ; load accumulator", "LD"),
+    ("foo: CALL 0xABCD", "CALL"),
+    ("foo: EI", "EI"),
+    ("LD A, 1 ; load accumulator", "LD"),
+    ("CALL 0xABCE", "CALL"),
+    ("EI", "EI"),
+    ("foo: ; some label", None),
+    ("foo:", None),
+    ("; some comment", None),
+))
+def test_extract_mnemonic(line, operator):
+    assert Parser._extract_mnemonic(line) == operator
 
 
-if __name__ == "__main__":
-    runtests()
+def test_extract_mnemonic_normalizes_operator():
+    assert Parser._extract_mnemonic("call 0xabcd") == "CALL"
diff --git a/z80count.py b/z80count.py
index 8b7fdb6..54929dd 100755
--- a/z80count.py
+++ b/z80count.py
@@ -32,57 +32,45 @@ from os import path
 OUR_COMMENT = re.compile(r"(\[[0-9.\s/]+\])")
 
 
-def z80count(line, table, total, total_cond, subt, update, tabstop=2, debug=False):
+def z80count(line, parser, total, total_cond, subt, update, tabstop=2, debug=False):
     out = line.rstrip() + "\n"
-    for entry in table:
-        if entry["cregex"].search(line):
-            cycles = entry["cycles"]
-            if "/" in cycles:
-                c = cycles.split("/")
-                total += int(c[1])
-                total_cond += total + int(c[0])
+    entry = parser.lookup(line)
+    if entry:
+        cycles = entry["cycles"]
+        if "/" in cycles:
+            c = cycles.split("/")
+            total += int(c[1])
+            total_cond += total + int(c[0])
+        else:
+            total += int(cycles)
+            total_cond = 0
+
+        line = line.rstrip().rsplit(";", 1)
+        comment = "; [%s" % cycles
+        if subt:
+            if total_cond:
+                comment += " .. %d/%d]" % (total_cond, total)
             else:
-                total += int(cycles)
-                total_cond = 0
-
-            line = line.rstrip().rsplit(";", 1)
-            comment = "; [%s" % cycles
-            if subt:
-                if total_cond:
-                    comment += " .. %d/%d]" % (total_cond, total)
-                else:
-                    comment += " .. %d]" % total
-            else:
-                comment += "]"
-            if debug:
-                comment += " case{%s}" % entry["case"]
-
-            if len(line) == 1:
-                comment = "\t" * tabstop + comment
-            out = line[0] + comment
-            if len(line) > 1:
-                if update:
-                    m = OUR_COMMENT.search(line[1])
-                    if m:
-                        line[1] = line[1].replace(m.group(0), "")
-                out += " "
-                out += line[1].lstrip()
-            out += "\n"
-            found = True
-            break
+                comment += " .. %d]" % total
+        else:
+            comment += "]"
+        if debug:
+            comment += " case{%s}" % entry["case"]
+
+        if len(line) == 1:
+            comment = "\t" * tabstop + comment
+        out = line[0] + comment
+        if len(line) > 1:
+            if update:
+                m = OUR_COMMENT.search(line[1])
+                if m:
+                    line[1] = line[1].replace(m.group(0), "")
+            out += " "
+            out += line[1].lstrip()
+        out += "\n"
 
     return (out, total, total_cond)
 
-def init_table(table_file="z80table.json"):
-    table_file = path.join(
-        path.dirname(path.realpath(__file__)), table_file)
-    with open(table_file, "rt") as fd:
-        table = json.load(fd)
-
-    for i in table:
-        i["cregex"] = re.compile(r"^\s*" + i["regex"] + r"\s*(;.*)?$", re.I)
-
-    return sorted(table, key=lambda o: o["w"])
 
 def parse_command_line():
     parser = argparse.ArgumentParser(
@@ -108,22 +96,62 @@ def parse_command_line():
     return parser.parse_args()
 
 
-def lookup(line, table):
-    for entry in table:
-        if entry["cregex"].search(line):
-            return entry
-    return None
+class Parser(object):
+    """Simple parser based on a table of regexes.
+
+    """
+
+    # [label:] OPERATOR [OPERANDS] [; comment]
+    _LINE_RE = re.compile(r"^([\w]+:)?\s*(?P<operator>\w+)(\s+.*)?$")
+
+    def __init__(self):
+        self._table = self._load_table()
+
+    def lookup(self, line):
+        mnemo = self._extract_mnemonic(line)
+        if mnemo is None or mnemo not in self._table:
+            return None
+        for entry in self._table[mnemo]:
+            if entry["cregex"].search(line):
+                return entry
+        return None
+
+    @classmethod
+    def _load_table(cls):
+        table_file = path.join(path.dirname(path.realpath(__file__)), "z80table.json")
+        with open(table_file, "rt") as fd:
+            table = json.load(fd)
+
+        for i in table:
+            i["cregex"] = re.compile(r"^\s*" + i["regex"] + r"\s*(;.*)?$", re.I)
+
+        table.sort(key=lambda o: o["w"])
+        res = {}
+        for i in table:
+            mnemo = cls._extract_mnemonic(i["case"])
+            assert mnemo is not None
+            if mnemo not in res:
+                res[mnemo] = []
+            res[mnemo].append(i)
+        return res
+
+    @classmethod
+    def _extract_mnemonic(cls, line):
+        match = cls._LINE_RE.match(line)
+        if match:
+            return match.group("operator").upper()
+        return None
 
 
 def main():
     args = parse_command_line()
     in_f = args.infile
     out_f = args.outfile
-    table = init_table()
+    parser = Parser()
     total = total_cond = 0
     for line in in_f:
         output, total, total_cond = z80count(
-            line, table, total, total_cond, args.subt, args.update, args.tabstop, args.debug)
+            line, parser, total, total_cond, args.subt, args.update, args.tabstop, args.debug)
         out_f.write(output)
 
 
-- 
cgit v1.2.3