#!/usr/bin/env python3
"""Generate C++ runtime sources from ``*.in`` template files.

The script reads a shared base template and a ``<prefix>.in`` template, both
split into sections by form-feed + newline.  It writes ``<prefix>.cc`` (the
function definitions plus a ``gen_<prefix>_venv`` registration routine) and
``<prefix>.h`` (prototypes for functions that carry an explicit C++ name).
"""
import argparse
import dataclasses
import io
import os
import re
import sys
from typing import Dict, List, Optional, TextIO, Tuple

# Name of the C++ stack variable used in the generated pop/push code.
STACK = "Stack"


class Er:
    """Process-wide error counter and reporter."""

    # Number of errors reported so far.
    errors = 0

    @classmethod
    def report_error(cls, filename, line, error):
        """Write ``filename:line: error`` to stderr and count it.

        Raises:
            SystemExit: once more than 10 errors have been reported.
        """
        sys.stderr.write(f"{filename}:{line}: {error}\n")
        cls.errors += 1
        if cls.errors > 10:
            raise SystemExit("Too many errors, aborting")

    @classmethod
    def assoc_error(cls, filename, line, t):
        """Report that type ``t`` has no associated asy type."""
        cls.report_error(filename, line, f"no asy type associated to '{t}'")


def c_params(params_list):
    """Convert parameter declarations into C++ stack-pop statements.

    Args:
        params_list: parameter declarations such as ``"int x"`` or
            ``"real y=2.0"``, in declaration order.

    Returns:
        One ``<type> <name>=vm::pop<...>(Stack[,default]);`` line per
        parameter, in reverse declaration order so that pops mirror pushes.
    """
    result = []
    # Reverse order for correct push/pop
    for p in reversed(params_list):
        match = re.search(
            r"^\s*"  # optional whitespace
            r"(explicit)*"  # optional explicit
            r"\s*"
            r"(\w*(?:\s*\*)?)"  # type
            r"\s*"
            r"(\w*)"  # parameter name
            r"(=*)([\w.+\-]*)",  # optional default value
            p,
        )
        if not match:
            continue
        _, type_str, name, eqsign, val = match.groups()
        # "item" is popped without a template argument.
        template = "" if type_str == "item" else f"<{type_str}>"
        def_val = f",{val}" if eqsign else ""
        result.append(
            f" {type_str} {name}=vm::pop{template}({STACK}{def_val});\n"
        )
    return result


def clean_type(v):
    """Return ``v`` with all whitespace removed (e.g. ``"item *"`` -> ``"item*"``)."""
    return re.sub(r"\s+", "", v)


def clean_params(v: str) -> str:
    """Return ``v`` with all newline characters removed."""
    return v.replace("\n", "")


def symbolize(name, opsymbols):
    """Return the C++ expression that denotes the symbol ``name``.

    Args:
        name: function or parameter name from the template.
        opsymbols: mapping from operator spelling to its symbol token.
    """
    # If name is alphanumeric/underscore, just SYM(...) it
    if re.search(r"^[A-Za-z0-9_]+$", name):
        return f"SYM({name})"
    # If the name is recognized in opsymbols, substitute
    if name in opsymbols:
        return opsymbols[name]
    # Check if it is "operator smth" form
    opm = re.search(r"operator\s+(\w+)", name)
    if opm and opm.group(1) in opsymbols:
        return opsymbols[opm.group(1)]
    # Otherwise, generate a trans(...) symbol
    return f'symbol::trans("{name}")'


def asy_params(params_string, filename, line, type_map, opsymbols):
    """Build the ``formal(...)`` expressions for a parameter list.

    Args:
        params_string: raw comma-separated parameter list (may be empty).
        filename: file name used in error messages.
        line: line number used in error messages.
        type_map: mapping from C++ type to asy type expression.
        opsymbols: mapping from operator spelling to its symbol token.

    Returns:
        One ``formal(type, symbol, has_default, is_explicit)`` string per
        parameter.  A parameter whose type is missing from ``type_map`` is
        reported through ``Er.assoc_error`` and rendered with an empty type,
        so that the remaining parameters can still be checked.
    """
    # Split parameters by comma
    params_list = re.split(r",\s*", params_string) if params_string else []
    result = []
    for p in params_list:
        # (explicit)*\s*(\w*(?:\s*\*)?) => optional 'explicit', type, pointer
        # (\w*) => parameter name, possibly something like "x"
        # (=*)  => optional equals sign for default parameter
        match = re.search(
            r"^\s*(explicit)*\s*"  # optional explicit
            r"(\w*(?:\s*\*)?)\s*"  # type or pointer type
            r"(\w*)"  # name
            r"(=*)",  # if '=' is present, there is a default value
            p,
        )
        if not match:
            continue
        explicit, t, n, d = match.groups()
        t = clean_type(t)
        if t not in type_map:
            Er.assoc_error(filename, line, t)
        # An unknown type was already reported above; fall back to an empty
        # string rather than raising KeyError so error collection continues.
        asy_type = type_map.get(t, "")
        has_def = "true" if d else "false"
        is_ex = "true" if explicit else "false"
        result.append(
            "formal("
            f"{asy_type}, {symbolize(n.lower(), opsymbols)}, {has_def}, {is_ex})"
        )
    return result


@dataclasses.dataclass()
class RunData:
    """Command-line configuration plus the tables built while running."""

    # Path of the shared base template file.
    runtimeBaseFile: str
    # Directory that contains "<prefix>.in".
    srcTemplateDir: str
    # Base name of the template and of the generated files.
    prefix: str
    # Directory that receives "<prefix>.h".
    headerOutDir: str
    # Directory that receives "<prefix>.cc".
    srcOutDir: str
    # C++ type -> asy type expression; filled from the type sections.
    type_map: Dict[str, str] = dataclasses.field(default_factory=dict)
    # Operator spelling -> symbol token; read from the opsymbols file.
    op_symbols: Dict[str, str] = dataclasses.field(default_factory=dict)


def read_opsymbols(opsymbols_file: str) -> Dict:
    """Parse ``OPSYMBOL("name", token);`` lines into a name -> token dict."""
    op_symbols: Dict[str, str] = {}
    with open(opsymbols_file, "r", encoding="utf-8") as opf:
        for line in opf:
            # match OPSYMBOL("symname", token);
            m = re.search(r"^OPSYMBOL\(\"(.*)\", ([A-Za-z_]+)\);", line)
            if m:
                symname, token = m.groups()
                op_symbols[symname] = token
    return op_symbols


def parse_args() -> RunData:
    """Parse the command line and return the populated ``RunData``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--opsym-file", dest="opsymbolsFile", required=True)
    parser.add_argument("--runtime-base-file", dest="runtimeBaseFile", required=True)
    parser.add_argument("--src-template-dir", dest="srcTemplateDir", required=True)
    parser.add_argument("--prefix", dest="prefix", required=True)
    parser.add_argument("--header-out-dir", dest="headerOutDir", required=True)
    parser.add_argument("--src-out-dir", dest="srcOutDir", required=True)
    args = parser.parse_args()
    return RunData(
        runtimeBaseFile=args.runtimeBaseFile,
        srcTemplateDir=args.srcTemplateDir,
        prefix=args.prefix,
        headerOutDir=args.headerOutDir,
        srcOutDir=args.srcOutDir,
        op_symbols=read_opsymbols(args.opsymbolsFile),
    )


def parse_types(
    data: str, input_filename: str, input_start_line: int
) -> Tuple[Dict, int]:
    """Parse a section of ``type => code`` declarations.

    Args:
        data: text of the section.
        input_filename: file name used in error messages.
        input_start_line: line number of the first line of ``data``.

    Returns:
        ``(new_entries, lines_consumed)`` where ``new_entries`` maps the
        whitespace-stripped type to its code and ``lines_consumed`` is the
        number of newline characters in ``data``.
    """
    entries: Dict[str, str] = {}
    for line_no, line in enumerate(data.split("\n"), start=input_start_line):
        # Remove // comments in each line
        trimmed = re.sub(r"//.*", "", line)
        # Skip empty lines
        if re.fullmatch(r"\s*", trimmed):
            continue
        # Regex for matching something like "item => ITEMVAL"
        # (\w*(?:\s*\*)?) matches a type or pointer (e.g. "int", "item*", etc.)
        # \s*=>\s* matches the '=>' separator
        # (.*) grabs the rest of the line as the code
        match = re.search(
            r"(\w*(?:\s*\*)?)"  # Type or pointer type
            r"\s*=>\s*"  # =>
            r"(.*)",  # Everything else
            trimmed,
        )
        if not match:
            Er.report_error(input_filename, line_no, "bad type declaration")
            continue
        t, code = match.groups()
        entries[clean_type(t)] = code
    return entries, data.count("\n")


class MatchNotFoundError(Exception):
    """Raised when a section does not look like a function definition."""


class FunctionData:
    """One function definition parsed from a template section."""

    # This regex attempts to parse a function definition in the form:
    # optional comments, return type, asy function name,
    # optional C++ function name, parameters, code
    _pat = re.compile(
        # pylint: disable=line-too-long
        r"^((?:\s*//[^\n]*\n)*)"  # $1: capture comment lines starting with //
        r"\s*"
        r"(\w*(?:\s*\*)?)"  # $2: return type (e.g., 'int', 'item*', etc.)
        r"\s*"
        r"([^(:]*)"  # $3: read to the first colon or open parenthesis (asy function name)
        r"\:*"
        r"([^(]*)"  # $4: read to the first open parenthesis (optional c++ function name)
        r"\s*"
        r"\(([\w\s*,=.+\-]*)\)"  # $5: parameters list inside parentheses
        r"\s*"
        r"\{(.*)\}",  # $6: function body up to last closing brace in section
        re.DOTALL,  # allow . to match newlines (for $6)
    )

    def __init__(
        self,
        section: str,
        prefix: str,
        function_count: int,
        header_lines: List,
    ):
        """Parse ``section`` into its components.

        Args:
            section: text of one template section.
            prefix: template prefix, used to build a fallback C++ name.
            function_count: index of this function, used in the fallback name.
            header_lines: list that receives a prototype line when the
                section supplies an explicit C++ function name.

        Raises:
            MatchNotFoundError: if ``section`` does not match the pattern.
        """
        md = self._pat.search(section)
        if not md:
            raise MatchNotFoundError
        comments, return_type, name, cname, params, code = md.groups()

        # Insert a fallback cName if needed
        if cname:
            header_lines.append(f"void {cname}(vm::stack *);\n")
        else:
            cname = f"gen_{prefix}{function_count}"
        # Added newlines here would mess up the line count
        assert cname.count("\n") == 0

        # Clean up types
        return_type = clean_type(return_type)

        # If there's "Operator", remove it:
        name = re.sub(r"Operator\s*", "", name)

        # Replace 'return X;' with push
        qualifier = "" if return_type == "item" else f"<{return_type}>"
        code = re.sub(
            r"\breturn\s+([^;]*);",  # read until the next semicolon
            rf"{{{STACK}->push{qualifier}(\1); return;}}",
            code,
        )

        self.comments: str = comments
        self.return_type: str = return_type
        self.name: str = name
        self.cname: str = cname
        self.params_string: str = params
        self.code: str = code

    def generate_addFunc(
        self,
        *,
        in_line_counter: int,
        d: RunData,
    ) -> str:
        """Return the registration statement for this function.

        A named function yields an ``addFunc(...)`` call; an unnamed one
        yields ``REGISTER_BLTIN(...)``.  Both are preceded by a ``#line``
        directive pointing at the template.  Types missing from
        ``d.type_map`` are reported through ``Er.assoc_error`` and rendered
        as an empty string instead of raising ``KeyError``.
        """
        assert self.cname
        line_directive = (
            f'#line {in_line_counter} "{d.srcTemplateDir}/{d.prefix}.in"\n'
        )
        if not self.name:
            # builtin with no name => REGISTER_BLTIN
            return (
                f"{line_directive}"
                f' REGISTER_BLTIN(run::{self.cname},"{self.cname}");\n'
            )

        in_file = f"{d.prefix}.in"
        if self.return_type not in d.type_map:
            Er.assoc_error(in_file, in_line_counter, self.return_type)
        # Already reported above when missing; keep going to collect errors.
        asy_return_type = d.type_map.get(self.return_type, "")

        asy_param_list = asy_params(
            self.params_string,
            in_file,
            in_line_counter,
            d.type_map,
            d.op_symbols,
        )
        asy_params_comma = ""
        if asy_param_list:
            joined = ", ".join(asy_param_list)
            asy_params_comma = f", {joined}"
        return (
            f"{line_directive}"
            f" addFunc(ve, run::{self.cname}, {asy_return_type}, "
            f"{symbolize(self.name,d.op_symbols)}{asy_params_comma});\n"
        )

    def write_cc(self, f_out: TextIO, d: RunData, in_line_counter: int) -> None:
        """Write the C++ definition of this function to ``f_out``.

        Args:
            f_out: open text stream for the generated ``.cc`` file.
            d: run configuration (template directory and prefix are used for
                the ``#line`` directives).
            in_line_counter: template line number at which this section
                starts.
        """
        # Write out any preceding comments
        f_out.write(self.comments)
        in_line_counter += self.comments.count("\n")
        f_out.write(f'#line {in_line_counter} "{d.srcTemplateDir}/{d.prefix}.in"\n')

        # Split the parameter list by commas (ignoring whitespace after each comma)
        param_list = (
            re.split(r",\s*", self.params_string) if self.params_string else []
        )
        # Build param popping lines
        param_code = "".join(c_params(param_list))

        # Write out the function prototype as a comment
        prototype = f"{self.return_type} {self.name}({self.params_string});"
        # Count the prototype's lines before newlines are stripped from it.
        in_line_counter += prototype.count("\n") + 1
        if self.name:
            prototype = clean_params(prototype)
            assert prototype.count("\n") == 0
            f_out.write(f"// {prototype}\n")

        # Actual function definition in prefix.cc
        # The stack parameter is left unnamed when nothing is popped or pushed.
        param_name = STACK if self.return_type != "void" or param_list else ""
        f_out.write(f"void {self.cname}(stack *{param_name})\n")
        f_out.write("{\n")
        assert not param_code or param_code.endswith("\n")
        f_out.write(param_code)
        # No trailing newline here: the body itself must start with one.
        f_out.write(f'#line {in_line_counter} "{d.srcTemplateDir}/{d.prefix}.in"')
        assert self.code.startswith("\n")
        f_out.write(self.code)
        f_out.write("}\n\n")


@dataclasses.dataclass
class SectionWriter:
    """Write sections while keeping track of input line numbers."""

    # Output stream for the generated source file.
    f_out: TextIO
    # Directory name used in the emitted #line directives.
    srcdir: str
    # Template prefix used in the emitted #line directives.
    prefix: str
    # Current line number within the base template.
    base_line_counter: int = 1
    # Current line number within "<prefix>.in".
    in_line_counter: int = 1

    def write_base_chunk(self, chunk: str) -> None:
        """Write a base-template chunk preceded by its ``#line`` directive."""
        directive = f'#line {self.base_line_counter} "{self.srcdir}/runtimebase.in"\n'
        self.f_out.write(directive)
        self.f_out.write(chunk)
        self.base_line_counter += chunk.count("\n")

    def write_in_chunk(self, chunk: str, directive: Optional[str] = None) -> None:
        """Write a ``<prefix>.in`` chunk preceded by a ``#line`` directive.

        Args:
            chunk: text to copy to the output.
            directive: directive to emit instead of the default one.
        """
        if directive is None:
            directive = (
                f'#line {self.in_line_counter} "{self.srcdir}/{self.prefix}.in"\n'
            )
        self.f_out.write(directive)
        self.f_out.write(chunk)
        self.in_line_counter += chunk.count("\n")


def overwrite_if_changed(filename: str, new_contents: str) -> None:
    """Write ``new_contents`` to ``filename`` unless it already holds that text."""
    try:
        with open(filename, "r", encoding="utf-8") as f:
            if f.read() == new_contents:
                return
    except FileNotFoundError:
        pass
    with open(filename, "w", encoding="utf-8") as f:
        f.write(new_contents)


def write_trans_namespace(f_out: TextIO, d: RunData, builtin: List) -> None:
    """Write the ``trans`` namespace holding ``gen_<prefix>_venv``."""
    f_out.write("namespace trans {\n\n")
    f_out.write(f"void gen_{d.prefix}_venv(venv &ve)\n")
    f_out.write("{\n")
    f_out.write("".join(builtin))
    f_out.write("}\n\n")
    f_out.write("} // namespace trans\n")


def read_sections(filename: str) -> List:
    """Read ``filename`` and split it into form-feed-delimited sections.

    Every section except the last keeps its trailing form-feed + newline.
    A trailing empty section is dropped.
    """
    # Read *.in files split by form-feed + newline
    with open(filename, "rb") as f:
        # Convert to text with universal newline
        data = io.TextIOWrapper(f, encoding="utf-8", newline=None).read()
    # We split and merge corresponding sections delineated by form-feed
    sections = data.split("\f\n")
    sections[:-1] = [section + "\f\n" for section in sections[:-1]]
    # For now, imitate the behavior of the original perl code: if the last section
    # is empty, remove it.
    if sections and not sections[-1]:
        sections.pop()
    return sections


def main(d: RunData) -> None:
    """Generate ``<prefix>.h`` and ``<prefix>.cc`` for the given configuration."""
    outHeaderFile = os.path.join(d.headerOutDir, f"{d.prefix}.h")
    outSrcFile = os.path.join(d.srcOutDir, f"{d.prefix}.cc")
    sections_in, sections_base = read_input_sections(d)
    header_lines = process_sections(d, sections_in, sections_base, outSrcFile)
    finalize_output(outHeaderFile, outSrcFile, header_lines)


def read_input_sections(d: RunData) -> Tuple[List, List]:
    """Return ``(sections_in, sections_base)`` read from the two templates."""
    # Read base, prefix.in, etc., all separated by form-feed + newline
    sections_in = read_sections(os.path.join(d.srcTemplateDir, f"{d.prefix}.in"))
    sections_base = read_sections(d.runtimeBaseFile)
    return sections_in, sections_base


def process_sections(
    d: RunData, sections_in: List, sections_base: List, outSrcFile: str
) -> List:
    """Write the generated source file and return the header lines.

    Args:
        d: run configuration; ``d.type_map`` is updated with the parsed types.
        sections_in: sections of ``<prefix>.in``.
        sections_base: sections of the base template.
        outSrcFile: path of the ``.cc`` file to write.

    Returns:
        The lines of the header file, without the closing of ``namespace run``.
    """
    with open(outSrcFile, "w", newline="", encoding="utf-8") as f_out:
        out_autogen = (
            f"/***** Autogenerated from {d.prefix}.in; "
            "changes will be overwritten *****/\n\n"
        )
        # Output an autogenerated banner
        f_out.write(out_autogen)

        # Track how many lines have passed in each file
        w = SectionWriter(f_out, d.srcTemplateDir, d.prefix)

        # 1) runtimebase.in chunk #1 (guarded like the later chunks so an
        #    empty template does not raise IndexError)
        w.write_base_chunk(sections_base[0] if sections_base else "")

        # 2) prefix.in chunk #1
        w.write_in_chunk(sections_in[0] if sections_in else "")

        # 3) read next chunk from runtimebase.in for type mapping
        current_chunk = sections_base[1] if len(sections_base) > 1 else ""
        new_types, lines_consumed = parse_types(
            current_chunk, "runtimebase.in", w.base_line_counter
        )
        d.type_map.update(new_types)
        w.base_line_counter += lines_consumed

        # ... and similarly the next chunk from prefix.in
        current_chunk = sections_in[1] if len(sections_in) > 1 else ""
        new_types, lines_consumed = parse_types(
            current_chunk, f"{d.prefix}.in", w.in_line_counter
        )
        d.type_map.update(new_types)
        w.in_line_counter += lines_consumed

        # 4) next chunk from base
        w.write_base_chunk(sections_base[2] if len(sections_base) > 2 else "")

        # 5) next chunk from prefix.in
        w.write_in_chunk(
            sections_in[2] if len(sections_in) > 2 else "",
            # pylint: disable=fixme
            # TODO: The following line preserves erroneous behavior of the original
            # perl code, but should be eliminated once the perl code is removed.
            f'#line {w.in_line_counter} "{d.prefix}.in"\n',
        )

        f_out.write("\n#ifndef NOSYM\n")
        f_out.write(f'#include "{d.prefix}.symbols.h"\n')
        f_out.write("\n#endif\n")
        f_out.write("namespace run {\n")

        header_lines = [out_autogen, "#pragma once\n", "namespace run {\n"]
        builtin: List[str] = []

        # 6) read remaining lines from sections_in[3...] for function definitions
        for function_count, section in enumerate(sections_in[3:]):
            try:
                fd = FunctionData(section, d.prefix, function_count, header_lines)
            except MatchNotFoundError:
                Er.report_error(
                    f"{d.prefix}.in",
                    w.in_line_counter,
                    "bad function definition",
                )
                # Still account for the skipped section, otherwise every later
                # diagnostic and #line directive would carry a stale number.
                w.in_line_counter += section.count("\n")
                continue

            # Build addFunc part
            builtin.append(
                fd.generate_addFunc(
                    in_line_counter=w.in_line_counter,
                    d=d,
                )
            )
            fd.write_cc(f_out, d, w.in_line_counter)
            w.in_line_counter += section.count("\n")

        f_out.write("} // namespace run\n\n")
        write_trans_namespace(f_out, d, builtin)
    return header_lines


def finalize_output(outHeaderFile: str, outSrcFile: str, header_lines: List) -> None:
    """Write the header file, or remove both outputs if errors occurred.

    Exits the process with status 1 when ``Er.errors`` is non-zero.
    """
    header_lines.append("}\n\n")
    overwrite_if_changed(outHeaderFile, "".join(header_lines))

    if Er.errors:
        # Do not leave partially generated files behind after a failed run.
        for path in (outHeaderFile, outSrcFile):
            try:
                os.unlink(path)
            except FileNotFoundError:
                pass
        sys.exit(1)


if __name__ == "__main__":
    runData = parse_args()
    main(runData)