WIP: awk replacement
Some checks failed
rosenpass-ciphers - primitives - benchmark / prim-benchmark (i686-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / prim-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (i686-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / ciphers-primitives-bench-status (push) Has been cancelled
rosenpass - protocol - benchmark / ciphers-protocol-bench-status (push) Has been cancelled

Co-authored-by: Benjamin Lipp <blipp@rosenpass.eu>
This commit is contained in:
Anja Rabich
2025-12-15 18:05:32 +01:00
parent 0a945aa152
commit 444e782854
5 changed files with 460 additions and 36 deletions

View File

@@ -39,15 +39,15 @@
* ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~
* ~ ^~
* ~04_dos… has a syntax error (see below)~
* ~~rewrite marzipan.awk into Python/LARK~~
* ~~define a LARK grammar for marzipan.awk rules~~
* ~~write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)~~
## Next Steps
* integrate the marzipan.awk processing into the Python pipeline (integration approach still to be decided)
* options term special cases (c.f. manual page 133, starting with "fun" term)
* complete with CryptoVerif options
* rewrite marzipan.awk into Python/LARK
* define a LARK grammar for marzipan.awk rules
* write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
* options term special cases (c.f. manual page 133, starting with "fun" term)
* complete with CryptoVerif options
* do not assume that the repo path has subdir marzipan
* do not assume that the repo path has subdir analysis
* rewrite cpp into Python/LARK (step: mpv->i.pv)

View File

@@ -1,9 +1,9 @@
from .util import pkgs, setup_exports, export, rename
from .parser import *
# from rich.console import Console
import click
from .parser import *
from .util import export, pkgs, rename, setup_exports
target_subdir = "target/proverif"
(__all__, export) = setup_exports()
@@ -25,7 +25,7 @@ def set_logging_logrecordfactory(filename):
pkgs.logging.setLogRecordFactory(record_factory)
#def set_logging_format(max_length):
# def set_logging_format(max_length):
# pass
# #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}"
# #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str)
@@ -218,9 +218,9 @@ def analyze(repo_path, output):
full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv"))
entries.extend(full_paths)
#modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
#max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
#set_logging_format(max_length)
# modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
# max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
# set_logging_format(max_length)
with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
futures = {
@@ -276,7 +276,8 @@ def metaverif(repo_path, tmpdir, file):
print(f"AWK Prep Path: {awk_prep}")
cpp(file, cpp_prep)
awk(repo_path, cpp_prep, awk_prep)
parse_main(cpp_prep, awk_prep)
# awk(repo_path, cpp_prep, awk_prep)
log_file = pkgs.os.path.join(tmpdir, f"{name}.log")
@@ -300,12 +301,13 @@ def metaverif(repo_path, tmpdir, file):
@main.command()
@click.argument("file_path")
def parse(file_path):
@click.argument("i_path")
@click.argument("o_path")
def parse(i_path, o_path):
try:
parse_main(file_path)
parse_main(i_path, o_path)
except pkgs.lark.exceptions.UnexpectedCharacters as e:
logger.error(f"Error {type(e).__name__} parsing {file_path}: {e}")
logger.error(f"Error {type(e).__name__} parsing {i_path}: {e}")
if __name__ == "__main__":

View File

@@ -32,9 +32,11 @@ decl: type_decl
#| def_decl
#| expand_decl
type_decl: "type" IDENT options{OPTIONS_TYPE} "."
type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."

View File

@@ -0,0 +1,362 @@
# --- Core terminals and helper templates of the ProVerif grammar ---
# Annotation marker terminals (@query/@reachable/@lemma) consumed by the
# query_annotation / lemma_annotation rules further down.
QUERY: "@query"
REACHABLE: "@reachable"
LEMMA: "@lemma"
# Keywords introducing the top-level process / equivalence sections.
PROCESS: "process"
EQUIVALENCE: "equivalence"
YIELD: "yield"
# "channel" is both a declaration keyword and a builtin type name, so it
# gets its own terminal plus a rule that lets typeid accept it.
channel: CHANNEL
CHANNEL: "channel"
# ProVerif identifiers: ASCII letter first, then letters, digits,
# accented Latin-1 letters, apostrophes, or underscores.
IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
ZERO: "0"
# Binary infix operators allowed between (g/p)terms.
INFIX: "||"
| "&&"
| "="
| "<>"
| "<="
| ">="
| "<"
| ">"
# A type is either the builtin channel type or a user-declared name.
typeid: channel
| IDENT
# Rule templates (expanded by Lark): comma-separated sequences,
# non-empty and possibly-empty variants.
_non_empty_seq{x}: x ("," x)*
_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
# --- Option-list terminals for the various declarations ---
# Each OPTIONS_* terminal enumerates the option identifiers accepted inside
# the optional bracketed "[ ... ]" suffix of the corresponding declaration;
# the options{idents} template below wraps them into that bracketed list.
OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
OPTIONS_FUN: OPTIONS_FUN_CONST
OPTIONS_CONST: OPTIONS_FUN_CONST
OPTIONS_FREE_REDUC: "private"
OPTIONS_PRED: "memberOptim" | "block"
OPTIONS_PROCESS: "precise"
OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset"
OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
# FIX: the cv_* alternative was previously the quoted STRING "/cv_.../",
# which would only match those literal characters (slashes included).
# It is meant to be a regex matching any identifier with the cv_ prefix.
OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | /cv_[a-zA-Z0-9À-ÿ'_]*/
OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
# Optional bracketed option list, e.g. `[private]` after a free declaration.
options{idents}: [ "[" _non_empty_seq{idents} "]" ]
BOOL : "true" | "false"
NONE: "none"
FULL: "full"
ALL: "all"
FUNC: IDENT
ignoretype_options: BOOL | ALL | NONE | "attacker"
boolean_settings_names: "privateCommOnPublicTerms"
| "rejectChoiceTrueFalse"
| "rejectNoSimplif"
| "allowDiffPatterns"
| "inductionQueries"
| "inductionLemmas"
| "movenew"
| "movelet"
| "stopTerm"
| "removeEventsForLemma"
| "simpEqAll"
| "eqInNames"
| "preciseLetExpand"
| "expandSimplifyIfCst"
| "featureFuns"
| "featureNames"
| "featurePredicates"
| "featureEvents"
| "featureTables"
| "featureDepth"
| "featureWidth"
| "simplifyDerivation"
| "abbreviateDerivation"
| "explainDerivation"
| "unifyDerivation"
| "reconstructDerivation"
| "displayDerivation"
| "traceBacktracking"
| "interactiveSwapping"
| "color"
| "verboseLemmas"
| "abbreviateClauses"
| "removeUselessClausesBeforeDisplay"
| "verboseEq"
| "verboseDestructors"
| "verboseTerm"
| "verboseStatistics"
| "verboseRules"
| "verboseBase"
| "verboseRedundant"
| "verboseCompleted"
| "verboseGoalReachable"
# --- "set <name> = <value>." settings declarations ---
# Generic template for a single setting: set <name> = <value> .
_decl_pair{name, value}: "set" name "=" value "."
set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
# Per-setting value domains (see the "set" settings in the ProVerif manual).
ignore_types_values: BOOL | "all" | "none" | "attacker"
simplify_process_values: BOOL | "interactive"
precise_actions_values: BOOL | "trueWithoutArgsInNames"
redundant_hyp_elim_values: BOOL | "beginOnly"
reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
attacker_values: "active" | "passive"
key_compromise_values: "none" | "approx" | "strict"
predicates_implementable: "check" | "nocheck"
application_values: "instantiate" | "full" | "none" | "discard"
# FIX: `n` in the manual is a metavariable standing for an integer literal,
# not the literal identifier "n"; maxDepth/maxHyp take <int> or none.
max_values: "none" | NAT
sel_fun_values: "TermMaxsize" | "Term" | "NounifsetMaxsize" | "Nounifset"
redundancy_elim_values: "best" | "simple" | "no"
nounif_ignore_a_few_times_values: "none" | "auto" | "all"
# FIX: same as max_values — nounifIgnoreNtimes takes an integer.
nounif_ignore_ntimes_values: NAT
trace_display_values: "short" | "long" | "none"
verbose_clauses_values: "none" | "explained" | "short"
# Signed integers (used e.g. as the "/ INT" weight of select/nounif decls).
INT: NAT | "-" NAT
# ProVerif comments: (* ... *); this regex does not handle nested comments.
COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library
%ignore WS // Disregard spaces in text
%ignore COMMENT
decl: type_decl
| channel_decl
| free_decl
| const_decl
| fun_decl
| letfun_decl
| reduc_decl
| fun_reduc_decl
| equation_decl
| pred_decl
| table_decl
| let_decl
| set_settings_decl
| event_decl
| query_decl
| axiom_decl
| restriction_decl
| lemma_decl
| noninterf_decl
| weaksecret_decl
| not_decl
| select_decl
| noselect_decl
| nounif_decl
| elimtrue_decl
| clauses_decl
| module_decl
#| param_decl
#| proba_decl
#| letproba_decl
#| proof_decl
#| def_decl
#| expand_decl
type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
set_settings_decl: set_settings_boolean_decl
| _decl_pair{"ignoreTypes", ignore_types_values}
| _decl_pair{"simplifyProcess", simplify_process_values}
| _decl_pair{"preciseActions", precise_actions_values}
| _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
| _decl_pair{"reconstructTrace", reconstruct_trace_values}
| _decl_pair{"attacker", attacker_values}
| _decl_pair{"keyCompromise", key_compromise_values}
| _decl_pair{"predicatesImplementable", predicates_implementable}
| _decl_pair{"saturationApplication", application_values}
| _decl_pair{"verificationApplication", application_values}
| _decl_pair{"maxDepth", max_values}
| _decl_pair{"maxHyp", max_values}
| _decl_pair{"selFun", sel_fun_values}
| _decl_pair{"redundancyElim", redundancy_elim_values}
| _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
| _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
| _decl_pair{"traceDisplay", trace_display_values}
| _decl_pair{"verboseClauses", verbose_clauses_values}
| set_strategy
| set_symb_order
_swap_strategy_seq{x}: x ("->" x)*
set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
_symb_ord_seq{x}: x (">" x)*
set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
clauses_decl: "clauses" clauses "."
module_decl: "@module" " " IDENT
noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
weaksecret_decl: "weaksecret" IDENT "."
nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
equality: term "=" term
| "let" IDENT "=" term "in" equality
mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
clause: term
| term "->" term
| term "<->" term
| term "<=>" term
clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
NAT: DIGIT+
start: decl* PROCESS process | decl* EQUIVALENCE process process
process: ZERO
| YIELD
| IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
| bracketed_process
| piped_process
| replicated_process
| replicated_process_bounds
| sample_process
| if_process
| in_process
| out_process
| let_process
| insert_process
| get_process
| event_process
| phase
| sync
bracketed_process: "(" process ")"
piped_process: process "|" process
replicated_process: "!" process
replicated_process_bounds: "!" IDENT "<=" IDENT process
| "foreach" IDENT "<=" IDENT "do" process
sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
| IDENT "<-R" typeid [";" process]
let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
| IDENT [":" typeid] "<-" pterm [";" process]
| "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
if_process: "if" pterm "then" process [ "else" process ]
in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
term: IDENT
| NAT
| "(" _maybe_empty_seq{term} ")"
| IDENT "(" _maybe_empty_seq{term} ")"
| term ( "+" | "-" ) NAT
| NAT "+" term
| term INFIX term
| "not" "(" term ")"
phase: "phase" NAT [";" process]
sync: "sync" NAT ["[" TAG "]"] [";" process]
query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
| "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
| "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
| "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
| IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
gformat: IDENT
| "*" IDENT
| IDENT "(" _maybe_empty_seq{gformat} ")"
| "choice" "[" gformat "," gformat "]"
| "not" "(" _maybe_empty_seq{gformat} ")"
| "new" IDENT [ "[" [ fbinding ] "]" ]
| "let" IDENT "=" gformat "in" gformat
fbinding: "!" NAT "=" gformat [";" fbinding]
| IDENT "=" gformat [";" fbinding]
nounifoption: "hypothesis"
| "conclusion"
| "ignoreAFewTimes"
| "inductionOn" "=" IDENT
| "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
query_annotation: [(REACHABLE|QUERY) ESCAPED_STRING]
query_decl: query_annotation query_decl_core
query_decl_core: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
not_decl: "not" [ typedecl ";"] gterm "."
TAG: IDENT
lemma: gterm [";" lemma]
| gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
| gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
lemma_annotation: [LEMMA ESCAPED_STRING]
lemma_decl: lemma_annotation lemma_decl_core
lemma_decl_core: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
gterm: ident_gterm
| fun_gterm
| choice_gterm
| infix_gterm
| arith_gterm
| arith2_gterm
| event_gterm
| injevent_gterm
| implies_gterm
| paren_gterm
| sample_gterm
| let_gterm
ident_gterm: IDENT
fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
choice_gterm: "choice" "[" gterm "," gterm "]"
infix_gterm: gterm INFIX gterm
arith_gterm: gterm ( "+" | "-" ) NAT
arith2_gterm: NAT "+" gterm
event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
implies_gterm: gterm "==>" gterm
paren_gterm: "(" _maybe_empty_seq{gterm} ")"
sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
let_gterm: "let" IDENT "=" gterm "in" gterm
gbinding: "!" NAT "=" gterm [";" gbinding]
| IDENT "=" gterm [";" gbinding]
pterm: IDENT
| NAT
| "(" _maybe_empty_seq{pterm} ")"
| IDENT "(" _maybe_empty_seq{pterm} ")"
| choice_pterm
| pterm ("+" | "-") NAT
| NAT "+" pterm
| pterm INFIX pterm
| not_pterm
| sample_pterm
| if_pterm
| let_pterm
| insert_pterm
| get_pterm
| event_pterm
choice_pterm: "choice[" pterm "," pterm "]"
if_pterm: "if" pterm "then" pterm [ "else" pterm ]
not_pterm: "not" "(" pterm ")"
let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
| IDENT [":" typeid] "<-" pterm ";" pterm
| "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
| IDENT "<-R" typeid [";" pterm]
insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
pattern: IDENT [":" typeid]
| "_" [ ":" typeid ]
| NAT
| pattern "+" NAT
| NAT "+" pattern
| "(" _maybe_empty_seq{pattern} ")"
| IDENT "(" _maybe_empty_seq{pattern} ")"
| "=" pterm
mayfailterm: term
| "fail"
mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]

View File

@@ -1,8 +1,36 @@
import re
import sys
from pathlib import Path
from lark import Lark, Token, Transformer, exceptions, tree
from lark import Discard, Lark, Transformer
from lark.reconstruct import Reconstructor
class TreeVisitor(Transformer):
# def __default_token__(self, token):
# if token.type == "PROCESS":
# return ""
# elif token.type == "IDENT":
# return token
# # return f"<ident>{token}</ident>"
# def __default__(self, data, children, meta):
# #return "\n".join([c for c in children if c])
# amazing_str = ""
# for c in children:
# if c:
# amazing_str += c
# return f"<{data}> {amazing_str} </{data}>"
# lemma_decl_core: original_stuff
# lemma_annotation: [LEMMA ESCAPED_STRING]
# lemma_decl: lemma_annotation lemma_decl_core
# lemma_decl: original_stuff
def lemma_annotation(self, children):
return Discard
def query_annotation(self, children):
return Discard
# taken from Page 17 in the ProVerif manual
# At the moment, we do not reject a ProVerif model that uses reserved words as identifier,
@@ -118,35 +146,47 @@ LEMMA: "@lemma"
# add @query and @reachable to query_decl, @lemma to lemma_decl
def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str:
old_decl_renamed = f"{decl_rule}_core"
rename_target = f"{decl_rule}\s*:"
def modify_decl_rule(
grammar: str, old_rule_name: str, annot_rule_name: str, annot_rule: str
) -> str:
old_decl_renamed = f"{old_rule_name}_core"
rename_target = f"{old_rule_name}\s*:"
# rename *_decl -> *_decl_core
rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1)
grammar, count = re.subn(rename_target, f"{old_decl_renamed}:", grammar, count=1)
if count == 0:
raise RuntimeError("*_decl not found!")
wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}"
# lemma_annotation: [LEMMA ESCAPED_STRING]
annot_rule = f"{annot_rule_name}: {annot_rule}"
# lemma_decl: lemma_annotation lemma_decl_core
wrapper = f"{old_rule_name}: {annot_rule_name} {old_decl_renamed}"
old_decl_target = f"{old_decl_renamed}\s*:"
# get index of *_decl_core rule
match = re.search(old_decl_target, rules, flags=re.MULTILINE)
match = re.search(old_decl_target, grammar, flags=re.MULTILINE)
if not match:
raise RuntimeError("*_decl_core: rule not found after rename")
insert_pos = match.start()
rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:]
return rules
grammar = (
grammar[:insert_pos] + annot_rule + "\n" + wrapper + "\n" + grammar[insert_pos:]
)
return grammar
query_rules = modify_decl_rule(
query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]"
query_rules, "query_decl", "query_annotation", "[(REACHABLE|QUERY) ESCAPED_STRING]"
)
lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]")
lemma_rules = modify_decl_rule(
lemma_rules, "lemma_decl", "lemma_annotation", "[LEMMA ESCAPED_STRING]"
)
grammar = (
common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules
)
parser = Lark(grammar)
with open("./src/grammars/awk_proverif.lark", "w") as f:
f.write(grammar)
parser = Lark(grammar, maybe_placeholders=False)
# COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
# COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)"
@@ -165,8 +205,26 @@ def parsertest(input):
return parsetree
def parse_main(file_path):
with open(file_path, "r") as f:
"""
parse in i.pv file using new awk-marzipan grammar,
eliminate aliases from i.pv using lark TreeVisitor,
then return o.pv (awk_prep)
"""
def parse_main(ipv_path, opv_path):
with open(ipv_path, "r") as f:
content = f.read()
# print(content)
parsertest(content)
# content += "\nprocess main"
forest = parsertest(content)
with open("i.pv", "w") as ipvf:
ipvf.write(forest.pretty())
tree = TreeVisitor().transform(forest)
# TODO: tree -> o.pv
new_json = Reconstructor(parser).reconstruct(tree)
with open(opv_path, "w") as opvf:
# opvf.write(tree.pretty())
opvf.write(new_json)