WIP: awk replacement
Some checks failed
rosenpass-ciphers - primitives - benchmark / prim-benchmark (i686-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / prim-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (i686-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / ciphers-primitives-bench-status (push) Has been cancelled
rosenpass - protocol - benchmark / ciphers-protocol-bench-status (push) Has been cancelled

Co-authored-by: Benjamin Lipp <blipp@rosenpass.eu>
This commit is contained in:
Anja Rabich
2025-12-15 18:05:32 +01:00
parent 0a945aa152
commit 444e782854
5 changed files with 460 additions and 36 deletions

View File

@@ -39,15 +39,15 @@
* ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~ * ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~
* ~ ^~ * ~ ^~
* ~04_dos… has a syntax error (see below)~ * ~04_dos… has a syntax error (see below)~
* ~~rewrite marzipan.awk into Python/LARK~~
* ~~define a LARK grammar for marzipan.awk rules~~
* ~~write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)~~
## Next Steps
* integrate marzipan.awk into Python, somehow
* options term special cases (c.f. manual page 133, starting with "fun" term)
* complete with CryptoVerif options
* rewrite marzipan.awk into Python/LARK
* define a LARK grammar for marzipan.awk rules
* write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
* do not assume that the repo path has subdir marzipan
* do not assume that the repo path has subdir analysis
* rewrite cpp into Python/LARK (step: mpv->i.pv)

View File

@@ -1,9 +1,9 @@
from .util import pkgs, setup_exports, export, rename
from .parser import *
# from rich.console import Console # from rich.console import Console
import click import click
from .parser import *
from .util import export, pkgs, rename, setup_exports
target_subdir = "target/proverif" target_subdir = "target/proverif"
(__all__, export) = setup_exports() (__all__, export) = setup_exports()
@@ -25,7 +25,7 @@ def set_logging_logrecordfactory(filename):
pkgs.logging.setLogRecordFactory(record_factory) pkgs.logging.setLogRecordFactory(record_factory)
#def set_logging_format(max_length): # def set_logging_format(max_length):
# pass # pass
# #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}" # #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}"
# #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str) # #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str)
@@ -218,9 +218,9 @@ def analyze(repo_path, output):
full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv")) full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv"))
entries.extend(full_paths) entries.extend(full_paths)
#modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths] # modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
#max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0 # max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
#set_logging_format(max_length) # set_logging_format(max_length)
with pkgs.concurrent.futures.ProcessPoolExecutor() as executor: with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
futures = { futures = {
@@ -276,7 +276,8 @@ def metaverif(repo_path, tmpdir, file):
print(f"AWK Prep Path: {awk_prep}") print(f"AWK Prep Path: {awk_prep}")
cpp(file, cpp_prep) cpp(file, cpp_prep)
awk(repo_path, cpp_prep, awk_prep) parse_main(cpp_prep, awk_prep)
# awk(repo_path, cpp_prep, awk_prep)
log_file = pkgs.os.path.join(tmpdir, f"{name}.log") log_file = pkgs.os.path.join(tmpdir, f"{name}.log")
@@ -300,12 +301,13 @@ def metaverif(repo_path, tmpdir, file):
@main.command() @main.command()
@click.argument("file_path") @click.argument("i_path")
def parse(file_path): @click.argument("o_path")
def parse(i_path, o_path):
try: try:
parse_main(file_path) parse_main(i_path, o_path)
except pkgs.lark.exceptions.UnexpectedCharacters as e: except pkgs.lark.exceptions.UnexpectedCharacters as e:
logger.error(f"Error {type(e).__name__} parsing {file_path}: {e}") logger.error(f"Error {type(e).__name__} parsing {i_path}: {e}")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -32,9 +32,11 @@ decl: type_decl
#| def_decl #| def_decl
#| expand_decl #| expand_decl
type_decl: "type" IDENT options{OPTIONS_TYPE} "." type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "." channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "." free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "." const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "." fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."

View File

@@ -0,0 +1,362 @@
# --- Terminals, option sets, and "set ... = ..." value rules ---------------
# Annotation markers recognized in marzipan-preprocessed models.
QUERY: "@query"
REACHABLE: "@reachable"
LEMMA: "@lemma"
PROCESS: "process"
EQUIVALENCE: "equivalence"
YIELD: "yield"
channel: CHANNEL
CHANNEL: "channel"
# Identifiers: ASCII letter first, then letters/digits/accented chars/'/_ .
IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
ZERO: "0"
# Infix operators allowed in (p)terms and gterms.
INFIX: "||"
| "&&"
| "="
| "<>"
| "<="
| ">="
| "<"
| ">"
typeid: channel
| IDENT
# Template helpers: comma-separated sequences, possibly empty.
_non_empty_seq{x}: x ("," x)*
_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
# Option sets accepted inside "[...]" per declaration kind
# (see the ProVerif manual for which options each declaration takes).
OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
OPTIONS_FUN: OPTIONS_FUN_CONST
OPTIONS_CONST: OPTIONS_FUN_CONST
OPTIONS_FREE_REDUC: "private"
OPTIONS_PRED: "memberOptim" | "block"
OPTIONS_PROCESS: "precise"
OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset"
OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
# FIX: the cv_* alternative was written as the quoted string "/cv_.../",
# which would only match that literal text; it is meant to be a regex
# matching any identifier starting with "cv_".
OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | /cv_[a-zA-Z0-9À-ÿ'_]*/
OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
# Optional bracketed option list: e.g. "[private]"; absent brackets = no options.
options{idents}: [ "[" _non_empty_seq{idents} "]" ]
BOOL : "true" | "false"
NONE: "none"
FULL: "full"
ALL: "all"
FUNC: IDENT
ignoretype_options: BOOL | ALL | NONE | "attacker"
# Names of settings taking a boolean value ("set <name> = true/false.").
boolean_settings_names: "privateCommOnPublicTerms"
| "rejectChoiceTrueFalse"
| "rejectNoSimplif"
| "allowDiffPatterns"
| "inductionQueries"
| "inductionLemmas"
| "movenew"
| "movelet"
| "stopTerm"
| "removeEventsForLemma"
| "simpEqAll"
| "eqInNames"
| "preciseLetExpand"
| "expandSimplifyIfCst"
| "featureFuns"
| "featureNames"
| "featurePredicates"
| "featureEvents"
| "featureTables"
| "featureDepth"
| "featureWidth"
| "simplifyDerivation"
| "abbreviateDerivation"
| "explainDerivation"
| "unifyDerivation"
| "reconstructDerivation"
| "displayDerivation"
| "traceBacktracking"
| "interactiveSwapping"
| "color"
| "verboseLemmas"
| "abbreviateClauses"
| "removeUselessClausesBeforeDisplay"
| "verboseEq"
| "verboseDestructors"
| "verboseTerm"
| "verboseStatistics"
| "verboseRules"
| "verboseBase"
| "verboseRedundant"
| "verboseCompleted"
| "verboseGoalReachable"
# Template for "set <name> = <value>." declarations.
_decl_pair{name, value}: "set" name "=" value "."
set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
ignore_types_values: BOOL | "all" | "none" | "attacker"
simplify_process_values: BOOL | "interactive"
precise_actions_values: BOOL | "trueWithoutArgsInNames"
redundant_hyp_elim_values: BOOL | "beginOnly"
reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
attacker_values: "active" | "passive"
key_compromise_values: "none" | "approx" | "strict"
predicates_implementable: "check" | "nocheck"
application_values: "instantiate" | "full" | "none" | "discard"
# FIX: "n" was a literal placeholder; the manual specifies an integer here
# ("set maxDepth = n." / "set maxHyp = n." with n a number, or "none").
max_values: "none" | NAT
sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
redundancy_elim_values: "best" | "simple" | "no"
nounif_ignore_a_few_times_values: "none" | "auto" | "all"
# FIX: same placeholder fix — nounifIgnoreNtimes takes an integer.
nounif_ignore_ntimes_values: NAT
trace_display_values: "short" | "long" | "none"
verbose_clauses_values: "none" | "explained" | "short"
INT: NAT | "-" NAT
# ProVerif comments: "(* ... *)" — the regex rejects a lone "*)" terminator
# inside, so nested-looking content is handled conservatively.
COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library
%ignore WS // Disregard spaces in text
%ignore COMMENT
# --- Top-level declarations -------------------------------------------------
# A model is a sequence of these declarations (see the `start` rule below).
decl: type_decl
| channel_decl
| free_decl
| const_decl
| fun_decl
| letfun_decl
| reduc_decl
| fun_reduc_decl
| equation_decl
| pred_decl
| table_decl
| let_decl
| set_settings_decl
| event_decl
| query_decl
| axiom_decl
| restriction_decl
| lemma_decl
| noninterf_decl
| weaksecret_decl
| not_decl
| select_decl
| noselect_decl
| nounif_decl
| elimtrue_decl
| clauses_decl
| module_decl
#| param_decl
#| proba_decl
#| letproba_decl
#| proof_decl
#| def_decl
#| expand_decl
# NOTE: the options{...} variants below are disabled for now; the
# commented alternative is the full form from the manual.
type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
# "set <name> = <value>." settings, one alternative per setting with a
# non-boolean value domain; booleans are handled by set_settings_boolean_decl.
set_settings_decl: set_settings_boolean_decl
| _decl_pair{"ignoreTypes", ignore_types_values}
| _decl_pair{"simplifyProcess", simplify_process_values}
| _decl_pair{"preciseActions", precise_actions_values}
| _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
| _decl_pair{"reconstructTrace", reconstruct_trace_values}
| _decl_pair{"attacker", attacker_values}
| _decl_pair{"keyCompromise", key_compromise_values}
| _decl_pair{"predicatesImplementable", predicates_implementable}
| _decl_pair{"saturationApplication", application_values}
| _decl_pair{"verificationApplication", application_values}
| _decl_pair{"maxDepth", max_values}
| _decl_pair{"maxHyp", max_values}
| _decl_pair{"selFun", sel_fun_values}
| _decl_pair{"redundancyElim", redundancy_elim_values}
| _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
| _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
| _decl_pair{"traceDisplay", trace_display_values}
| _decl_pair{"verboseClauses", verbose_clauses_values}
| set_strategy
| set_symb_order
# "set swapping = tag1 -> tag2 -> ..." and "set symbOrder = f > g > ...".
_swap_strategy_seq{x}: x ("->" x)*
set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
_symb_ord_seq{x}: x (">" x)*
set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
clauses_decl: "clauses" clauses "."
# NOTE(review): the explicit " " literal is unusual given "%ignore WS";
# confirm it is actually required between "@module" and the identifier.
module_decl: "@module" " " IDENT
noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
weaksecret_decl: "weaksecret" IDENT "."
nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
equality: term "=" term
| "let" IDENT "=" term "in" equality
mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
clause: term
| term "->" term
| term "<->" term
| term "<=>" term
clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
NAT: DIGIT+
# --- Entry point and processes ----------------------------------------------
# A model is declarations followed by a main process, or an equivalence of
# two processes.
start: decl* PROCESS process | decl* EQUIVALENCE process process
process: ZERO
| YIELD
| IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
| bracketed_process
| piped_process
| replicated_process
| replicated_process_bounds
| sample_process
| if_process
| in_process
| out_process
| let_process
| insert_process
| get_process
| event_process
| phase
| sync
bracketed_process: "(" process ")"
piped_process: process "|" process
replicated_process: "!" process
# Bounded replication, ProVerif ("!i <= N P") and CryptoVerif
# ("foreach i <= N do P") syntax.
replicated_process_bounds: "!" IDENT "<=" IDENT process
| "foreach" IDENT "<=" IDENT "do" process
sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
| IDENT "<-R" typeid [";" process]
let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
| IDENT [":" typeid] "<-" pterm [";" process]
| "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
if_process: "if" pterm "then" process [ "else" process ]
in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
# Plain terms (no process constructs); NOTE(review): the binary alternatives
# declare no precedence/associativity, so nested operators may parse
# ambiguously — confirm against Lark's Earley ambiguity handling.
term: IDENT
| NAT
| "(" _maybe_empty_seq{term} ")"
| IDENT "(" _maybe_empty_seq{term} ")"
| term ( "+" | "-" ) NAT
| NAT "+" term
| term INFIX term
| "not" "(" term ")"
phase: "phase" NAT [";" process]
sync: "sync" NAT ["[" TAG "]"] [";" process]
# --- Queries, nounif declarations, lemmas, and gterms -----------------------
query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
| "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
| "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // NOTE(review): whitespace is allowed between "event" and ":" here; confirm ProVerif accepts "event :" with a space when the output is reconstructed.
| "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
| IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
gformat: IDENT
| "*" IDENT
| IDENT "(" _maybe_empty_seq{gformat} ")"
| "choice" "[" gformat "," gformat "]"
| "not" "(" _maybe_empty_seq{gformat} ")"
| "new" IDENT [ "[" [ fbinding ] "]" ]
| "let" IDENT "=" gformat "in" gformat
fbinding: "!" NAT "=" gformat [";" fbinding]
| IDENT "=" gformat [";" fbinding]
nounifoption: "hypothesis"
| "conclusion"
| "ignoreAFewTimes"
| "inductionOn" "=" IDENT
| "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
# Optional marzipan annotation ("@query"/"@reachable" plus a string)
# preceding a query declaration; split out so it can be discarded by the
# tree transformer.
query_annotation: [(REACHABLE|QUERY) ESCAPED_STRING]
query_decl: query_annotation query_decl_core
query_decl_core: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
not_decl: "not" [ typedecl ";"] gterm "."
TAG: IDENT
lemma: gterm [";" lemma]
| gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
| gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
# Same annotation pattern as query_annotation, for "@lemma".
lemma_annotation: [LEMMA ESCAPED_STRING]
lemma_decl: lemma_annotation lemma_decl_core
lemma_decl_core: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
# gterms: terms as they appear in queries/lemmas, one named rule per form
# so the transformer can dispatch on the node name.
gterm: ident_gterm
| fun_gterm
| choice_gterm
| infix_gterm
| arith_gterm
| arith2_gterm
| event_gterm
| injevent_gterm
| implies_gterm
| paren_gterm
| sample_gterm
| let_gterm
ident_gterm: IDENT
fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
choice_gterm: "choice" "[" gterm "," gterm "]"
infix_gterm: gterm INFIX gterm
arith_gterm: gterm ( "+" | "-" ) NAT
arith2_gterm: NAT "+" gterm
event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
implies_gterm: gterm "==>" gterm
paren_gterm: "(" _maybe_empty_seq{gterm} ")"
sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
let_gterm: "let" IDENT "=" gterm "in" gterm
gbinding: "!" NAT "=" gterm [";" gbinding]
| IDENT "=" gterm [";" gbinding]
# --- pterms (process terms), patterns, and type declarations ----------------
# pterms extend plain terms with process-like constructs (if/let/new/...)
# usable in expression position.
pterm: IDENT
| NAT
| "(" _maybe_empty_seq{pterm} ")"
| IDENT "(" _maybe_empty_seq{pterm} ")"
| choice_pterm
| pterm ("+" | "-") NAT
| NAT "+" pterm
| pterm INFIX pterm
| not_pterm
| sample_pterm
| if_pterm
| let_pterm
| insert_pterm
| get_pterm
| event_pterm
choice_pterm: "choice[" pterm "," pterm "]"
if_pterm: "if" pterm "then" pterm [ "else" pterm ]
not_pterm: "not" "(" pterm ")"
let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
| IDENT [":" typeid] "<-" pterm ";" pterm
| "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
| IDENT "<-R" typeid [";" pterm]
insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
# Patterns for in/let/get; "=" pterm matches a value equal to the pterm.
pattern: IDENT [":" typeid]
| "_" [ ":" typeid ]
| NAT
| pattern "+" NAT
| NAT "+" pattern
| "(" _maybe_empty_seq{pattern} ")"
| IDENT "(" _maybe_empty_seq{pattern} ")"
| "=" pterm
# A term that is allowed to be the special value "fail" (destructor rewriting).
mayfailterm: term
| "fail"
mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
# "x, y: t, z: t'" style typed variable declarations; failtypedecl also
# allows the "or fail" marker.
typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]

View File

@@ -1,8 +1,36 @@
import re import re
import sys
from pathlib import Path from pathlib import Path
from lark import Lark, Token, Transformer, exceptions, tree from lark import Discard, Lark, Transformer
from lark.reconstruct import Reconstructor
class TreeVisitor(Transformer):
# def __default_token__(self, token):
# if token.type == "PROCESS":
# return ""
# elif token.type == "IDENT":
# return token
# # return f"<ident>{token}</ident>"
# def __default__(self, data, children, meta):
# #return "\n".join([c for c in children if c])
# amazing_str = ""
# for c in children:
# if c:
# amazing_str += c
# return f"<{data}> {amazing_str} </{data}>"
# lemma_decl_core: original_stuff
# lemma_annotation: [LEMMA ESCAPED_STRING]
# lemma_decl: lemma_annotation lemma_decl_core
# lemma_decl: original_stuff
def lemma_annotation(self, children):
return Discard
def query_annotation(self, children):
return Discard
# taken from Page 17 in the ProVerif manual # taken from Page 17 in the ProVerif manual
# At the moment, we do not reject a ProVerif model that uses reserved words as identifier, # At the moment, we do not reject a ProVerif model that uses reserved words as identifier,
@@ -118,35 +146,47 @@ LEMMA: "@lemma"
# add @query and @reachable to query_decl, @lemma to lemma_decl # add @query and @reachable to query_decl, @lemma to lemma_decl
def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str: def modify_decl_rule(
old_decl_renamed = f"{decl_rule}_core" grammar: str, old_rule_name: str, annot_rule_name: str, annot_rule: str
rename_target = f"{decl_rule}\s*:" ) -> str:
old_decl_renamed = f"{old_rule_name}_core"
rename_target = f"{old_rule_name}\s*:"
# rename *_decl -> *_decl_core # rename *_decl -> *_decl_core
rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1) grammar, count = re.subn(rename_target, f"{old_decl_renamed}:", grammar, count=1)
if count == 0: if count == 0:
raise RuntimeError("*_decl not found!") raise RuntimeError("*_decl not found!")
wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}" # lemma_annotation: [LEMMA ESCAPED_STRING]
annot_rule = f"{annot_rule_name}: {annot_rule}"
# lemma_decl: lemma_annotation lemma_decl_core
wrapper = f"{old_rule_name}: {annot_rule_name} {old_decl_renamed}"
old_decl_target = f"{old_decl_renamed}\s*:" old_decl_target = f"{old_decl_renamed}\s*:"
# get index of *_decl_core rule # get index of *_decl_core rule
match = re.search(old_decl_target, rules, flags=re.MULTILINE) match = re.search(old_decl_target, grammar, flags=re.MULTILINE)
if not match: if not match:
raise RuntimeError("*_decl_core: rule not found after rename") raise RuntimeError("*_decl_core: rule not found after rename")
insert_pos = match.start() insert_pos = match.start()
rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:] grammar = (
return rules grammar[:insert_pos] + annot_rule + "\n" + wrapper + "\n" + grammar[insert_pos:]
)
return grammar
query_rules = modify_decl_rule( query_rules = modify_decl_rule(
query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]" query_rules, "query_decl", "query_annotation", "[(REACHABLE|QUERY) ESCAPED_STRING]"
) )
lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]") lemma_rules = modify_decl_rule(
lemma_rules, "lemma_decl", "lemma_annotation", "[LEMMA ESCAPED_STRING]"
)
grammar = ( grammar = (
common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules
) )
parser = Lark(grammar) with open("./src/grammars/awk_proverif.lark", "w") as f:
f.write(grammar)
parser = Lark(grammar, maybe_placeholders=False)
# COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ # COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
# COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)" # COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)"
@@ -165,8 +205,26 @@ def parsertest(input):
return parsetree return parsetree
def parse_main(file_path): """
with open(file_path, "r") as f: parse in i.pv file using new awk-marzipan grammar,
eliminate aliases from i.pv using lark TreeVisitor,
then return o.pv (awk_prep)
"""
def parse_main(ipv_path, opv_path):
with open(ipv_path, "r") as f:
content = f.read() content = f.read()
# print(content) # content += "\nprocess main"
parsertest(content)
forest = parsertest(content)
with open("i.pv", "w") as ipvf:
ipvf.write(forest.pretty())
tree = TreeVisitor().transform(forest)
# TODO: tree -> o.pv
new_json = Reconstructor(parser).reconstruct(tree)
with open(opv_path, "w") as opvf:
# opvf.write(tree.pretty())
opvf.write(new_json)