diff --git a/marzipan/TODO.md b/marzipan/TODO.md
index b88a184..38ae15f 100644
--- a/marzipan/TODO.md
+++ b/marzipan/TODO.md
@@ -39,15 +39,15 @@
 * ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~
 * ~                  ^~
 * ~04_dos… has a syntax error (see below)~
+  * ~~rewrite marzipan.awk into Python/LARK~~
+    * ~~define a LARK grammar for marzipan.awk rules~~
+    * ~~write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)~~
 
 ## Next Steps
 
 * integrate marzipan.awk into Python, somehow
-  * options term special cases (c.f. manual page 133, starting with "fun" term)
-    * complete with CryptoVerif options
-* rewrite marzipan.awk into Python/LARK
-  * define a LARK grammar for marzipan.awk rules
-  * write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
+* options term special cases (c.f. manual page 133, starting with "fun" term)
+  * complete with CryptoVerif options
 * do not assume that the repo path has subdir marzipan
 * do not assume that the repo path has subdir analysis
 * rewrite cpp into Python/LARK (step: mpv->i.pv)
diff --git a/marzipan/src/__init__.py b/marzipan/src/__init__.py
index 220561a..211f3dc 100644
--- a/marzipan/src/__init__.py
+++ b/marzipan/src/__init__.py
@@ -1,9 +1,9 @@
-from .util import pkgs, setup_exports, export, rename
-from .parser import *
-
 # from rich.console import Console
 import click
 
+from .parser import *
+from .util import export, pkgs, rename, setup_exports
+
 target_subdir = "target/proverif"
 
 (__all__, export) = setup_exports()
@@ -25,7 +25,7 @@ def set_logging_logrecordfactory(filename):
     pkgs.logging.setLogRecordFactory(record_factory)
 
 
-#def set_logging_format(max_length):
+# def set_logging_format(max_length):
 #    pass
 #    #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}"
 #    #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str)
@@ -218,9 +218,9 @@ def analyze(repo_path, output):
     full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv"))
     entries.extend(full_paths)
 
-    #modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
-    #max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
-    #set_logging_format(max_length)
+    # modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
+    # max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
+    # set_logging_format(max_length)
 
     with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
         futures = {
@@ -276,7 +276,8 @@ def metaverif(repo_path, tmpdir, file):
     print(f"AWK Prep Path: {awk_prep}")
 
     cpp(file, cpp_prep)
-    awk(repo_path, cpp_prep, awk_prep)
+    parse_main(cpp_prep, awk_prep)
+    # awk(repo_path, cpp_prep, awk_prep)
 
     log_file = pkgs.os.path.join(tmpdir, f"{name}.log")
 
@@ -300,12 +301,13 @@
 
 
 @main.command()
-@click.argument("file_path")
-def parse(file_path):
+@click.argument("i_path")
+@click.argument("o_path")
+def parse(i_path, o_path):
     try:
-        parse_main(file_path)
+        parse_main(i_path, o_path)
     except pkgs.lark.exceptions.UnexpectedCharacters as e:
-        logger.error(f"Error {type(e).__name__} parsing {file_path}: {e}")
+        logger.error(f"Error {type(e).__name__} parsing {i_path}: {e}")
 
 
 if __name__ == "__main__":
diff --git a/marzipan/src/grammar/decl.lark b/marzipan/src/grammar/decl.lark
index ab73993..49437db 100644
--- a/marzipan/src/grammar/decl.lark
+++ b/marzipan/src/grammar/decl.lark
@@ -32,9 +32,11 @@ decl: type_decl
     #| def_decl
     #| expand_decl
 
-type_decl: "type" IDENT options{OPTIONS_TYPE} "."
+type_decl: "type" IDENT "."
+#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
 channel_decl: "channel" _non_empty_seq{IDENT} "."
-free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
+free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
+#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
 const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
 fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
 letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
diff --git a/marzipan/src/grammars/awk_proverif.lark b/marzipan/src/grammars/awk_proverif.lark
new file mode 100644
index 0000000..80900f9
--- /dev/null
+++ b/marzipan/src/grammars/awk_proverif.lark
@@ -0,0 +1,362 @@
+
+QUERY: "@query"
+REACHABLE: "@reachable"
+LEMMA: "@lemma"
+PROCESS: "process"
+EQUIVALENCE: "equivalence"
+YIELD: "yield"
+channel: CHANNEL
+CHANNEL: "channel"
+IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
+ZERO: "0"
+INFIX: "||"
+    | "&&"
+    | "="
+    | "<>"
+    | "<="
+    | ">="
+    | "<"
+    | ">"
+typeid: channel
+    | IDENT
+_non_empty_seq{x}: x ("," x)*
+_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
+
+OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
+OPTIONS_FUN: OPTIONS_FUN_CONST
+OPTIONS_CONST: OPTIONS_FUN_CONST
+OPTIONS_FREE_REDUC: "private"
+OPTIONS_PRED: "memberOptim" | "block"
+OPTIONS_PROCESS: "precise"
+OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
+OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
+OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
+OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset"
+OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
+OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | "/cv_[a-zA-Z0-9À-ÿ'_]*/"
+OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
+OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
+OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
+options{idents}: [ "[" _non_empty_seq{idents} "]" ]
+BOOL : "true" | "false"
+NONE: "none"
+FULL: "full"
+ALL: "all"
+FUNC: IDENT
+ignoretype_options: BOOL | ALL | NONE | "attacker"
+boolean_settings_names: "privateCommOnPublicTerms"
+    | "rejectChoiceTrueFalse"
+    | "rejectNoSimplif"
+    | "allowDiffPatterns"
+    | "inductionQueries"
+    | "inductionLemmas"
+    | "movenew"
+    | "movelet"
+    | "stopTerm"
+    | "removeEventsForLemma"
+    | "simpEqAll"
+    | "eqInNames"
+    | "preciseLetExpand"
+    | "expandSimplifyIfCst"
+    | "featureFuns"
+    | "featureNames"
+    | "featurePredicates"
+    | "featureEvents"
+    | "featureTables"
+    | "featureDepth"
+    | "featureWidth"
+    | "simplifyDerivation"
+    | "abbreviateDerivation"
+    | "explainDerivation"
+    | "unifyDerivation"
+    | "reconstructDerivation"
+    | "displayDerivation"
+    | "traceBacktracking"
+    | "interactiveSwapping"
+    | "color"
+    | "verboseLemmas"
+    | "abbreviateClauses"
+    | "removeUselessClausesBeforeDisplay"
+    | "verboseEq"
+    | "verboseDestructors"
+    | "verboseTerm"
+    | "verboseStatistics"
+    | "verboseRules"
+    | "verboseBase"
+    | "verboseRedundant"
+    | "verboseCompleted"
+    | "verboseGoalReachable"
+
+_decl_pair{name, value}: "set" name "=" value "."
+
+set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
+
+ignore_types_values: BOOL | "all" | "none" | "attacker"
+simplify_process_values: BOOL | "interactive"
+precise_actions_values: BOOL | "trueWithoutArgsInNames"
+redundant_hyp_elim_values: BOOL | "beginOnly"
+reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
+attacker_values: "active" | "passive"
+key_compromise_values: "none" | "approx" | "strict"
+predicates_implementable: "check" | "nocheck"
+application_values: "instantiate" | "full" | "none" | "discard"
+max_values: "none" | "n"
+sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
+redundancy_elim_values: "best" | "simple" | "no"
+nounif_ignore_a_few_times_values: "none" | "auto" | "all"
+nounif_ignore_ntimes_values: "n"
+trace_display_values: "short" | "long" | "none"
+verbose_clauses_values: "none" | "explained" | "short"
+INT: NAT | "-" NAT
+COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
+
+%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library
+%ignore WS // Disregard spaces in text
+%ignore COMMENT
+decl: type_decl
+    | channel_decl
+    | free_decl
+    | const_decl
+    | fun_decl
+    | letfun_decl
+    | reduc_decl
+    | fun_reduc_decl
+    | equation_decl
+    | pred_decl
+    | table_decl
+    | let_decl
+    | set_settings_decl
+    | event_decl
+    | query_decl
+    | axiom_decl
+    | restriction_decl
+    | lemma_decl
+    | noninterf_decl
+    | weaksecret_decl
+    | not_decl
+    | select_decl
+    | noselect_decl
+    | nounif_decl
+    | elimtrue_decl
+    | clauses_decl
+    | module_decl
+    #| param_decl
+    #| proba_decl
+    #| letproba_decl
+    #| proof_decl
+    #| def_decl
+    #| expand_decl
+
+type_decl: "type" IDENT "."
+#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
+channel_decl: "channel" _non_empty_seq{IDENT} "."
+free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
+#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
+const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
+fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
+letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
+reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
+fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
+equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
+pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
+table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
+let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
+
+set_settings_decl: set_settings_boolean_decl
+    | _decl_pair{"ignoreTypes", ignore_types_values}
+    | _decl_pair{"simplifyProcess", simplify_process_values}
+    | _decl_pair{"preciseActions", precise_actions_values}
+    | _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
+    | _decl_pair{"reconstructTrace", reconstruct_trace_values}
+    | _decl_pair{"attacker", attacker_values}
+    | _decl_pair{"keyCompromise", key_compromise_values}
+    | _decl_pair{"predicatesImplementable", predicates_implementable}
+    | _decl_pair{"saturationApplication", application_values}
+    | _decl_pair{"verificationApplication", application_values}
+    | _decl_pair{"maxDepth", max_values}
+    | _decl_pair{"maxHyp", max_values}
+    | _decl_pair{"selFun", sel_fun_values}
+    | _decl_pair{"redundancyElim", redundancy_elim_values}
+    | _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
+    | _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
+    | _decl_pair{"traceDisplay", trace_display_values}
+    | _decl_pair{"verboseClauses", verbose_clauses_values}
+    | set_strategy
+    | set_symb_order
+
+_swap_strategy_seq{x}: x ("->" x)*
+set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
+_symb_ord_seq{x}: x (">" x)*
+set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
+
+event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
+select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
+nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
+
+elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
+clauses_decl: "clauses" clauses "."
+
+module_decl: "@module" " " IDENT
+
+noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
+weaksecret_decl: "weaksecret" IDENT "."
+
+nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
+equality: term "=" term
+    | "let" IDENT "=" term "in" equality
+mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
+eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
+clause: term
+    | term "->" term
+    | term "<->" term
+    | term "<=>" term
+clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
+mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
+NAT: DIGIT+
+start: decl* PROCESS process | decl* EQUIVALENCE process process
+process: ZERO
+    | YIELD
+    | IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
+    | bracketed_process
+    | piped_process
+    | replicated_process
+    | replicated_process_bounds
+    | sample_process
+    | if_process
+    | in_process
+    | out_process
+    | let_process
+    | insert_process
+    | get_process
+    | event_process
+    | phase
+    | sync
+bracketed_process: "(" process ")"
+piped_process: process "|" process
+replicated_process: "!" process
+replicated_process_bounds: "!" IDENT "<=" IDENT process
+    | "foreach" IDENT "<=" IDENT "do" process
+sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
+    | IDENT "<-R" typeid [";" process]
+let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
+    | IDENT [":" typeid] "<-" pterm [";" process]
+    | "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+if_process: "if" pterm "then" process [ "else" process ]
+in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
+get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
+out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
+insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
+event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
+term: IDENT
+    | NAT
+    | "(" _maybe_empty_seq{term} ")"
+    | IDENT "(" _maybe_empty_seq{term} ")"
+    | term ( "+" | "-" ) NAT
+    | NAT "+" term
+    | term INFIX term
+    | "not" "(" term ")"
+
+phase: "phase" NAT [";" process]
+sync: "sync" NAT ["[" TAG "]"] [";" process]
+query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
+    | "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
+    | "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // Opportunistically left a space between "event" and ":", ProVerif might not accept it with spaces.
+    | "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
+nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
+    | IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
+gformat: IDENT
+    | "*" IDENT
+    | IDENT "(" _maybe_empty_seq{gformat} ")"
+    | "choice" "[" gformat "," gformat "]"
+    | "not" "(" _maybe_empty_seq{gformat} ")"
+    | "new" IDENT [ "[" [ fbinding ] "]" ]
+    | "let" IDENT "=" gformat "in" gformat
+fbinding: "!" NAT "=" gformat [";" fbinding]
+    | IDENT "=" gformat [";" fbinding]
+nounifoption: "hypothesis"
+    | "conclusion"
+    | "ignoreAFewTimes"
+    | "inductionOn" "=" IDENT
+    | "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
+
+query_annotation: [(REACHABLE|QUERY) ESCAPED_STRING]
+query_decl: query_annotation query_decl_core
+query_decl_core: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
+not_decl: "not" [ typedecl ";"] gterm "."
+TAG: IDENT
+lemma: gterm [";" lemma]
+    | gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
+    | gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
+axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
+restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
+lemma_annotation: [LEMMA ESCAPED_STRING]
+lemma_decl: lemma_annotation lemma_decl_core
+lemma_decl_core: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
+gterm: ident_gterm
+    | fun_gterm
+    | choice_gterm
+    | infix_gterm
+    | arith_gterm
+    | arith2_gterm
+    | event_gterm
+    | injevent_gterm
+    | implies_gterm
+    | paren_gterm
+    | sample_gterm
+    | let_gterm
+ident_gterm: IDENT
+fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
+choice_gterm: "choice" "[" gterm "," gterm "]"
+infix_gterm: gterm INFIX gterm
+arith_gterm: gterm ( "+" | "-" ) NAT
+arith2_gterm: NAT "+" gterm
+event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
+implies_gterm: gterm "==>" gterm
+paren_gterm: "(" _maybe_empty_seq{gterm} ")"
+sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
+let_gterm: "let" IDENT "=" gterm "in" gterm
+
+gbinding: "!" NAT "=" gterm [";" gbinding]
+    | IDENT "=" gterm [";" gbinding]
+
+pterm: IDENT
+    | NAT
+    | "(" _maybe_empty_seq{pterm} ")"
+    | IDENT "(" _maybe_empty_seq{pterm} ")"
+    | choice_pterm
+    | pterm ("+" | "-") NAT
+    | NAT "+" pterm
+    | pterm INFIX pterm
+    | not_pterm
+    | sample_pterm
+    | if_pterm
+    | let_pterm
+    | insert_pterm
+    | get_pterm
+    | event_pterm
+choice_pterm: "choice[" pterm "," pterm "]"
+if_pterm: "if" pterm "then" pterm [ "else" pterm ]
+not_pterm: "not" "(" pterm ")"
+let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
+    | IDENT [":" typeid] "<-" pterm ";" pterm
+    | "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
+sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
+    | IDENT "<-R" typeid [";" pterm]
+insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
+event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
+get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
+pattern: IDENT [":" typeid]
+    | "_" [ ":" typeid ]
+    | NAT
+    | pattern "+" NAT
+    | NAT "+" pattern
+    | "(" _maybe_empty_seq{pattern} ")"
+    | IDENT "(" _maybe_empty_seq{pattern} ")"
+    | "=" pterm
+mayfailterm: term
+    | "fail"
+mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
+typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
+failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]
diff --git a/marzipan/src/parser.py b/marzipan/src/parser.py
index fc3271d..c874d1b 100644
--- a/marzipan/src/parser.py
+++ b/marzipan/src/parser.py
@@ -1,8 +1,36 @@
 import re
-import sys
 from pathlib import Path
 
-from lark import Lark, Token, Transformer, exceptions, tree
+from lark import Discard, Lark, Transformer
+from lark.reconstruct import Reconstructor
+
+
+class TreeVisitor(Transformer):
+    # def __default_token__(self, token):
+    #     if token.type == "PROCESS":
+    #         return ""
+    #     elif token.type == "IDENT":
+    #         return token
+    #     # return f"{token}"
+
+    # def __default__(self, data, children, meta):
+    #     #return "\n".join([c for c in children if c])
+    #     amazing_str = ""
+    #     for c in children:
+    #         if c:
+    #             amazing_str += c
+    #     return f"<{data}> {amazing_str} "
+
+    # lemma_decl_core: original_stuff
+    # lemma_annotation: [LEMMA ESCAPED_STRING]
+    # lemma_decl: lemma_annotation lemma_decl_core
+    # lemma_decl: original_stuff
+    def lemma_annotation(self, children):
+        return Discard
+
+    def query_annotation(self, children):
+        return Discard
+
 
 # taken from Page 17 in the ProVerif manual
 # At the moment, we do not reject a ProVerif model that uses reserved words as identifier,
@@ -118,35 +118,47 @@ LEMMA: "@lemma"
 
 # add @query and @reachable to query_decl, @lemma to lemma_decl
-def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str:
-    old_decl_renamed = f"{decl_rule}_core"
-    rename_target = f"{decl_rule}\s*:"
+def modify_decl_rule(
+    grammar: str, old_rule_name: str, annot_rule_name: str, annot_rule: str
+) -> str:
+    old_decl_renamed = f"{old_rule_name}_core"
+    rename_target = f"{old_rule_name}\s*:"
     # rename *_decl -> *_decl_core
-    rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1)
+    grammar, count = re.subn(rename_target, f"{old_decl_renamed}:", grammar, count=1)
     if count == 0:
         raise RuntimeError("*_decl not found!")
 
-    wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}"
+    # lemma_annotation: [LEMMA ESCAPED_STRING]
+    annot_rule = f"{annot_rule_name}: {annot_rule}"
+    # lemma_decl: lemma_annotation lemma_decl_core
+    wrapper = f"{old_rule_name}: {annot_rule_name} {old_decl_renamed}"
     old_decl_target = f"{old_decl_renamed}\s*:"
     # get index of *_decl_core rule
-    match = re.search(old_decl_target, rules, flags=re.MULTILINE)
+    match = re.search(old_decl_target, grammar, flags=re.MULTILINE)
     if not match:
         raise RuntimeError("*_decl_core: rule not found after rename")
 
     insert_pos = match.start()
-    rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:]
-    return rules
+    grammar = (
+        grammar[:insert_pos] + annot_rule + "\n" + wrapper + "\n" + grammar[insert_pos:]
+    )
+    return grammar
 
 
 query_rules = modify_decl_rule(
-    query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]"
+    query_rules, "query_decl", "query_annotation", "[(REACHABLE|QUERY) ESCAPED_STRING]"
 )
-lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]")
+lemma_rules = modify_decl_rule(
+    lemma_rules, "lemma_decl", "lemma_annotation", "[LEMMA ESCAPED_STRING]"
+)
 
 grammar = (
     common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules
)
 
-parser = Lark(grammar)
+with open("./src/grammars/awk_proverif.lark", "w") as f:
+    f.write(grammar)
+
+parser = Lark(grammar, maybe_placeholders=False)
 
 # COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
 # COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)"
@@ -165,8 +205,26 @@ def parsertest(input):
     return parsetree
 
 
-def parse_main(file_path):
-    with open(file_path, "r") as f:
+"""
+    parse in i.pv file using new awk-marzipan grammar,
+    eliminate aliases from i.pv using lark TreeVisitor,
+    then return o.pv (awk_prep)
+"""
+
+
+def parse_main(ipv_path, opv_path):
+    with open(ipv_path, "r") as f:
         content = f.read()
-    # print(content)
-    parsertest(content)
+    # content += "\nprocess main"
+
+    forest = parsertest(content)
+    with open("i.pv", "w") as ipvf:
+        ipvf.write(forest.pretty())
+
+    tree = TreeVisitor().transform(forest)
+    # TODO: tree -> o.pv
+    new_json = Reconstructor(parser).reconstruct(tree)
+
+    with open(opv_path, "w") as opvf:
+        # opvf.write(tree.pretty())
+        opvf.write(new_json)
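
Illustration (not part of the patch): parse_main() above is a parse/transform/reconstruct round-trip in Lark. It parses the .pv input, discards the @query/@reachable/@lemma annotation subtrees with a Transformer, then regenerates source text with Reconstructor. The minimal sketch below exercises the same Lark calls on a made-up toy grammar; toy_grammar, DropAnnotations and the sample input are hypothetical stand-ins for the generated awk_proverif.lark grammar and TreeVisitor, only the library API usage mirrors parser.py.

from lark import Discard, Lark, Transformer
from lark.reconstruct import Reconstructor

# Hypothetical miniature grammar standing in for awk_proverif.lark.
toy_grammar = r"""
start: stmt+
stmt: annotation? "query" NAME "."
annotation: QUERY ESCAPED_STRING
QUERY: "@query"
NAME: /[a-z_]+/
%import common (ESCAPED_STRING, WS)
%ignore WS
"""

# Same Lark options as parser.py uses for the real grammar.
toy_parser = Lark(toy_grammar, maybe_placeholders=False)


class DropAnnotations(Transformer):
    # Same idea as TreeVisitor: remove annotation subtrees before reconstruction.
    def annotation(self, children):
        return Discard


tree = toy_parser.parse('@query "reachability" query attacker_knows_key. query forward_secrecy.')
stripped = DropAnnotations().transform(tree)
print(Reconstructor(toy_parser).reconstruct(stripped))
# Prints the two query statements with the @query annotation removed
# (token spacing may differ from the input).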