WIP: awk replacement
Some checks failed
rosenpass-ciphers - primitives - benchmark / prim-benchmark (i686-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / prim-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (i686-linux) (push) Has been cancelled
rosenpass - protocol - benchmark / proto-benchmark (x86_64-linux) (push) Has been cancelled
rosenpass-ciphers - primitives - benchmark / ciphers-primitives-bench-status (push) Has been cancelled
rosenpass - protocol - benchmark / ciphers-protocol-bench-status (push) Has been cancelled

Co-authored-by: Benjamin Lipp <blipp@rosenpass.eu>
This commit is contained in:
Anja Rabich
2025-12-15 18:05:32 +01:00
parent 0a945aa152
commit 444e782854
5 changed files with 460 additions and 36 deletions

View File

@@ -39,15 +39,15 @@
* ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~ * ~`in(C, Cinit_conf(Ssskm, Spsk, Sspkt, ic));`~
* ~ ^~ * ~ ^~
* ~04_dos… has a syntax error (see below)~ * ~04_dos… has a syntax error (see below)~
* ~~rewrite marzipan.awk into Python/LARK~~
* ~~define a LARK grammar for marzipan.awk rules~~
* ~~write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)~~
## Next Steps
* integrate marzipan.awk into Python, somehow
* options term special cases (c.f. manual page 133, starting with "fun" term)
* complete with CryptoVerif options
* rewrite marzipan.awk into Python/LARK
* define a LARK grammar for marzipan.awk rules
* write python code for processing marzipan rules, e.g. alias replacement (step: i.pv->o.pv)
* do not assume that the repo path has subdir marzipan
* do not assume that the repo path has subdir analysis
* rewrite cpp into Python/LARK (step: mpv->i.pv)

View File

@@ -1,9 +1,9 @@
from .util import pkgs, setup_exports, export, rename
from .parser import *
# from rich.console import Console # from rich.console import Console
import click import click
from .parser import *
from .util import export, pkgs, rename, setup_exports
target_subdir = "target/proverif" target_subdir = "target/proverif"
(__all__, export) = setup_exports() (__all__, export) = setup_exports()
@@ -25,7 +25,7 @@ def set_logging_logrecordfactory(filename):
pkgs.logging.setLogRecordFactory(record_factory) pkgs.logging.setLogRecordFactory(record_factory)
#def set_logging_format(max_length): # def set_logging_format(max_length):
# pass # pass
# #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}" # #format_str = "{levelname:<8} {filename:<" + str(max_length + 2) + "} {message}"
# #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str) # #pkgs.logging.basicConfig(level=pkgs.logging.DEBUG, style="{", format=format_str)
@@ -218,9 +218,9 @@ def analyze(repo_path, output):
full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv")) full_paths = sorted(pkgs.glob.glob(str(analysis_dir) + "/*.entry.mpv"))
entries.extend(full_paths) entries.extend(full_paths)
#modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths] # modelnames = [pkgs.os.path.basename(path).replace('.entry.mpv', '') for path in full_paths]
#max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0 # max_length = max(len(modelname) for modelname in modelnames) if modelnames else 0
#set_logging_format(max_length) # set_logging_format(max_length)
with pkgs.concurrent.futures.ProcessPoolExecutor() as executor: with pkgs.concurrent.futures.ProcessPoolExecutor() as executor:
futures = { futures = {
@@ -276,7 +276,8 @@ def metaverif(repo_path, tmpdir, file):
print(f"AWK Prep Path: {awk_prep}") print(f"AWK Prep Path: {awk_prep}")
cpp(file, cpp_prep) cpp(file, cpp_prep)
awk(repo_path, cpp_prep, awk_prep) parse_main(cpp_prep, awk_prep)
# awk(repo_path, cpp_prep, awk_prep)
log_file = pkgs.os.path.join(tmpdir, f"{name}.log") log_file = pkgs.os.path.join(tmpdir, f"{name}.log")
@@ -300,12 +301,13 @@ def metaverif(repo_path, tmpdir, file):
@main.command() @main.command()
@click.argument("file_path") @click.argument("i_path")
def parse(file_path): @click.argument("o_path")
def parse(i_path, o_path):
try: try:
parse_main(file_path) parse_main(i_path, o_path)
except pkgs.lark.exceptions.UnexpectedCharacters as e: except pkgs.lark.exceptions.UnexpectedCharacters as e:
logger.error(f"Error {type(e).__name__} parsing {file_path}: {e}") logger.error(f"Error {type(e).__name__} parsing {i_path}: {e}")
if __name__ == "__main__": if __name__ == "__main__":

View File

@@ -32,9 +32,11 @@ decl: type_decl
#| def_decl #| def_decl
#| expand_decl #| expand_decl
type_decl: "type" IDENT options{OPTIONS_TYPE} "." type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "." channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "." free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "." const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "." fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "." letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."

View File

@@ -0,0 +1,362 @@
# --- Terminals, option sets, and "set ... = ..." value rules ---------------
# Annotation markers recognized in marzipan-preprocessed models.
QUERY: "@query"
REACHABLE: "@reachable"
LEMMA: "@lemma"
PROCESS: "process"
EQUIVALENCE: "equivalence"
YIELD: "yield"
channel: CHANNEL
CHANNEL: "channel"
# Identifiers: ASCII letter first, then letters/digits/accented chars/'/_ .
IDENT: /[a-zA-Z][a-zA-Z0-9À-ÿ'_]*/
ZERO: "0"
# Infix operators allowed in (p)terms and gterms.
INFIX: "||"
| "&&"
| "="
| "<>"
| "<="
| ">="
| "<"
| ">"
typeid: channel
| IDENT
# Template helpers: comma-separated sequences, possibly empty.
_non_empty_seq{x}: x ("," x)*
_maybe_empty_seq{x}: [ _non_empty_seq{x} ]
# Option sets accepted inside "[...]" per declaration kind
# (see the ProVerif manual for which options each declaration takes).
OPTIONS_FUN_CONST: "data" | "private" | "typeConverter"
OPTIONS_FUN: OPTIONS_FUN_CONST
OPTIONS_CONST: OPTIONS_FUN_CONST
OPTIONS_FREE_REDUC: "private"
OPTIONS_PRED: "memberOptim" | "block"
OPTIONS_PROCESS: "precise"
OPTIONS_QUERY_LEMMA_AXIOM: "noneSat" | "discardSat" | "instantiateSat" | "fullSat" | "noneVerif" | "discardVerif" | "instantiateVerif" | "fullVerif"
OPTIONS_AXIOM: OPTIONS_QUERY_LEMMA_AXIOM
OPTIONS_QUERY_LEMMA: OPTIONS_QUERY_LEMMA_AXIOM | "induction" | "noInduction"
OPTIONS_LEMMA: OPTIONS_QUERY_LEMMA | "maxSubset"
OPTIONS_QUERY: OPTIONS_QUERY_LEMMA | "proveAll"
# FIX: the cv_* alternative was written as the quoted string "/cv_.../",
# which would only match that literal text; it is meant to be a regex
# matching any identifier starting with "cv_".
OPTIONS_QUERY_SECRET: "reachability" | "pv_reachability" | "real_or_random" | "pv_real_or_random" | /cv_[a-zA-Z0-9À-ÿ'_]*/
OPTIONS_RESTRICTION: "removeEvents" | "keepEvents" | "keep" # transl_option_lemma_query in pitsyntax.ml
OPTIONS_EQUATION: "convergent" | "linear" # check_equations in pitsyntax.ml
OPTIONS_TYPE: "fixed" | "bounded" | "large" | "nonuniform" | "password" | "size" NAT | "size" NAT "_" NAT | "pcoll" NAT | "small" | "ghost" # from Page 22 in CryptoVerif reference manual
# Optional bracketed option list: e.g. "[private]"; absent brackets = no options.
options{idents}: [ "[" _non_empty_seq{idents} "]" ]
BOOL : "true" | "false"
NONE: "none"
FULL: "full"
ALL: "all"
FUNC: IDENT
ignoretype_options: BOOL | ALL | NONE | "attacker"
# Names of settings taking a boolean value ("set <name> = true/false.").
boolean_settings_names: "privateCommOnPublicTerms"
| "rejectChoiceTrueFalse"
| "rejectNoSimplif"
| "allowDiffPatterns"
| "inductionQueries"
| "inductionLemmas"
| "movenew"
| "movelet"
| "stopTerm"
| "removeEventsForLemma"
| "simpEqAll"
| "eqInNames"
| "preciseLetExpand"
| "expandSimplifyIfCst"
| "featureFuns"
| "featureNames"
| "featurePredicates"
| "featureEvents"
| "featureTables"
| "featureDepth"
| "featureWidth"
| "simplifyDerivation"
| "abbreviateDerivation"
| "explainDerivation"
| "unifyDerivation"
| "reconstructDerivation"
| "displayDerivation"
| "traceBacktracking"
| "interactiveSwapping"
| "color"
| "verboseLemmas"
| "abbreviateClauses"
| "removeUselessClausesBeforeDisplay"
| "verboseEq"
| "verboseDestructors"
| "verboseTerm"
| "verboseStatistics"
| "verboseRules"
| "verboseBase"
| "verboseRedundant"
| "verboseCompleted"
| "verboseGoalReachable"
# Template for "set <name> = <value>." declarations.
_decl_pair{name, value}: "set" name "=" value "."
set_settings_boolean_decl: _decl_pair{boolean_settings_names, BOOL}
ignore_types_values: BOOL | "all" | "none" | "attacker"
simplify_process_values: BOOL | "interactive"
precise_actions_values: BOOL | "trueWithoutArgsInNames"
redundant_hyp_elim_values: BOOL | "beginOnly"
reconstruct_trace_values: BOOL | NAT | "yes" | "no" # TODO(blipp): check whether yes/no is always allowed for BOOL
attacker_values: "active" | "passive"
key_compromise_values: "none" | "approx" | "strict"
predicates_implementable: "check" | "nocheck"
application_values: "instantiate" | "full" | "none" | "discard"
# FIX: "n" was a literal placeholder; the manual specifies an integer here
# ("set maxDepth = n." / "set maxHyp = n." with n a number, or "none").
max_values: "none" | NAT
sel_fun_values: "TermMaxsize" | "Term"| "NounifsetMaxsize" | "Nounifset"
redundancy_elim_values: "best" | "simple" | "no"
nounif_ignore_a_few_times_values: "none" | "auto" | "all"
# FIX: same placeholder fix — nounifIgnoreNtimes takes an integer.
nounif_ignore_ntimes_values: NAT
trace_display_values: "short" | "long" | "none"
verbose_clauses_values: "none" | "explained" | "short"
INT: NAT | "-" NAT
# ProVerif comments: "(* ... *)" — the regex rejects a lone "*)" terminator
# inside, so nested-looking content is handled conservatively.
COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
%import common (WORD, DIGIT, NUMBER, WS, ESCAPED_STRING) // imports from terminal library
%ignore WS // Disregard spaces in text
%ignore COMMENT
# --- Top-level declarations -------------------------------------------------
# A model is a sequence of these declarations (see the `start` rule below).
decl: type_decl
| channel_decl
| free_decl
| const_decl
| fun_decl
| letfun_decl
| reduc_decl
| fun_reduc_decl
| equation_decl
| pred_decl
| table_decl
| let_decl
| set_settings_decl
| event_decl
| query_decl
| axiom_decl
| restriction_decl
| lemma_decl
| noninterf_decl
| weaksecret_decl
| not_decl
| select_decl
| noselect_decl
| nounif_decl
| elimtrue_decl
| clauses_decl
| module_decl
#| param_decl
#| proba_decl
#| letproba_decl
#| proof_decl
#| def_decl
#| expand_decl
# NOTE: the options{...} variants below are disabled for now; the
# commented alternative is the full form from the manual.
type_decl: "type" IDENT "."
#type_decl: "type" IDENT options{OPTIONS_TYPE} "."
channel_decl: "channel" _non_empty_seq{IDENT} "."
free_decl: "free" _non_empty_seq{IDENT} ":" typeid "."
#free_decl: "free" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FREE_REDUC} "."
const_decl: "const" _non_empty_seq{IDENT} ":" typeid options{OPTIONS_FUN_CONST} "."
fun_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid options{OPTIONS_FUN_CONST} "."
letfun_decl: "letfun" IDENT [ "(" [ typedecl ] ")" ] "=" pterm "."
reduc_decl: "reduc" eqlist options{OPTIONS_FREE_REDUC} "."
fun_reduc_decl: "fun" IDENT "(" _maybe_empty_seq{typeid} ")" ":" typeid "reduc" mayfailreduc options{OPTIONS_FUN_CONST} "."
equation_decl: "equation" eqlist options{OPTIONS_EQUATION} "."
pred_decl: "pred" IDENT [ "(" [ _maybe_empty_seq{typeid} ] ")" ] options{OPTIONS_PRED} "."
table_decl: "table" IDENT "(" _maybe_empty_seq{typeid} ")" "."
let_decl: "let" IDENT [ "(" [ typedecl ] ")" ] "=" process "."
# "set <name> = <value>." settings, one alternative per setting with a
# non-boolean value domain; booleans are handled by set_settings_boolean_decl.
set_settings_decl: set_settings_boolean_decl
| _decl_pair{"ignoreTypes", ignore_types_values}
| _decl_pair{"simplifyProcess", simplify_process_values}
| _decl_pair{"preciseActions", precise_actions_values}
| _decl_pair{"redundantHypElim", redundant_hyp_elim_values}
| _decl_pair{"reconstructTrace", reconstruct_trace_values}
| _decl_pair{"attacker", attacker_values}
| _decl_pair{"keyCompromise", key_compromise_values}
| _decl_pair{"predicatesImplementable", predicates_implementable}
| _decl_pair{"saturationApplication", application_values}
| _decl_pair{"verificationApplication", application_values}
| _decl_pair{"maxDepth", max_values}
| _decl_pair{"maxHyp", max_values}
| _decl_pair{"selFun", sel_fun_values}
| _decl_pair{"redundancyElim", redundancy_elim_values}
| _decl_pair{"nounifIgnoreAFewTimes", nounif_ignore_a_few_times_values}
| _decl_pair{"nounifIgnoreNtimes", nounif_ignore_ntimes_values}
| _decl_pair{"traceDisplay", trace_display_values}
| _decl_pair{"verboseClauses", verbose_clauses_values}
| set_strategy
| set_symb_order
# "set swapping = tag1 -> tag2 -> ..." and "set symbOrder = f > g > ...".
_swap_strategy_seq{x}: x ("->" x)*
set_strategy: "set" "swapping" "=" _swap_strategy_seq{TAG} "."
_symb_ord_seq{x}: x (">" x)*
set_symb_order: "set" "symbOrder" "=" _symb_ord_seq{FUNC} "."
event_decl: "event" IDENT ["(" _maybe_empty_seq{typeid} ")"] "."
select_decl: "select" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
noselect_decl: "noselect" [ typedecl ";"] nounifdecl [ "/" INT ] [ "[" _non_empty_seq{nounifoption} "]" ] "."
nounif_decl: "nounif" [ typedecl ";"] nounifdecl [ "/" INT ] [ "["_non_empty_seq{nounifoption} "]" ] "."
elimtrue_decl: "elimtrue" [ failtypedecl ";" ] term "."
clauses_decl: "clauses" clauses "."
# NOTE(review): the explicit " " literal is unusual given "%ignore WS";
# confirm it is actually required between "@module" and the identifier.
module_decl: "@module" " " IDENT
noninterf_decl: "noninterf" [ typedecl ";"] _maybe_empty_seq{nidecl} "."
weaksecret_decl: "weaksecret" IDENT "."
nidecl: IDENT [ "among" "(" _non_empty_seq{term} ")" ]
equality: term "=" term
| "let" IDENT "=" term "in" equality
mayfailequality: IDENT mayfailterm_seq "=" mayfailterm
eqlist: [ "forall" typedecl ";" ] equality [ ";" eqlist ]
clause: term
| term "->" term
| term "<->" term
| term "<=>" term
clauses: [ "forall" failtypedecl ";" ] clause [ ";" clauses ]
mayfailreduc: [ "forall" failtypedecl ";" ] mayfailequality [ "otherwise" mayfailreduc ]
NAT: DIGIT+
# --- Entry point and processes ----------------------------------------------
# A model is declarations followed by a main process, or an equivalence of
# two processes.
start: decl* PROCESS process | decl* EQUIVALENCE process process
process: ZERO
| YIELD
| IDENT [ "(" _maybe_empty_seq{pterm} ")" ]
| bracketed_process
| piped_process
| replicated_process
| replicated_process_bounds
| sample_process
| if_process
| in_process
| out_process
| let_process
| insert_process
| get_process
| event_process
| phase
| sync
bracketed_process: "(" process ")"
piped_process: process "|" process
replicated_process: "!" process
# Bounded replication, ProVerif ("!i <= N P") and CryptoVerif
# ("foreach i <= N do P") syntax.
replicated_process_bounds: "!" IDENT "<=" IDENT process
| "foreach" IDENT "<=" IDENT "do" process
sample_process: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" process]
| IDENT "<-R" typeid [";" process]
let_process: "let" pattern "=" pterm ["in" process [ "else" process ]]
| IDENT [":" typeid] "<-" pterm [";" process]
| "let" typedecl "suchthat" pterm options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
if_process: "if" pterm "then" process [ "else" process ]
in_process: "in" "(" pterm "," pattern ")" options{OPTIONS_PROCESS} [ ";" process ]
get_process: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" process [ "else" process ] ]
out_process: "out" "(" pterm "," pterm ")" [ ";" process ]
insert_process: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" [ ";" process ]
event_process: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] [ ";" process ]
# Plain terms (no process constructs); NOTE(review): the binary alternatives
# declare no precedence/associativity, so nested operators may parse
# ambiguously — confirm against Lark's Earley ambiguity handling.
term: IDENT
| NAT
| "(" _maybe_empty_seq{term} ")"
| IDENT "(" _maybe_empty_seq{term} ")"
| term ( "+" | "-" ) NAT
| NAT "+" term
| term INFIX term
| "not" "(" term ")"
phase: "phase" NAT [";" process]
sync: "sync" NAT ["[" TAG "]"] [";" process]
# --- Queries, nounif declarations, lemmas, and gterms -----------------------
query: gterm ["public_vars" _non_empty_seq{IDENT}] [";" query]
| "secret" IDENT ["public_vars" _non_empty_seq{IDENT}] options{OPTIONS_QUERY_SECRET} [";" query]
| "putbegin" "event" ":" _non_empty_seq{IDENT} [";" query] // NOTE(review): whitespace is allowed between "event" and ":" here; confirm ProVerif accepts "event :" with a space when the output is reconstructed.
| "putbegin" "inj-event" ":" _non_empty_seq{IDENT} [";" query]
nounifdecl: "let" IDENT "=" gformat "in" nounifdecl
| IDENT ["(" _maybe_empty_seq{gformat} ")" ["phase" NAT]]
gformat: IDENT
| "*" IDENT
| IDENT "(" _maybe_empty_seq{gformat} ")"
| "choice" "[" gformat "," gformat "]"
| "not" "(" _maybe_empty_seq{gformat} ")"
| "new" IDENT [ "[" [ fbinding ] "]" ]
| "let" IDENT "=" gformat "in" gformat
fbinding: "!" NAT "=" gformat [";" fbinding]
| IDENT "=" gformat [";" fbinding]
nounifoption: "hypothesis"
| "conclusion"
| "ignoreAFewTimes"
| "inductionOn" "=" IDENT
| "inductionOn" "=" "{" _non_empty_seq{IDENT} "}"
# Optional marzipan annotation ("@query"/"@reachable" plus a string)
# preceding a query declaration; split out so it can be discarded by the
# tree transformer.
query_annotation: [(REACHABLE|QUERY) ESCAPED_STRING]
query_decl: query_annotation query_decl_core
query_decl_core: "query" [ typedecl ";"] query options{OPTIONS_QUERY} "."
not_decl: "not" [ typedecl ";"] gterm "."
TAG: IDENT
lemma: gterm [";" lemma]
| gterm "for" "{" "public_vars" _non_empty_seq{IDENT} "}" [";" lemma]
| gterm "for" "{" "secret" IDENT [ "public_vars" _non_empty_seq{IDENT}] "[real_or_random]" "}" [";" lemma]
axiom_decl: "axiom" [ typedecl ";"] lemma options{OPTIONS_AXIOM} "."
restriction_decl: "restriction" [ typedecl ";"] lemma options{OPTIONS_RESTRICTION} "."
# Same annotation pattern as query_annotation, for "@lemma".
lemma_annotation: [LEMMA ESCAPED_STRING]
lemma_decl: lemma_annotation lemma_decl_core
lemma_decl_core: "lemma" [ typedecl ";"] lemma options{OPTIONS_LEMMA} "."
# gterms: terms as they appear in queries/lemmas, one named rule per form
# so the transformer can dispatch on the node name.
gterm: ident_gterm
| fun_gterm
| choice_gterm
| infix_gterm
| arith_gterm
| arith2_gterm
| event_gterm
| injevent_gterm
| implies_gterm
| paren_gterm
| sample_gterm
| let_gterm
ident_gterm: IDENT
fun_gterm: IDENT "(" _maybe_empty_seq{gterm} ")" ["phase" NAT] ["@" IDENT]
choice_gterm: "choice" "[" gterm "," gterm "]"
infix_gterm: gterm INFIX gterm
arith_gterm: gterm ( "+" | "-" ) NAT
arith2_gterm: NAT "+" gterm
event_gterm: "event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
injevent_gterm: "inj-event" "(" _maybe_empty_seq{gterm} ")" ["@" IDENT]
implies_gterm: gterm "==>" gterm
paren_gterm: "(" _maybe_empty_seq{gterm} ")"
sample_gterm: "new" IDENT [ "[" [ gbinding ] "]" ]
let_gterm: "let" IDENT "=" gterm "in" gterm
gbinding: "!" NAT "=" gterm [";" gbinding]
| IDENT "=" gterm [";" gbinding]
# --- pterms (process terms), patterns, and type declarations ----------------
# pterms extend plain terms with process-like constructs (if/let/new/...)
# usable in expression position.
pterm: IDENT
| NAT
| "(" _maybe_empty_seq{pterm} ")"
| IDENT "(" _maybe_empty_seq{pterm} ")"
| choice_pterm
| pterm ("+" | "-") NAT
| NAT "+" pterm
| pterm INFIX pterm
| not_pterm
| sample_pterm
| if_pterm
| let_pterm
| insert_pterm
| get_pterm
| event_pterm
choice_pterm: "choice[" pterm "," pterm "]"
if_pterm: "if" pterm "then" pterm [ "else" pterm ]
not_pterm: "not" "(" pterm ")"
let_pterm: "let" pattern "=" pterm "in" pterm [ "else" pterm ]
| IDENT [":" typeid] "<-" pterm ";" pterm
| "let" typedecl "suchthat" pterm "in" pterm [ "else" pterm ]
sample_pterm: "new" IDENT [ "[" _maybe_empty_seq{IDENT} "]" ] ":" typeid [";" pterm]
| IDENT "<-R" typeid [";" pterm]
insert_pterm: "insert" IDENT "(" _maybe_empty_seq{pterm} ")" ";" pterm
event_pterm: "event" IDENT [ "(" _maybe_empty_seq{pterm} ")" ] ";" pterm
get_pterm: "get" IDENT "(" _maybe_empty_seq{pattern} ")" [ "suchthat" pterm ] options{OPTIONS_PROCESS} [ "in" pterm [ "else" pterm ] ]
# Patterns for in/let/get; "=" pterm matches a value equal to the pterm.
pattern: IDENT [":" typeid]
| "_" [ ":" typeid ]
| NAT
| pattern "+" NAT
| NAT "+" pattern
| "(" _maybe_empty_seq{pattern} ")"
| IDENT "(" _maybe_empty_seq{pattern} ")"
| "=" pterm
# A term that is allowed to be the special value "fail" (destructor rewriting).
mayfailterm: term
| "fail"
mayfailterm_seq: "(" _non_empty_seq{mayfailterm} ")"
# "x, y: t, z: t'" style typed variable declarations; failtypedecl also
# allows the "or fail" marker.
typedecl: _non_empty_seq{IDENT} ":" typeid [ "," typedecl ]
failtypedecl: _non_empty_seq{IDENT} ":" typeid [ "or fail" ] [ "," failtypedecl ]

View File

@@ -1,8 +1,36 @@
import re import re
import sys
from pathlib import Path from pathlib import Path
from lark import Lark, Token, Transformer, exceptions, tree from lark import Discard, Lark, Transformer
from lark.reconstruct import Reconstructor
class TreeVisitor(Transformer):
# def __default_token__(self, token):
# if token.type == "PROCESS":
# return ""
# elif token.type == "IDENT":
# return token
# # return f"<ident>{token}</ident>"
# def __default__(self, data, children, meta):
# #return "\n".join([c for c in children if c])
# amazing_str = ""
# for c in children:
# if c:
# amazing_str += c
# return f"<{data}> {amazing_str} </{data}>"
# lemma_decl_core: original_stuff
# lemma_annotation: [LEMMA ESCAPED_STRING]
# lemma_decl: lemma_annotation lemma_decl_core
# lemma_decl: original_stuff
def lemma_annotation(self, children):
return Discard
def query_annotation(self, children):
return Discard
# taken from Page 17 in the ProVerif manual # taken from Page 17 in the ProVerif manual
# At the moment, we do not reject a ProVerif model that uses reserved words as identifier, # At the moment, we do not reject a ProVerif model that uses reserved words as identifier,
@@ -118,35 +146,47 @@ LEMMA: "@lemma"
# add @query and @reachable to query_decl, @lemma to lemma_decl # add @query and @reachable to query_decl, @lemma to lemma_decl
def modify_decl_rule(rules: str, decl_rule: str, new_rule: str) -> str: def modify_decl_rule(
old_decl_renamed = f"{decl_rule}_core" grammar: str, old_rule_name: str, annot_rule_name: str, annot_rule: str
rename_target = f"{decl_rule}\s*:" ) -> str:
old_decl_renamed = f"{old_rule_name}_core"
rename_target = f"{old_rule_name}\s*:"
# rename *_decl -> *_decl_core # rename *_decl -> *_decl_core
rules, count = re.subn(rename_target, f"{old_decl_renamed}:", rules, count=1) grammar, count = re.subn(rename_target, f"{old_decl_renamed}:", grammar, count=1)
if count == 0: if count == 0:
raise RuntimeError("*_decl not found!") raise RuntimeError("*_decl not found!")
wrapper = f"{decl_rule}: {new_rule} {old_decl_renamed}" # lemma_annotation: [LEMMA ESCAPED_STRING]
annot_rule = f"{annot_rule_name}: {annot_rule}"
# lemma_decl: lemma_annotation lemma_decl_core
wrapper = f"{old_rule_name}: {annot_rule_name} {old_decl_renamed}"
old_decl_target = f"{old_decl_renamed}\s*:" old_decl_target = f"{old_decl_renamed}\s*:"
# get index of *_decl_core rule # get index of *_decl_core rule
match = re.search(old_decl_target, rules, flags=re.MULTILINE) match = re.search(old_decl_target, grammar, flags=re.MULTILINE)
if not match: if not match:
raise RuntimeError("*_decl_core: rule not found after rename") raise RuntimeError("*_decl_core: rule not found after rename")
insert_pos = match.start() insert_pos = match.start()
rules = rules[:insert_pos] + wrapper + "\n" + rules[insert_pos:] grammar = (
return rules grammar[:insert_pos] + annot_rule + "\n" + wrapper + "\n" + grammar[insert_pos:]
)
return grammar
query_rules = modify_decl_rule( query_rules = modify_decl_rule(
query_rules, "query_decl", "[(REACHABLE|QUERY) ESCAPED_STRING]" query_rules, "query_decl", "query_annotation", "[(REACHABLE|QUERY) ESCAPED_STRING]"
) )
lemma_rules = modify_decl_rule(lemma_rules, "lemma_decl", "[LEMMA ESCAPED_STRING]") lemma_rules = modify_decl_rule(
lemma_rules, "lemma_decl", "lemma_annotation", "[LEMMA ESCAPED_STRING]"
)
grammar = ( grammar = (
common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules common_rules + decl_rules + process_rules + query_rules + lemma_rules + term_rules
) )
parser = Lark(grammar) with open("./src/grammars/awk_proverif.lark", "w") as f:
f.write(grammar)
parser = Lark(grammar, maybe_placeholders=False)
# COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/ # COMMENT: /\(\*(\*(?!\))|[^*])*\*\)/
# COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)" # COMMENT: "(*" /(\*(?!\))|[^*])*/ "*)"
@@ -165,8 +205,26 @@ def parsertest(input):
return parsetree return parsetree
def parse_main(file_path): """
with open(file_path, "r") as f: parse in i.pv file using new awk-marzipan grammar,
eliminate aliases from i.pv using lark TreeVisitor,
then return o.pv (awk_prep)
"""
def parse_main(ipv_path, opv_path):
with open(ipv_path, "r") as f:
content = f.read() content = f.read()
# print(content) # content += "\nprocess main"
parsertest(content)
forest = parsertest(content)
with open("i.pv", "w") as ipvf:
ipvf.write(forest.pretty())
tree = TreeVisitor().transform(forest)
# TODO: tree -> o.pv
new_json = Reconstructor(parser).reconstruct(tree)
with open(opv_path, "w") as opvf:
# opvf.write(tree.pretty())
opvf.write(new_json)