From 7715e8827de1e8adc7d8a2bbf5e85dc7fed2b056 Mon Sep 17 00:00:00 2001 From: Timo Bingmann Date: Thu, 13 Sep 2018 11:08:06 +0200 Subject: [PATCH] Seven instructive examples how to use Boost Spirit --- .gitignore | 9 + Makefile | 46 ++ example.html | 7 + regex.cpp | 128 +++++ spirit1_simple.cpp | 166 +++++++ spirit2_grammar.cpp | 203 ++++++++ spirit3_arithmetic.cpp | 88 ++++ spirit4_struct.cpp | 151 ++++++ spirit5_ast.cpp | 126 +++++ spirit6_ast.cpp | 182 +++++++ spirit7_html.cpp | 1032 ++++++++++++++++++++++++++++++++++++++++ stock_list.txt | 80 ++++ 12 files changed, 2218 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 example.html create mode 100644 regex.cpp create mode 100644 spirit1_simple.cpp create mode 100644 spirit2_grammar.cpp create mode 100644 spirit3_arithmetic.cpp create mode 100644 spirit4_struct.cpp create mode 100644 spirit5_ast.cpp create mode 100644 spirit6_ast.cpp create mode 100644 spirit7_html.cpp create mode 100644 stock_list.txt diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1a89caa --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +*.o +/regex +/spirit1_simple +/spirit2_grammar +/spirit3_arithmetic +/spirit4_struct +/spirit5_ast +/spirit6_ast +/spirit7_html diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2bab7b2 --- /dev/null +++ b/Makefile @@ -0,0 +1,46 @@ +# really simple Makefile + +CXX=g++ +CXXFLAGS=-W -Wall -pedantic -std=c++14 + +PROGRAMS= \ + regex \ + spirit1_simple \ + spirit2_grammar \ + spirit3_arithmetic \ + spirit4_struct \ + spirit5_ast \ + spirit6_ast \ + spirit7_html + +all: $(PROGRAMS) + +clean: + rm -f *.o $(PROGRAMS) + +%.o: %.cpp + $(CXX) $(CXXFLAGS) -c -o $@ $< + +regex: regex.o + $(CXX) $(CXXFLAGS) -o $@ $^ -lboost_regex + +spirit1_simple: spirit1_simple.o + $(CXX) $(CXXFLAGS) -o $@ $^ + +spirit2_grammar: spirit2_grammar.o + $(CXX) $(CXXFLAGS) -o $@ $^ + +spirit3_arithmetic: spirit3_arithmetic.o + $(CXX) $(CXXFLAGS) -o $@ $^ + 
+spirit4_struct: spirit4_struct.o + $(CXX) $(CXXFLAGS) -o $@ $^ + +spirit5_ast: spirit5_ast.o + $(CXX) $(CXXFLAGS) -o $@ $^ + +spirit6_ast: spirit6_ast.o + $(CXX) $(CXXFLAGS) -o $@ $^ + +spirit7_html: spirit7_html.o + $(CXX) $(CXXFLAGS) -o $@ $^ diff --git a/example.html b/example.html new file mode 100644 index 0000000..843f8d8 --- /dev/null +++ b/example.html @@ -0,0 +1,7 @@ +

Example for C++ HTML Parser

+ +Welcome to the example text for the HTML snippet parser, +which can also interpret *Markdown* style. + +Furthermore, the markup parser can read additional tags like <% func(a,5) %> +which could then be filled at evaluation with the result of C++ functions. diff --git a/regex.cpp b/regex.cpp new file mode 100644 index 0000000..c463ae9 --- /dev/null +++ b/regex.cpp @@ -0,0 +1,128 @@ +// Example how to use C++11 and Boost.Regex + +#include + +/******************************************************************************/ +// use std::regex to find a date in a string + +#include + +void std_regex() { + std::string str = "C++ Meetup on 2018-09-12 about String Parsing"; + + // simple regex match: "on ####-##-##" + std::regex re1("on ([0-9]{4}-[0-9]{2}-[0-9]{2})"); + + if (std::regex_search(str, re1)) { + std::cout << "std::regex_search() with re1: matched!" << std::endl; + } + else { + std::cout << "std::regex_search() with re1: no match!" << std::endl; + } + + if (std::regex_match(str, re1)) { + std::cout << "std::regex_match() with re1: matched!" << std::endl; + } + else { + std::cout << "std::regex_match() with re1: no match!" << std::endl; + } + + // regex match and std::string captures + std::smatch match; + + if (std::regex_search(str, match, re1)) { + std::cout << "std::regex_search() with re1: matched!" << std::endl + << " match.size() = " << match.size() << std::endl + << " match[0] = " << match[0] << std::endl + << " match[1] = " << match[1] << std::endl; + } + else { + std::cout << "std::regex_search() with re1: no match!" << std::endl; + } + + // Also: std::cmatch for const char* captures + const char* cstr = "Hello on 2018-09-13"; + std::cmatch cmatch; + + if (std::regex_search(cstr, cmatch, re1)) { + std::cout << "std::regex_search() with re1: matched!" 
<< std::endl + << " match.size() = " << cmatch.size() << std::endl + << " match[0] = " << cmatch[0] << std::endl + << " match[1] = " << cmatch[1] << std::endl; + } + else { + std::cout << "std::regex_search() with re1: no match!" << std::endl; + } + + // use regex_replace and construct a new string + std::string result = std::regex_replace(str, re1, "TODAY"); + std::cout << "std::regex_replace() result = " << result << std::endl; +} + +/******************************************************************************/ +// alternative: use Boost.Regex + +#include + +void boost_regex() { + std::string str = "C++ Meetup on 2018-09-12 about String Parsing"; + + // simple regex match + boost::regex re1("on ([0-9]{4}-[0-9]{2}-[0-9]{2})"); + + if (boost::regex_search(str, re1)) { + std::cout << "boost::regex_search() with re1: matched!" << std::endl; + } + else { + std::cout << "boost::regex_search() with re1: no match!" << std::endl; + } + + if (boost::regex_match(str, re1)) { + std::cout << "boost::regex_match() with re1: matched!" << std::endl; + } + else { + std::cout << "boost::regex_match() with re1: no match!" << std::endl; + } + + // regex match and std::string captures + boost::smatch match; + + if (boost::regex_search(str, match, re1)) { + std::cout << "boost::regex_search() with re1: matched!" << std::endl + << " match.size() = " << match.size() << std::endl + << " match[0] = " << match[0] << std::endl + << " match[1] = " << match[1] << std::endl; + } + else { + std::cout << "boost::regex_search() with re1: no match!" << std::endl; + } + + // also: boost::cmatch for const char* captures, and regex_replace. +} + +/******************************************************************************/ +// Note: I usually make a "compatibility" include which defines + +#if (__cplusplus >= 201103L) + +using std::regex_search; +// ... 
and other symbols + +#else + +// or import from Boost +using boost::regex_search; + +#endif + +/******************************************************************************/ + +int main() +{ + std_regex(); + boost_regex(); + + return 0; +} + +/******************************************************************************/ diff --git a/spirit1_simple.cpp b/spirit1_simple.cpp new file mode 100644 index 0000000..6255094 --- /dev/null +++ b/spirit1_simple.cpp @@ -0,0 +1,166 @@ +// Example how to use Boost Spirit to parse plain integers and lists of integers +// +// test1() parses "12345", +// test2() parses "76131 Karlsruhe", +// test3() parses "[12345,42,5,]" +// test4() parses "[12345,42,5]" +// test5() parses "[12345, 42, 5 ]" + +#include +#include +#include + +#include + +namespace qi = boost::spirit::qi; + +/******************************************************************************/ +// First Example: parse a single integer (test1() to test3() ignore the bool returned by qi::parse) + +void test1() +{ + std::string input = "12345"; + int out_int; + + qi::parse( + // input string (iterators) + input.begin(), input.end(), + // parser grammar + qi::int_, + // output fields + out_int); + + std::cout << "test1() parse result: " + << out_int << std::endl; +} + +/******************************************************************************/ +// Parse an integer followed by a space and a string + +void test2() +{ + std::string input = "76131 Karlsruhe"; + int out_int; + std::string out_string; + + qi::parse( + // input string (iterators) + input.begin(), input.end(), + // parser grammar + qi::int_ >> ' ' >> *qi::char_, + // output fields + out_int, out_string); + + std::cout << "test2() parse result: " + << out_int << " " << out_string << std::endl; +} + +/******************************************************************************/ +// Parse a bracketed list of integers + +void test3() +{ + std::string input = "[12345,42,5,]"; + std::vector out_int_list; + + qi::parse( + // input string (iterators) + input.begin(), 
input.end(), + // parser grammar + '[' >> *(qi::int_ >> ',') >> ']', + // output list + out_int_list); + + std::cout << "test3() parse result: size " + << out_int_list.size() << std::endl; + for (const size_t &i : out_int_list) + std::cout << i << std::endl; +} + +/******************************************************************************/ +// Parse a bracketed list of integers without last comma. NOTE(review): the print loops in test3() to test5() bind elements to `const size_t&`; if the list holds int (element type not visible here) negative values would print as huge unsigned numbers - TODO confirm. + +// Helper to run a parser and capture the results: prints the unparsed remainder and throws std::runtime_error if the parser fails or input is left over. +template +void ParseOrDie(const std::string& input, const Parser& p, Args&& ... args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + bool ok = qi::parse(begin, end, p, std::forward(args) ...); + if (!ok || begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +void test4(std::string input) +{ + std::vector out_int_list; + + ParseOrDie( + // input string + input, + // parser grammar with '%' operator + '[' >> (qi::int_ % ',') >> ']', + // output list + out_int_list); + + std::cout << "test4() parse result: size " + << out_int_list.size() << std::endl; + for (const size_t &i : out_int_list) + std::cout << i << std::endl; +} + +/******************************************************************************/ +// Parse a bracketed list of integers with spaces between symbols + +// Helper to run a parser with a skip parser and capture the results: prints the unparsed remainder and throws std::runtime_error if input is left over. NOTE(review): unlike ParseOrDie, the bool returned by phrase_parse is discarded, so a failed parse is only detected through leftover input (empty input is never reported). +template +void PhraseParseOrDie( + const std::string& input, const Parser& p, const Skipper& s, + Args&& ... 
args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + boost::spirit::qi::phrase_parse( + begin, end, p, s, std::forward(args) ...); + if (begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +void test5(std::string input) +{ + std::vector out_int_list; + + PhraseParseOrDie( + // input string + input, + // parser grammar + '[' >> (qi::int_ % ',') >> ']', + // skip parser + qi::space, + // output list + out_int_list); + + std::cout << "test5() parse result: size " + << out_int_list.size() << std::endl; + for (const size_t &i : out_int_list) + std::cout << i << std::endl; +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + test1(); + test2(); + test3(); + test4(argc >= 2 ? argv[1] : "[12345,42,5]"); + test5(argc >= 3 ? argv[2] : "[12345, 42, 5]"); + + return 0; +} + +/******************************************************************************/ diff --git a/spirit2_grammar.cpp b/spirit2_grammar.cpp new file mode 100644 index 0000000..422eb4e --- /dev/null +++ b/spirit2_grammar.cpp @@ -0,0 +1,203 @@ +// Example how to use Boost Spirit to parse simple arithmetic expressions such +// as "1 + 2 * 3". +// +// test1() parses and accepts "1" +// test2() parses "1" and returns it in an integer variable. +// test3() parses "1+2*3" but only accepts it without calculating. +// test4() parses "1 + 2 * 3" +// +// Evaluation of the expression is added in spirit3_arithmetic.cpp + +#include +#include +#include + +#include + +namespace qi = boost::spirit::qi; + +/******************************************************************************/ + +// Helper to run a parser, check for errors, and capture the results. +template +void ParseOrDie(const std::string& input, const Parser& p, Args&& ... 
args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + bool ok = qi::parse(begin, end, p, std::forward(args) ...); + if (!ok || begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +/******************************************************************************/ +// First grammar example: parse a single integer + +class ArithmeticGrammar1 : public qi::grammar< + // the string iterator to parse: can also be const char* or templated. + std::string::const_iterator> +{ +public: + // string iterator to parse + using Iterator = std::string::const_iterator; + + ArithmeticGrammar1() + // call base constructor and specify start symbol + : ArithmeticGrammar1::base_type(start) + { + // construct the grammar: just set "start" for now. + start = qi::int_; + } + + // List of rule objects in the grammar. Templates just like qi::grammar. + qi::rule start; +}; + +void test1() +{ + std::string input = "12345"; + + ArithmeticGrammar1 g; + ParseOrDie(input, g); +} + +/******************************************************************************/ +// Modify grammar to actually return an integer + +class ArithmeticGrammar2 : public qi::grammar< + // the string iterator to parse: can also be const char* or templated. + std::string::const_iterator, + // return value of the grammar, written in function syntax! + int()> +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar2() : ArithmeticGrammar2::base_type(start) + { + start %= qi::int_; + } + + // List of rule objects in the grammar. Each rule can have a return type. + qi::rule start; +}; + +void test2() +{ + std::string input = "12345"; + int out_int; + + // note that the grammar object does not contain any return values. 
+ ParseOrDie(input, ArithmeticGrammar2(), out_int); + + std::cout << "test2() parse result: " + << out_int << std::endl; +} + +/******************************************************************************/ +// Let's make the grammar more interesting: sums of products with parenthesized groups (no semantic actions are attached to the rules). + +class ArithmeticGrammar3 : public qi::grammar +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar3() : ArithmeticGrammar3::base_type(start) + { + start = product >> *('+' >> product); + product = factor >> *('*' >> factor); + factor = qi::int_ | group; + group = '(' >> start >> ')'; + } + + // List of rule objects in the grammar. Now there are four rules and each + // returns an integer value. + qi::rule start, group, product, factor; +}; + +void test3() +{ + std::string input = "1+2*3"; + int out_int; + + ParseOrDie(input, ArithmeticGrammar3(), out_int); + + std::cout << "test3() parse result: " + << out_int << std::endl; +} + +/******************************************************************************/ +// Introduce error checking when running the arithmetic grammar and add a skip +// parser to jump over spaces. + +// Helper to run a parser with a skip parser and capture the results: prints the unparsed remainder and throws std::runtime_error if input is left over. NOTE(review): unlike ParseOrDie, the bool returned by phrase_parse is discarded, so a failed parse is only detected through leftover input (empty input is never reported). +template +void PhraseParseOrDie( + const std::string& input, const Parser& p, const Skipper& s, + Args&& ... args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + boost::spirit::qi::phrase_parse( + begin, end, p, s, std::forward(args) ...); + if (begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +class ArithmeticGrammar4 : public qi::grammar< + // the string iterator to parse: can also be const char* or templated. + std::string::const_iterator, + // return value of the grammar, written in function syntax! 
+ int(), + // the _type_ of the skip parser + qi::space_type> +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar4() : ArithmeticGrammar4::base_type(start) + { + start = product >> *('+' >> product); + product = factor >> *('*' >> factor); + factor = qi::int_ | group; + group = '(' >> start >> ')'; + } + + // as before, mirrors the template arguments of qi::grammar. + qi::rule start, group, product, factor; +}; + +void test4(std::string input) +{ + int out_int; + + PhraseParseOrDie( + // input string + input, + // grammar + ArithmeticGrammar4(), + // skip parser + qi::space, + // output variable + out_int); + + std::cout << "test4() parse result: " + << out_int << std::endl; +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + test1(); + test2(); + test3(); + test4(argc >= 2 ? argv[1] : "1 + 2 * 3"); + + return 0; +} + +/******************************************************************************/ diff --git a/spirit3_arithmetic.cpp b/spirit3_arithmetic.cpp new file mode 100644 index 0000000..b8cd292 --- /dev/null +++ b/spirit3_arithmetic.cpp @@ -0,0 +1,88 @@ +// Example how to use Boost Spirit to parse and _evaluate_ a simple arithmetic +// grammar. Evaluation is added by amending rules with semantic actions. + +#include +#include +#include + +#include +#include +#include + +namespace qi = boost::spirit::qi; + +/******************************************************************************/ +// Arithmetic parser with semantic actions which calculate the arithmetic +// expression's result + +// Helper to run a parser with a skip parser and capture the results: prints the unparsed remainder and throws std::runtime_error if input is left over. NOTE(review): the bool returned by phrase_parse is discarded, so a failed parse is only detected through leftover input (empty input is never reported). +template +void PhraseParseOrDie( + const std::string& input, const Parser& p, const Skipper& s, + Args&& ... 
args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + boost::spirit::qi::phrase_parse( + begin, end, p, s, std::forward(args) ...); + if (begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +class ArithmeticGrammar1 : public qi::grammar< + std::string::const_iterator, + // define grammar to return an integer ... which we will calculate from the + // expression + int(), qi::space_type> +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar1() : ArithmeticGrammar1::base_type(start) + { + start = + // first component: product, and transfer the result of product + // (qi::_1) to the result of this rule (start, qi::_val). + product [qi::_val = qi::_1] + // zero or more components: add result of product (qi::_1) to the + // result of this rule (qi::_val). + >> *('+' >> product [qi::_val += qi::_1]); + + // product is defined in same way as start, but with multiplication + product = factor [qi::_val = qi::_1] + >> *('*' >> factor [qi::_val *= qi::_1]); + + // factor is either option, both return an int, and with "%=" is + // equivalent to [qi::_val = qi::_1] in both cases. + factor %= qi::int_ | group; + + // group's result is identical to start. again "%=" is a shortcut + group %= '(' >> start >> ')'; + } + + // each rule also returns an integer + qi::rule start, group, product, factor; +}; + +void test1(std::string input) +{ + int out_int; + + PhraseParseOrDie(input, ArithmeticGrammar1(), qi::space, out_int); + + std::cout << "test1() parse result: " + << out_int << std::endl; +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + test1(argc >= 2 ? 
argv[1] : "1 + 2 * 3"); + + return 0; +} + +/******************************************************************************/ diff --git a/spirit4_struct.cpp b/spirit4_struct.cpp new file mode 100644 index 0000000..5934173 --- /dev/null +++ b/spirit4_struct.cpp @@ -0,0 +1,151 @@ +// Example how to use Boost Spirit to parse CSV data directly into a C++ struct +// +// This example is designed to read the file "stock_list.txt" + +#include +#include +#include +#include + +#include +#include +#include + +namespace qi = boost::spirit::qi; +namespace phx = boost::phoenix; + +/******************************************************************************/ + +// Helper to run a parser, check for errors, and capture the results. +template +void ParseOrDie(const std::string& input, const Parser& p, Args&& ... args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + bool ok = qi::parse(begin, end, p, std::forward(args) ...); + if (!ok || begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +/******************************************************************************/ +// Our simple stock struct: two strings and a double. 
+ +struct Stock +{ + std::string symbol; + std::string name; + double price; + + // constructors (the default constructor leaves price uninitialized) + Stock() { } + Stock(std::string symbol, std::string name, double price) + : symbol(symbol), name(name), price(price) { } + + // and how to format it to an output stream such as std::cout + friend std::ostream& operator << (std::ostream& os, const Stock& s) + { + return os << "[Stock" + << " symbol=" << std::quoted(s.symbol) + << " name=" << std::quoted(s.name) + << " price=" << s.price + << "]"; + } +}; + +/******************************************************************************/ +// First Grammar: use Boost Phoenix in semantic action to construct a Stock +// object with parsed parameters + +class StockGrammar1 : public qi::grammar< + // new grammar, this time the result type is a "Stock" object! + std::string::const_iterator, Stock()> +{ +public: + using Iterator = std::string::const_iterator; + + StockGrammar1() : StockGrammar1::base_type(start) + { + // define name rule: returns all characters up to ';' as a string. + name %= *(~qi::char_(';')); + + // parse a CSV line, and construct Stock object using the three symbols + // stored as qi::_1, .. qi::_3. Optionally allow a trailing ';'. + start = (name >> ';' >> name >> ';' >> qi::double_ >> -(qi::lit(';'))) + [qi::_val = phx::construct(qi::_1, qi::_2, qi::_3) ]; + } + + // a helper rule which parses a name (any characters up to the next ';') + qi::rule name; + // rule which actually parses a CSV line containing the information + qi::rule start; +}; + +void test1_stream(std::istream& input) +{ + // function to read each line of input and parse it. 
+ std::string line; + StockGrammar1 g; + while (std::getline(input, line)) { + Stock stock; + ParseOrDie(line, g, stock); + std::cout << stock << std::endl; + } +} + +/******************************************************************************/ +// Second Grammar: use Boost Fusion to instrument the Stock class and enable +// automatic semantic actions + +BOOST_FUSION_ADAPT_STRUCT( + Stock, + (std::string, symbol) + (std::string, name) + (double, price) +) + +class StockGrammar2 + : public qi::grammar +{ +public: + using Iterator = std::string::const_iterator; + + StockGrammar2() : StockGrammar2::base_type(start) + { + name %= *(~qi::char_(';')); + // parse CSV line, and let Boost Fusion automatically map results into + // the Stock struct (this does not use the constructor). + start %= name >> ';' >> name >> ';' >> qi::double_ >> -(qi::lit(';')); + } + + qi::rule name; + qi::rule start; +}; + +void test2_stream(std::istream& input) +{ + std::string line; + while (std::getline(input, line)) { + Stock stock; + ParseOrDie(line, StockGrammar2(), stock); + std::cout << stock << std::endl; + } +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + if (argc >= 2) { + std::ifstream in(argv[1]); + test1_stream(in); + } + else { + std::cout << "Reading stdin" << std::endl; + test2_stream(std::cin); + } + return 0; +} + +/******************************************************************************/ diff --git a/spirit5_ast.cpp b/spirit5_ast.cpp new file mode 100644 index 0000000..493e046 --- /dev/null +++ b/spirit5_ast.cpp @@ -0,0 +1,126 @@ +// Example how to use Boost Spirit to construct an abstract syntax tree (AST) +// for a simple arithmetic grammar and to evaluate expressions. +// +// The grammar accepts expressions like "1 + 2 * 3", constructs an AST and +// evaluates it correctly. NOTE(review): OperatorNode::evaluate() has no return statement for operators other than '+' and '*', and the AST nodes are raw pointers created with new and released with delete. 
+ +#include +#include +#include +#include + +#include +#include + +namespace qi = boost::spirit::qi; +namespace phx = boost::phoenix; + +/******************************************************************************/ + +// Utility to run a parser with a skip parser and capture the results: prints the unparsed remainder and throws std::runtime_error if input is left over. NOTE(review): the bool returned by phrase_parse is discarded, so a failed parse is only detected through leftover input (empty input is never reported). +template +void PhraseParseOrDie( + const std::string& input, const Parser& p, const Skipper& s, + Args&& ... args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + boost::spirit::qi::phrase_parse( + begin, end, p, s, std::forward(args) ...); + if (begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +/******************************************************************************/ + +class ASTNode +{ +public: + virtual double evaluate() = 0; + virtual ~ASTNode() { } +}; + +using ASTNodePtr = ASTNode*; + +template +class OperatorNode : public ASTNode +{ +public: + OperatorNode(const ASTNodePtr& left, const ASTNodePtr& right) + : left(left), right(right) { } + + double evaluate() { + if (Operator == '+') + return left->evaluate() + right->evaluate(); + else if (Operator == '*') + return left->evaluate() * right->evaluate(); + } + + ~OperatorNode() { + delete left; + delete right; + } + +private: + ASTNodePtr left, right; +}; + +class ConstantNode : public ASTNode +{ +public: + ConstantNode(double value) + : value(value) { } + + double evaluate() { + return value; + } + +private: + double value; +}; + +/******************************************************************************/ + +class ArithmeticGrammar1 + : public qi::grammar +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar1() : ArithmeticGrammar1::base_type(start) + { + start = (product >> '+' >> start) + [qi::_val = phx::new_ >(qi::_1, qi::_2) ] | + product [qi::_val = qi::_1]; + product = (factor >> '*' >> product) + [qi::_val = phx::new_ >(qi::_1, qi::_2) ] | + 
factor [qi::_val = qi::_1]; + factor = group [qi::_val = qi::_1] | + qi::int_ [qi::_val = phx::new_(qi::_1) ]; + group %= '(' >> start >> ')'; + } + + qi::rule start, group, product, factor; +}; + +void test1(std::string input) +{ + ASTNode* out_node; + PhraseParseOrDie(input, ArithmeticGrammar1(), qi::space, out_node); + + std::cout << "evaluate() = " << out_node->evaluate() << std::endl; + delete out_node; +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + test1(argc >= 2 ? argv[1] : "1 + 2 * 3"); + + return 0; +} + +/******************************************************************************/ diff --git a/spirit6_ast.cpp b/spirit6_ast.cpp new file mode 100644 index 0000000..751e947 --- /dev/null +++ b/spirit6_ast.cpp @@ -0,0 +1,182 @@ +// Example how to use Boost Spirit to construct an abstract syntax tree (AST) +// for a simple arithmetic grammar and to evaluate expressions _with_ variables! +// +// The grammar accepts expressions like "y = 1 + 2 * x", constructs an AST and +// evaluates it correctly. Non-assignment expressions are also evaluated. + +#include +#include +#include +#include +#include + +#include +#include + +namespace qi = boost::spirit::qi; +namespace phx = boost::phoenix; + +/******************************************************************************/ + +// Utility to run a parser with a skip parser and capture the results: prints the unparsed remainder and throws std::runtime_error if input is left over. NOTE(review): the bool returned by phrase_parse is discarded, so a failed parse is only detected through leftover input (empty input is never reported). +template +void PhraseParseOrDie( + const std::string& input, const Parser& p, const Skipper& s, + Args&& ... 
args) +{ + std::string::const_iterator begin = input.begin(), end = input.end(); + boost::spirit::qi::phrase_parse( + begin, end, p, s, std::forward(args) ...); + if (begin != end) { + std::cout << "Unparseable: " + << std::quoted(std::string(begin, end)) << std::endl; + throw std::runtime_error("Parse error"); + } +} + +/******************************************************************************/ + +// the variable value map: read by VariableNode::evaluate() and written by AssignmentNode::evaluate(), both via operator[] (so reading an unknown name inserts a default entry). NOTE(review): AssignmentNode declares no destructor, so its value subtree is never deleted; OperatorNode::evaluate() has no return statement for operators other than '+' and '*'. +std::map variable_map; + +class ASTNode +{ +public: + virtual double evaluate() = 0; + virtual ~ASTNode() { } +}; + +using ASTNodePtr = ASTNode*; + +template +class OperatorNode : public ASTNode +{ +public: + OperatorNode(const ASTNodePtr& left, const ASTNodePtr& right) + : left(left), right(right) { } + + double evaluate() { + if (Operator == '+') + return left->evaluate() + right->evaluate(); + else if (Operator == '*') + return left->evaluate() * right->evaluate(); + } + + ~OperatorNode() { + delete left; + delete right; + } + +private: + ASTNodePtr left, right; +}; + +class ConstantNode : public ASTNode +{ +public: + ConstantNode(double value) + : value(value) { } + + double evaluate() { + return value; + } + +private: + double value; +}; + +class VariableNode : public ASTNode +{ +public: + VariableNode(std::string identifier) + : identifier(identifier) { } + + double evaluate() { + return variable_map[identifier]; + } + +private: + std::string identifier; +}; + +class AssignmentNode : public ASTNode +{ +public: + AssignmentNode(std::string identifier, const ASTNodePtr& value) + : identifier(identifier), value(value) { } + + double evaluate() { + double v = value->evaluate(); + variable_map[identifier] = v; + return v; + } + +private: + std::string identifier; + ASTNodePtr value; +}; + +/******************************************************************************/ + +class ArithmeticGrammar1 + : public qi::grammar +{ +public: + using Iterator = std::string::const_iterator; + + ArithmeticGrammar1() : 
ArithmeticGrammar1::base_type(start) + { + varname %= qi::alpha >> *qi::alnum; + + start = (varname >> '=' >> term) + [qi::_val = phx::new_(qi::_1, qi::_2) ] | + term [qi::_val = qi::_1]; + + term = (product >> '+' >> term) + [qi::_val = phx::new_ >(qi::_1, qi::_2) ] | + product [qi::_val = qi::_1]; + product = (factor >> '*' >> product) + [qi::_val = phx::new_ >(qi::_1, qi::_2) ] | + factor [qi::_val = qi::_1]; + factor = group [qi::_val = qi::_1] | + varname [qi::_val = phx::new_(qi::_1) ] | + qi::int_ [qi::_val = phx::new_(qi::_1) ]; + group %= '(' >> term >> ')'; + } + + qi::rule varname; + qi::rule start, term, group, product, factor; +}; + +void test1(std::string input) +{ + try { + ASTNode* out_node; + PhraseParseOrDie(input, ArithmeticGrammar1(), qi::space, out_node); + + std::cout << "evaluate() = " << out_node->evaluate() << std::endl; + delete out_node; + } + catch (std::exception& e) { + std::cout << "EXCEPTION: " << e.what() << std::endl; + } +} + +/******************************************************************************/ + +int main() +{ + // important variables + variable_map["x"] = 42; + + std::cout << "Reading stdin" << std::endl; + + std::string line; + while (std::getline(std::cin, line)) { + test1(line); + } + + return 0; +} + +/******************************************************************************/ diff --git a/spirit7_html.cpp b/spirit7_html.cpp new file mode 100644 index 0000000..bac6a63 --- /dev/null +++ b/spirit7_html.cpp @@ -0,0 +1,1032 @@ +// Example how to use Boost Spirit to parse a HTML-like markup language with +// Markdown elements and enable additional instructions. This example was +// extracted from a HTML template engine, but only the AST printer is included. +// +// This example is designed to read "example.html". 
+ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace qi = boost::spirit::qi; +namespace phx = boost::phoenix; +namespace ascii = boost::spirit::ascii; + +/******************************************************************************/ +// AST node structs + +struct ast_null; +struct ast_comment; +struct ast_nodelist; + +struct ast_func_variable; +struct ast_func_string; +struct ast_func_integer; +struct ast_func_double; +struct ast_func_call; +struct ast_func_filter; +struct ast_func_set; +struct ast_func_if; +struct ast_func_for; +struct ast_func_expr; +struct ast_func_template; + +struct ast_tagged_node; +struct ast_html_node; +struct ast_html_selfnode; +struct ast_highlight; + +// boost variant representing an AST node + +typedef boost::variant< + ast_null, + std::string, + ast_comment, + boost::recursive_wrapper, + ast_func_variable, + ast_func_string, + ast_func_integer, + ast_func_double, + ast_func_template, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + boost::recursive_wrapper, + ast_highlight + > +ast_node; + +// *** Individual AST node structs + +//! represent null or undefined +struct ast_null +{ +}; + +//! a comment <# clause #> +struct ast_comment : public std::string +{ +}; + +//! a sequence of multiple AST nodes +struct ast_nodelist : public std::vector +{ +}; + +//! MyFunc node representing a variable +struct ast_func_variable : public std::string +{ +}; + +//! MyFunc node representing a literal string +struct ast_func_string : public std::string +{ +}; + +//! MyFunc node representing a template name +struct ast_func_template : public std::string +{ +}; + +//! 
MyFunc node representing a literal integer +struct ast_func_integer +{ + long long value; + + explicit inline ast_func_integer(const long long& v=0) + : value(v) {} +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_integer, + (long long, value) +) + +//! MyFunc node representing a literal double +struct ast_func_double +{ + double value; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_double, + (double, value) +) + +//! tagged sequence of multiple AST nodes with HTML attributes, like

[nodes]

+struct ast_highlight +{ + std::string language; + std::string content; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_highlight, + (std::string, language) + (std::string, content) +) + +//! MyFunc node representing a function call with argument list +struct ast_func_call +{ + std::string funcname; + ast_nodelist args; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_call, + (std::string, funcname) + (ast_nodelist, args) +) + +//! MyFunc node representing a filter clause (node) together with the raw content it is applied to +struct ast_func_filter +{ + ast_node node; + std::string content; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_filter, + (ast_node, node) + (std::string, content) +) + +//! MyFunc node representing the assignment of a value node to a named variable +struct ast_func_set +{ + std::string varname; + ast_node value; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_set, + (std::string, varname) + (ast_node, value) +) + +//! MyFunc node representing a conditional clause: condition, true branch and false branch +struct ast_func_if +{ + ast_node condition, iftrue, iffalse; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_if, + (ast_node, condition) + (ast_node, iftrue) + (ast_node, iffalse) +) + +//! MyFunc node representing a for clause: loop variable name, argument node and body subtree +struct ast_func_for +{ + std::string varname; + ast_node arg; + ast_node subtree; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_func_for, + (std::string, varname) + (ast_node, arg) + (ast_node, subtree) +) + +//! MyFunc node representing a sequence of expressions with operators intermingled +struct ast_func_expr : public ast_nodelist +{ +}; + +//! tagged sequence of multiple AST nodes like

[nodes]

+struct ast_tagged_node +{ + std::string tag; + ast_node subtree; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_tagged_node, + (std::string, tag) + (ast_node, subtree) +) + +//! key-value attributes for HTML like name=value +struct ast_html_attr +{ + std::string name; + ast_node value; +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_html_attr, + (std::string, name) + (ast_node, value) +) + +//! a sequence of multiple key-value attributes for HTML +struct ast_html_attrlist : public std::vector +{ + const ast_html_attr& find(const std::string& key) const + { + for (const_iterator it = begin(); it != end(); ++it) + { + if (it->name != key) continue; + return *it; + } + std::cout << "{ERROR cannot find HTML attribute " << key << "}" + << std::endl; + abort(); + } +}; + +//! tagged sequence of multiple AST nodes with HTML attributes, like

[nodes]

+struct ast_html_node +{ + std::string tag; + ast_html_attrlist attrlist; + ast_node subtree; + + ast_html_node() {} + + ast_html_node(const std::string& _tag, const ast_html_attr& attr, const ast_node& _subtree) + : tag(_tag), subtree(_subtree) + { + attrlist.push_back(attr); + } +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_html_node, + (std::string, tag) + (ast_html_attrlist, attrlist) + (ast_node, subtree) +) + +//! tagged sequence of multiple AST nodes with HTML attributes, like +struct ast_html_selfnode +{ + std::string tag; + ast_html_attrlist attrlist; + + ast_html_selfnode() {} + + ast_html_selfnode(const std::string& _tag, const ast_html_attr& attr1) + : tag(_tag) + { + attrlist.push_back(attr1); + } + + ast_html_selfnode(const std::string& _tag, const ast_html_attr& attr1, + const ast_html_attr& attr2) + : tag(_tag) + { + attrlist.push_back(attr1); + attrlist.push_back(attr2); + } +}; + +BOOST_FUSION_ADAPT_STRUCT( + ast_html_selfnode, + (std::string, tag) + (ast_html_attrlist, attrlist) +) + +/******************************************************************************/ +// MyMarkup parser + +struct MyMarkupParser : qi::grammar +{ + typedef std::string::const_iterator Iterator; + + // *** General Base Character Parsers + + qi::rule HtmlText, SpecialChar, PlainText; + + qi::rule BlankLine, Indent; + + // *** Inline Blocks with Special Formatting + + qi::rule Inline, InlinePlain; + + qi::rule Comment, CommentBlock; + + qi::rule Code, Emph, Strong; + qi::rule CodeBlock, EmphBlock, StrongBlock; + + qi::rule MarkLink; + qi::rule MarkLinkText, MarkLinkRefList; + qi::rule MarkLinkRef; + + qi::rule MarkImage; + qi::rule MarkImageAlt, MarkImageSrc; + + qi::rule MarkDownload; + qi::rule MarkDownloadRef; + + qi::rule HttpLink, SelfLink; + + qi::rule FuncBlock, FuncInline; + qi::rule FilterBlock, FilterInline; + qi::rule VerbatimBlock, VerbatimInline; + + // *** Inline HTML blocks + + qi::rule HtmlTagName, HtmlComment; + + qi::rule HtmlPhrase; + qi::rule HtmlTagBlock; + 
qi::rule HtmlTagSelfClose; + qi::rule HtmlInline; + + qi::rule HtmlAttribute; + qi::rule HtmlQuoted; + qi::rule HtmlQuotedText; + + // *** Paragraph Blocks: Enumerations + + qi::rule Bullet, Enumet; + + qi::rule BulletList0, BulletList1, BulletList2; + qi::rule OrderedList0, OrderedList1, OrderedList2; + + qi::rule List0, List1, List2; + + qi::rule ListItem0, ListItem1, ListItem2; + + qi::rule ListBlock0, ListBlock1, ListBlock2; + qi::rule ListBlockLine0, ListBlockLine1, ListBlockLine2; + + qi::rule Line; + + // *** Paragraph Blocks: Headers + + qi::rule Header, Header1, Header2, Header3, Header4, Header5, Header6; + + qi::rule HeaderA; + qi::rule HeaderAnchor; + + qi::rule Header1A, Header2A, Header3A, Header4A, Header5A, Header6A; + + // *** Source Highlighting Code Blocks + + qi::rule HighlightBlock; + + // *** Paragraph Blocks: Paragraphs and Plain + + qi::rule Paragraph; + qi::rule ParagraphBlock; + + qi::rule InlineList; + + qi::rule Block; + + qi::rule BlockList; + + qi::rule Start; + + // *** Inline Procedural Language + + typedef qi::space_type Skip; + + qi::rule FIdentifier; + qi::rule FVariable; + qi::rule FString; + qi::rule FDouble; + qi::rule FInteger; + qi::rule FCall; + qi::rule FBracket; + + qi::rule FAtomic; + qi::rule FExpr; + + qi::rule FSetClause; + qi::rule FIfClause, FEvalIfClause; + qi::rule FForClause; + qi::rule FInclude; + + qi::rule FClause; + + qi::rule FFilterSetClause; + qi::rule FFilterTemplateClause; + qi::rule FFilterClause; + + // *** Construction + + MyMarkupParser(); + + static const MyMarkupParser& get(); // get singleton +}; + +MyMarkupParser::MyMarkupParser() : base_type(Start, "MyMarkupParser") +{ + using namespace boost::spirit::ascii; + using namespace qi::labels; + + using qi::lit; + using qi::eoi; + using qi::eol; + using qi::attr; + using qi::omit; + using qi::as_string; + + // ******************************************************************** + // *** General Base Character Parsers + + // text is composed of 
non-special characters + HtmlText = +( char_("A-Za-z0-9~@$^.,:;_=+({}|?/-") [ _val += _1 ] + | lit('&') [ _val += "&" ] + | lit('"') [ _val += """ ] + | lit('\'') [ _val += "'" ] + | lit('>') [ _val += ">" ] + | lit('\304') [ _val += "Ä" ] + | lit('\326') [ _val += "Ö" ] + | lit('\334') [ _val += "Ü" ] + | lit('\337') [ _val += "ß" ] + | lit('\344') [ _val += "ä" ] + | lit('\350') [ _val += "è" ] + | lit('\351') [ _val += "é" ] + | lit('\366') [ _val += "ö" ] + | lit('\374') [ _val += "ü" ] + | (+blank >> -(eol >> *blank >> !eol)) [ _val += " " ] + | (eol >> *blank >> !eol) [ _val += " " ] + ); + + // special characters, accepted if no special meaning + SpecialChar = ( char_("*`#[])!") [ _val += _1 ] + //| (lit('<') >> !lit("%")) [ _val += "<" ] + | lit("\\\\") [ _val += '\\' ] + | lit("\\\"") [ _val += '"' ] + | lit("\\&") [ _val += '&' ] + | lit("\\*") [ _val += '*' ] + | lit("\\#") [ _val += '#' ] + | lit("\\`") [ _val += '`' ] + | lit("\\[") [ _val += '[' ] + | lit("\\<") [ _val += "<" ] + | (lit('%') >> !lit('%')) [ _val += '%' ] + ); + + // a blank line + BlankLine = *blank >> eol; + + // indentation for lists + Indent = lit('\t') | lit(" "); + + // ******************************************************************** + // *** Inline Blocks with Special Formatting + + Inline %= Comment | VerbatimInline | FilterInline | FuncInline | Code | Strong | Emph | SelfLink | MarkDownload | MarkLink | MarkImage | HtmlPhrase | HtmlText | SpecialChar; + + InlinePlain %= Comment | VerbatimInline | FilterInline | FuncInline | PlainText; + + // inline comments + + Comment %= "<%#" >> *(!lit("%>") >> char_) >> "%>"; + + CommentBlock %= "<%#" >> *(!lit("%>") >> char_) >> "%>" >> omit[*eol]; + + // inline styling blocks + + Code %= '`' >> attr("code") >> CodeBlock >> '`'; + CodeBlock %= +(!lit('`') >> Inline); + + Emph %= '*' >> attr("i") >> EmphBlock >> '*'; + EmphBlock %= +(!lit('*') >> Inline); + + Strong %= "**" >> attr("b") >> StrongBlock >> "**"; + StrongBlock %= 
+(!lit("**") >> Inline); + + // markdown inline links + + MarkLink = ('[' >> MarkLinkText >> "](" >> MarkLinkRef >> ')') + [ _val = phx::construct(std::string("markdown-a"), _2, _1) ]; + + MarkLinkText %= +(!lit(']') >> Inline); + MarkLinkRef %= attr("href") >> MarkLinkRefList; + MarkLinkRefList %= +(!lit(')') >> Inline); + + // markdown inline images + + MarkImage = ("![" >> MarkImageAlt >> "](" >> MarkImageSrc >> ')') + [ _val = phx::construct(std::string("markdown-img"), _1, _2) ]; + + MarkImageAlt %= attr("alt") >> MarkLinkText; + MarkImageSrc %= attr("src") >> MarkLinkRefList; + + // markdown download/view links + + MarkDownload = "[[" >> + MarkDownloadRef [ _val = phx::construct(std::string("markdown-download"), _1) ] >> + "]]"; + + MarkDownloadRef %= attr("href") >> as_string[ +~char_(']') ]; + + // self-link inline + + HttpLink %= string("http") >> +~char_('>'); + + SelfLink = &lit("> '<' >> HttpLink + [ _val = "" + _1 + "" ] >> '>'; + + // inline functional language + + FuncBlock %= "<%" >> qi::skip(qi::space)[FClause] >> omit[*space] >> "%>" >> omit[eol]; + + FuncInline %= "<%" >> qi::skip(qi::space)[FClause] >> omit[*space] >> "%>"; + + FilterBlock %= "<%|" >> qi::skip(qi::space)[FFilterClause] >> omit[*space] >> "%>" >> omit[eol] + >> *(!(eol >> "<%|%>") >> char_) + >> omit[eol] >> "<%|%>"; + + FilterInline %= "<%|" >> qi::skip(qi::space)[FFilterClause] >> omit[*space] >> "%>" >> omit[-eol] + >> *(!(-eol >> "<%|%>") >> char_) + >> omit[-eol] >> "<%|%>"; + + VerbatimBlock %= "<%$" >> omit[eol] >> *(!lit("%>") >> char_) >> "%>" >> omit[eol]; + + VerbatimInline %= "<%$" >> *(!lit("%>") >> char_) >> "%>"; + + // ******************************************************************** + // *** Inline HTML blocks + + HtmlTagName = + string("big") | + string("br") | + string("button") | + string("caption") | + string("code") | + string("col") | + string("dd") | + string("div") | + string("dl") | + string("dt") | + string("em") | + string("form") | + string("h1") | 
string("h2") | string("h3") | string("h4") | string("h5") | string("h6") | + string("hr") | + string("iframe") | + string("img") | + string("input") | + string("li") | + string("longversion") | + string("object") | + string("ol") | + string("option") | + string("param") | + string("pre") | + string("select") | + string("script") | + string("span") | + string("strong") | + string("sup") | + string("table") | + string("tbody") | + string("td") | + string("textarea") | + string("tfoot") | + string("thead") | + string("tr") | + string("tt") | + string("ul") | + // two letter overlap + string("th") | + // one letter matches + string("a") | string("b") | string("i") | string("p") + ; + + HtmlPhrase %= &lit('<') >> ( HtmlTagBlock | HtmlComment | HtmlTagSelfClose ); + + HtmlTagBlock %= '<' >> HtmlTagName [phx::at_c<0>(_val) = qi::_1] + >> *HtmlAttribute >> omit[*space] >> '>' >> omit[*eol] + >> HtmlInline + >> omit[" string(phx::at_c<0>(_val)) >> '>'] + >> omit[*eol]; + + HtmlInline %= *( Inline >> omit[*eol] ); + + HtmlTagSelfClose %= '<' >> HtmlTagName + >> *HtmlAttribute >> omit[*space] >> "/>" + >> omit[*eol]; + + HtmlComment %= string("") >> char_) >> string("-->") >> omit[*eol]; + + HtmlAttribute %= omit[+space] >> +(alnum | char_('-')) >> omit[*space >> '=' >> *space] >> HtmlQuoted; + + HtmlQuotedText = +( char_("A-Za-z0-9~!@#$%^.,:;_=+*()[]{}>'|?/ -") [ _val += _1 ] + | (lit('<') >> !lit('%')) [ _val += '<' ] + | lit('&') [ _val += "&" ] + | lit('\304') [ _val += "Ä" ] + | lit('\326') [ _val += "Ö" ] + | lit('\334') [ _val += "Ü" ] + | lit('\337') [ _val += "ß" ] + | lit('\344') [ _val += "ä" ] + | lit('\350') [ _val += "è" ] + | lit('\351') [ _val += "é" ] + | lit('\366') [ _val += "ö" ] + | lit('\374') [ _val += "ü" ] + | lit("\\\"") [ _val += '"' ] + ); + + HtmlQuoted %= '"' >> *(!lit('"') >> (Comment | FuncInline | HtmlQuotedText)) >> '"'; + + qi::on_error( + HtmlTagBlock, + std::cout << phx::val("{debug error expecting ") << _4 << phx::val(" here: \"") + << 
phx::construct(_3, _2) // iterators to error-pos, end + << phx::val("\"}") << std::endl + ); + + // ******************************************************************** + // *** Paragraph Blocks: Enumerations + + Bullet = char_("+*-") >> +blank; + Enumet = +digit >> '.' >> +blank; + + BulletList0 %= &Bullet >> attr("ul") >> List0; + OrderedList0 %= &Enumet >> attr("ol") >> List0; + + BulletList1 %= &(Indent >> Bullet) >> attr("ul") >> List1; + OrderedList1 %= &(Indent >> Enumet) >> attr("ol") >> List1; + + BulletList2 %= &(Indent >> Indent >> Bullet) >> attr("ul") >> List2; + OrderedList2 %= &(Indent >> Indent >> Enumet) >> attr("ol") >> List2; + + List0 %= +ListItem0; + List1 %= +ListItem1; + List2 %= +ListItem2; + + ListItem0 %= omit[(Bullet | Enumet)] >> attr("li") >> ListBlock0; + ListItem1 %= omit[Indent >> (Bullet | Enumet)] >> attr("li") >> ListBlock1; + ListItem2 %= omit[Indent >> Indent >> (Bullet | Enumet)] >> attr("li") >> ListBlock2; + + ListBlock0 %= !BlankLine >> Line >> *( BulletList1 | OrderedList1 | ListBlockLine0 ); + ListBlock1 %= !BlankLine >> Line >> *( BulletList2 | OrderedList2 | ListBlockLine1 ); + ListBlock2 %= !BlankLine >> Line >> *( ListBlockLine2 ); + + ListBlockLine0 %= !BlankLine >> !( *Indent >> (Bullet | Enumet) ) + >> Indent >> attr(" ") >> Line; + + ListBlockLine1 %= !BlankLine >> !( *Indent >> (Bullet | Enumet) ) + >> Indent >> Indent >> attr(" ") >> Line; + + ListBlockLine2 %= !BlankLine >> !( *Indent >> (Bullet | Enumet) ) + >> Indent >> Indent >> Indent >> attr(" ") >> Line; + + // inline will gobble single eols, but stop at double eols. 
+ Line %= +Inline >> omit[(eol >> BlankLine) | (*eol >> eoi)]; + + // ******************************************************************** + // *** Paragraph Blocks: Headers + + Header6 %= "###### " >> attr("h6") >> InlineList; + Header5 %= "##### " >> attr("h5") >> InlineList; + Header4 %= "#### " >> attr("h4") >> InlineList; + Header3 %= "### " >> attr("h3") >> InlineList; + Header2 %= "## " >> attr("h2") >> InlineList; + Header1 %= "# " >> attr("h1") >> InlineList; + + HeaderAnchor = as_string[ +~char_(')') ] + [ _val = "" ]; + + HeaderA %= HeaderAnchor >> lit(") ") >> InlineList; + + Header6A %= "######(" >> attr("h6") >> HeaderA; + Header5A %= "#####(" >> attr("h5") >> HeaderA; + Header4A %= "####(" >> attr("h4") >> HeaderA; + Header3A %= "###(" >> attr("h3") >> HeaderA; + Header2A %= "##(" >> attr("h2") >> HeaderA; + Header1A %= "#(" >> attr("h1") >> HeaderA; + + Header %= &lit('#') >> ( Header6A | Header5A | Header4A | Header3A | Header2A | Header1A | + Header6 | Header5 | Header4 | Header3 | Header2 | Header1 ); + + // ******************************************************************** + // *** Source Highlighting Code Blocks + + HighlightBlock %= "```" >> omit[*blank] >> *print >> omit[eol] + >> *(!(eol >> "```") >> char_) + >> omit[eol] >> "```" >> omit[*blank >> eol]; + + // ******************************************************************** + // *** Paragraph Blocks: Paragraphs and Plain + + Paragraph %= attr("p") >> ParagraphBlock >> omit[+(eol | blank >> eoi)]; + ParagraphBlock %= InlineList; + + InlineList %= +Inline; + + Block %= omit[*BlankLine] >> ( + CommentBlock | + VerbatimBlock | FilterBlock | FuncBlock | + HighlightBlock | + Header | + BulletList0 | OrderedList0 | + HtmlPhrase | + Paragraph | InlineList ); + + BlockList %= *Block; + + Start %= BlockList; + + // ******************************************************************** + // *** Inline Procedural Language + + FIdentifier %= char_("A-Za-z_") >> *char_("A-Za-z0-9_"); + + FVariable %= 
FIdentifier; + + FString %= '"' >> *(!lit('"') >> ((lit("\\\"") >> attr('"')) | char_)) >> '"'; + + FDouble %= qi::real_parser< double, qi::strict_real_policies >(); + + FInteger %= qi::long_long; + + FCall %= FIdentifier >> '(' >> -(FExpr % ',') >> ')'; + + FBracket %= '(' >> FExpr >> ')'; + + FAtomic %= FBracket | FCall | FString | FDouble | FInteger | FVariable; + + FExpr %= FAtomic % as_string[char_("+")] [ phx::push_back(_val,_1) ]; + + FSetClause %= -lit("SET") >> FIdentifier >> '=' >> FExpr; + + FIfClause %= "IF" >> FExpr >> "%%" >> Start >> "%%" + >> -(lit("ELSE") >> "%%" >> Start >> "%%") + >> "ENDIF"; + + FEvalIfClause %= "EVALIF" >> FExpr >> "%%" >> FClause >> "%%" + >> -(lit("ELSE") >> "%%" >> FClause >> "%%") + >> "ENDIF"; + + FForClause %= "FOR" >> FIdentifier >> '=' >> FExpr >> "%%" >> Start >> "%%" + >> "ENDFOR"; + + FInclude %= "INCLUDE" >> attr("include") >> FIdentifier; + + FClause %= FSetClause | FEvalIfClause | FIfClause | FForClause | FInclude | FExpr; + + FFilterSetClause %= "SET" >> FIdentifier; + + FFilterTemplateClause %= "TEMPLATE" >> FIdentifier; + + FFilterClause %= FFilterSetClause | FFilterTemplateClause | FCall; + + // ******************************************************************** +} + +/******************************************************************************/ +// Interpret a boost::variant<> object by recursively visiting the nodes inside. 
+ +struct ast_debug : boost::static_visitor<> +{ + int depth; + + std::ostringstream oss; + + ast_debug(const ast_node& ast) : depth(0) + { + boost::apply_visitor(*this, ast); + } + + inline std::string tab() + { + return std::string(2 * depth, ' '); + } + + void operator()(const ast_null& ) + { + oss << tab() << "NULL" << std::endl; + } + + void operator()(const std::string& text) + { + oss << tab() << "text: \"" << text << '"' << std::endl; + } + + void operator()(const ast_comment& text) + { + oss << tab() << "comment: \"" << text << '"' << std::endl; + } + + void recurse(const ast_node& node) + { + ++depth; + boost::apply_visitor(*this, node); + --depth; + } + + void recurse_list(const ast_nodelist& nodelist) + { + oss << tab() << '{' << std::endl; + + for (const ast_node& n : nodelist) + recurse(n); + + oss << tab() << '}' << std::endl; + } + + void operator()(const ast_nodelist& ast) + { + recurse_list(ast); + } + + void operator()(const ast_tagged_node& ast) + { + oss << tab() << '<' << ast.tag << '>' << std::endl; + recurse(ast.subtree); + } + + void operator()(const ast_html_node& ast) + { + oss << tab() << '<' << ast.tag << '>'; + if (ast.attrlist.size()) + { + ++depth; + oss << " [" << std::endl; + for (const ast_html_attr& attr : ast.attrlist) + { + oss << tab() << attr.name << '=' << std::endl; + recurse(attr.value); + } + oss << tab() << ']'; + --depth; + } + oss << std::endl; + + recurse(ast.subtree); + } + + void operator()(const ast_html_selfnode& ast) + { + oss << tab() << '<' << ast.tag << '>'; + if (ast.attrlist.size()) + { + ++depth; + oss << " [" << std::endl; + for (const ast_html_attr& attr : ast.attrlist) + { + oss << tab() << attr.name << '=' << std::endl; + recurse(attr.value); + } + oss << tab() << ']'; + --depth; + } + oss << std::endl; + } + + void operator()(const ast_func_variable& ast) + { + oss << tab() << "var: " << ast << std::endl; + } + + void operator()(const ast_func_string& ast) + { + oss << tab() << "string: " << ast << 
std::endl; + } + + void operator()(const ast_func_integer& ast) + { + oss << tab() << "integer: " << ast.value << std::endl; + } + + void operator()(const ast_func_double& ast) + { + oss << tab() << "double: " << ast.value << std::endl; + } + + void operator()(const ast_func_template& ast) + { + oss << tab() << "template: " << ast << std::endl; + } + + void operator()(const ast_func_call& ast) + { + oss << tab() << "call: " << ast.funcname << " {" << std::endl; + recurse_list(ast.args); + oss << tab() << '}' << std::endl; + } + + void operator()(const ast_func_filter& ast) + { + oss << tab() << "filter: [" << std::endl; + recurse(ast.node); + oss << tab() << "] on \"" << ast.content << "\"" << std::endl; + } + + void operator()(const ast_func_expr& ast) + { + oss << tab() << "expr: {" << std::endl; + recurse_list(ast); + oss << tab() << '}' << std::endl; + } + + void operator()(const ast_func_set& ast) + { + oss << tab() << "set: " << ast.varname << std::endl; + oss << tab() << "value: " << std::endl; + recurse(ast.value); + } + + void operator()(const ast_func_if& ast) + { + oss << tab() << "if: [" << std::endl; + recurse(ast.condition); + oss << tab() << "]" << std::endl; + oss << tab() << "true: " << std::endl; + recurse(ast.iftrue); + oss << tab() << "else: " << std::endl; + recurse(ast.iffalse); + } + + void operator()(const ast_func_for& ast) + { + oss << tab() << "for: " << ast.varname << "[" << std::endl; + recurse(ast.arg); + oss << tab() << "]" << std::endl; + oss << tab() << "subtree: " << std::endl; + recurse(ast.subtree); + } + + void operator()(const ast_highlight& ast) + { + oss << tab() << "highlight[" << ast.language << "]" << std::endl + << tab() << "\"" << ast.content << "\"" << std::endl; + } +}; + +/******************************************************************************/ + +ast_node parse_markup(const std::string& input, const std::string& name) +{ + std::string::const_iterator + begin = input.begin(), end = input.end(); + + static const 
MyMarkupParser p; + ast_node ast; + bool r = phrase_parse(begin, end, p, qi::space, ast); + + if (r && begin == end) + { + std::cout << std::string(80, '-') << std::endl; + std::cout << "Parsing " << name << " succeeded." << std::endl; + std::cout << std::string(80, '-') << std::endl; + + ast_debug prn(ast); + std::cout << prn.oss.str(); + + std::cout << std::string(80, '-') << std::endl; + } + else + { + std::cout << std::string(80, '-') << std::endl; + std::cout << "Parsing " << name << " failed, stopped at" << std::endl; + std::cout << std::string(80, '-') << std::endl; + + ast_debug prn(ast); + std::cout << prn.oss.str(); + + std::cout << std::string(80, '-') << std::endl; + std::cout << "Remaining input" << std::endl; + + std::cout << std::string(begin,end) << std::endl; + + std::cout << std::string(80, '-') << std::endl; + std::cout << "!!! " << name << " parsing FAILED!" << std::endl; + } + + return ast; +} + +/******************************************************************************/ + +int main(int argc, char* argv[]) +{ + if (argc >= 2) { + std::ifstream in(argv[1]); + std::string input((std::istreambuf_iterator(in)), + std::istreambuf_iterator()); + parse_markup(input, argv[1]); + } + else { + std::cout << "Reading stdin" << std::endl; + std::string input((std::istreambuf_iterator(std::cin)), + std::istreambuf_iterator()); + parse_markup(input, "stdin"); + } + return 0; +} + +/******************************************************************************/ diff --git a/stock_list.txt b/stock_list.txt new file mode 100644 index 0000000..1f8a284 --- /dev/null +++ b/stock_list.txt @@ -0,0 +1,80 @@ +AAPL;Apple Inc.;221.68 +ABBV;AbbVie Inc.;92.89 +ABT;Abbott Laboratories;66.79 +ACN;Accenture plc;170.15 +ADBE;Adobe Systems Incorporated;267.58 +AMGN;Amgen Inc.;198.30 +AMZN;Amazon.com, Inc.;1976.44 +BA;The Boeing Company;353.36 +BABA;Alibaba Group Holding Limited;160.67 +BAC;Bank of America Corporation;30.50 +BBL;BHP Billiton plc;39.57 +BHP;BHP Billiton 
Limited;44.26 +BP;BP p.l.c.;43.24 +BRK-A;Berkshire Hathaway Inc.;322874.00 +BRK-B;Berkshire Hathaway Inc.;214.83 +BTI;British American Tobacco p.l.c.;49.01 +BUD;Anheuser-Busch InBev SA/NV;88.23 +C;Citigroup Inc.;70.81 +CHL;China Mobile Limited;48.40 +CMCSA;Comcast Corporation;35.94 +COST;Costco Wholesale Corporation;243.16 +CRM;salesforce.com, inc.;154.03 +CSCO;Cisco Systems, Inc.;46.61 +CVX;Chevron Corporation;116.79 +DIS;The Walt Disney Company;109.86 +DWDP;DowDuPont Inc.;69.65 +FB;Facebook, Inc.;162.30 +GE;General Electric Company;12.53 +GOOG;Alphabet Inc.;1159.74 +GOOGL;Alphabet Inc.;1168.81 +HD;The Home Depot, Inc.;212.08 +HON;Honeywell International Inc.;164.21 +HSBC;HSBC Holdings plc;42.85 +IBM;International Business Machines Corporation;147.02 +INTC;Intel Corporation;44.49 +IVV;iShares Core S&P 500 ETF;290.81 +JNJ;Johnson & Johnson;139.22 +JPM;JPMorgan Chase & Co.;113.63 +KO;The Coca-Cola Company;46.22 +LLY;Eli Lilly and Company;106.17 +MA;Mastercard Incorporated;213.10 +MCD;McDonald's Corporation;165.01 +MDT;Medtronic plc;95.92 +MMM;3M Company;214.29 +MO;Altria Group, Inc.;63.83 +MRK;Merck & Co., Inc.;69.80 +MSFT;Microsoft Corporation;110.72 +NFLX;Netflix, Inc.;362.39 +NKE;NIKE, Inc.;82.22 +NVDA;NVIDIA Corporation;265.14 +NVS;Novartis AG;84.61 +ORCL;Oracle Corporation;49.19 +PEP;PepsiCo, Inc.;113.61 +PFE;Pfizer Inc.;42.51 +PG;The Procter & Gamble Company;82.97 +PM;Philip Morris International Inc.;80.95 +PTR;PetroChina Company Limited;72.73 +PYPL;PayPal Holdings, Inc.;90.98 +QCOM;QUALCOMM Incorporated;70.50 +RDS-A;Royal Dutch Shell plc;64.50 +RDS-B;Royal Dutch Shell plc;66.51 +RY;Royal Bank of Canada;79.46 +SAP;SAP SE;120.68 +SNP;China Petroleum & Chemical Corporation;97.03 +T;AT&T Inc.;33.35 +TD;The Toronto-Dominion Bank;60.52 +TM;Toyota Motor Corporation;119.99 +TOT;TOTAL S.A.;62.47 +TSM;Taiwan Semiconductor Manufacturing Company Limited;44.09 +UL;Unilever PLC;56.29 +UN;Unilever N.V.;56.79 +UNH;UnitedHealth Group Incorporated;263.02 +UNP;Union Pacific 
Corporation;157.25 +UPS;United Parcel Service, Inc.;123.58 +UTX;United Technologies Corporation;133.96 +V;Visa Inc.;145.84 +VZ;Verizon Communications Inc.;55.01 +WFC;Wells Fargo & Company;56.04 +WMT;Walmart Inc.;96.26 +XOM;Exxon Mobil Corporation;83.33