Implementation of a parser for nested and optional structures in Boost.Spirit [closed]

I would like to implement a parser for a nested (and optional) structure, such as "namespace" in the C++ language, using boost::spirit.

What is the simplest way to do it?



Solution 1:[1]

You can create a recursive rule like so:

// A namespace is the keyword followed by a brace pair that may contain any
// number of further namespaces; the rule refers to itself to express nesting.
namespace_ = lexeme["namespace"] >> '{' >> *namespace_ >> '}';

To also allow optional name identifiers:

// Identifier: a letter or underscore followed by letters, digits or
// underscores. raw[] exposes the matched input range, which becomes the
// rule's std::string attribute.
rule<It, std::string()> identifier_ =     //
    raw[(alpha | '_') >> *(alnum | '_')]; //

// The unary '-' makes the identifier optional, so unnamed namespaces match too.
namespace_ =
    lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

To also account for C++-style comments:

// Skipper: whitespace, a "//" comment running to end of line (or end of
// input), or a "/* ... */" block comment.
using Skip = rule<It>;
Skip skip  = space                           //
    | "//" >> *~char_("\r\n") >> (eol | eoi) //
    | "/*" >> *(char_ - "*/") >> "*/"        //
    ;

// Declared with the skipper type so the skipper is applied between its tokens.
rule<It, Skip> namespace_;

// Declared without a skipper: the identifier characters must be contiguous.
rule<It, std::string()> identifier_ =     //
    raw[(alpha | '_') >> *(alnum | '_')]; //

// Keyword, optional name, then a brace pair holding zero or more nested namespaces.
namespace_ =
    lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

Demo:

Live On Coliru

#include <boost/spirit/include/qi.hpp>

auto parse(std::string_view input) {
    using namespace boost::spirit::qi;
    using It = std::string_view::const_iterator;

    using Skip = rule<It>;
    Skip skip  = space                           //
        | "//" >> *~char_("\r\n") >> (eol | eoi) //
        | "/*" >> *(char_ - "*/") >> "*/"        //
        ;

    rule<It, Skip> namespace_;

    rule<It, std::string()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //

    namespace_ =
        lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

    phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip);
}

// Runs the parser over a small nested-namespace sample that ends in a comment.
int main() {
    std::string_view const sample = R"(
        namespace X { namespace Y {
            namespace Z1 {
            }
            namespace Z2 {
            }
        }} // namespace X::Y
    )";

    parse(sample);
}

BONUS

Adding an AST representation and debug output of the parsed data: Live On Coliru (http://coliru.stacked-crooked.com/a/58542397b7f751e0)

#include <boost/fusion/adapted.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>

// AST for the namespace grammar: a tree of optionally-named namespaces.
namespace Ast {
    // Namespace name as matched by the identifier rule.
    using Id = std::string;

    // Forward declaration so a Namespace can hold a vector of Namespaces.
    struct Namespace;
    using Namespaces = std::vector<Namespace>;

    struct Namespace {
        boost::optional<Id> id;       // unset for an unnamed namespace
        Namespaces          children; // directly nested namespaces, in source order
    };
}

// Expose Namespace as a Fusion sequence (id, children) so it can be used as
// the attribute type of a Spirit rule.
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace, id, children)

auto parse(std::string_view input) {
    using namespace boost::spirit::qi;
    using It = std::string_view::const_iterator;

    using Skip = rule<It>;
    Skip skip  = space                           //
        | "//" >> *~char_("\r\n") >> (eol | eoi) //
        | "/*" >> *(char_ - "*/") >> "*/"        //
        ;

    rule<It, Ast::Namespace(), Skip> namespace_;

    rule<It, Ast::Id()> identifier_ =     //
        raw[(alpha | '_') >> *(alnum | '_')]; //

    namespace_ =
        lexeme["namespace"] >> -identifier_ >> '{' >> *namespace_ >> '}';

    Ast::Namespaces parsed;
    phrase_parse(begin(input), end(input), eps > *namespace_ > eoi, skip, parsed);
    return parsed;
}

namespace Ast {
    void print(std::ostream& os, Namespace const& ns, unsigned indent = 0) {
        os << std::setw(indent) << ""
           << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
        for (auto& child : ns.children) {
            print(os, child, indent+2);
        }
        os << std::setw(indent) << "" << "}\n";
    }
}

// Parses a sample with nested and unnamed namespaces and prints the resulting tree.
int main() {
    std::string_view const sample = R"(
        namespace X { namespace Y {
            namespace Z1 {
            }
            namespace Z2 {
            }
        }} // namespace X::Y
        namespace { }
    )";

    auto const program = parse(sample);
    for (auto const& top_level : program)
        print(std::cout, top_level);
}

Prints

namespace X {
  namespace Y {
    namespace Z1 {
    }
    namespace Z2 {
    }
  }
}
namespace /*anonymous*/ {
}

UPDATE

In response to the comments, I made a more involved example that parses input where a struct can appear at global or namespace level (or, indeed, nested inside another struct), like:

namespace Math {
    long factorial(int x);
}

struct GlobalA {
    int foo();
    double bar(string stuff, int i, bool flag);
    struct Nested {
        /* todo implementation */
    };
};

namespace X { namespace Y {
    struct Mixin{};
    namespace Z1 {
        struct Derived : GlobalA, Mixin {
            void qux();
        };
    }
    namespace Z2 {
    }
}} // namespace X::Y
namespace { }

See it Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/adapted.hpp>
#include <boost/fusion/include/io.hpp>
#include <boost/optional/optional_io.hpp>
#include <boost/spirit/include/qi.hpp>

// AST for the extended grammar: namespaces containing declarations
// (variables, functions, structs) and further namespaces.
namespace Ast {
    // Identifier type: a std::string subclass that inherits the string
    // constructors and assignment operators.
    struct Id : std::string {
        using std::string::string;
        using std::string::operator=;
    };

    // Forward declaration; Namespace is defined after the member variant below.
    struct Namespace;
    using Namespaces = std::vector<Namespace>;

    // "<type> <name>"; also used for each function argument.
    struct VariableDeclaration { Id type, name; };
    using VariableDeclarations = std::vector<VariableDeclaration>;

    // "<return_type> <name>(<args>)".
    struct FunctionDeclaration {
        Id return_type, name;
        VariableDeclarations args;
    };

    // StructDeclaration holds Declarations itself (via Members), so it is
    // still incomplete here and goes into the variant through recursive_wrapper.
    struct StructDeclaration;
    using Declaration = boost::variant< //
        VariableDeclaration,            //
        FunctionDeclaration,            //
        boost::recursive_wrapper<StructDeclaration>>;

    using Bases   = std::list<Id>;
    using Members = std::vector<Declaration>;

    struct StructDeclaration {
        Id      name;
        Bases   bases;   // base names from the ": A, B" clause; empty when absent
        Members members; // declarations inside the braces, in source order
    };

    // Anything that may appear at namespace (or global) level.
    using NsMember  = boost::variant<Declaration, Namespace>;
    using NsMembers = std::vector<NsMember>;

    struct Namespace {
        boost::optional<Id> id;      // unset for an unnamed namespace
        NsMembers           members; // declarations and nested namespaces
    };

    // A whole input is the list of its top-level members.
    using Program = NsMembers;
} // namespace Ast

// Expose the AST structs as Fusion sequences so they can be used as Spirit
// rule attributes. The member order given here is the order in which the
// grammar's sub-attributes are assigned.
BOOST_FUSION_ADAPT_STRUCT(Ast::VariableDeclaration, type,        name)
BOOST_FUSION_ADAPT_STRUCT(Ast::FunctionDeclaration, return_type, name,    args)
BOOST_FUSION_ADAPT_STRUCT(Ast::StructDeclaration,   name,        bases,   members)
BOOST_FUSION_ADAPT_STRUCT(Ast::Namespace,           id,          members)

///// BEGIN DEBUG OUTPUT FACILITIES
namespace Ast {
    // Namespace: header line, every member in order, then a closing brace line.
    // Unnamed namespaces are shown with the placeholder "/*anonymous*/".
    static std::ostream& operator<<(std::ostream& os, Namespace const& ns) {
        os << "namespace " << ns.id.value_or("/*anonymous*/") << " {\n";
        for (auto const& member : ns.members) {
            os << member;
        }
        os << "}\n";
        return os;
    }

    // Function: "<return_type> <name>(<type> <name>, ...);" with no trailing newline.
    static std::ostream& operator<<(std::ostream& os, FunctionDeclaration const& fd) {
        os << fd.return_type << " " << fd.name << "(";

        // Nothing before the first argument, ", " before every later one.
        char const* separator = "";
        for (auto const& arg : fd.args) {
            os << separator << arg.type << " " << arg.name;
            separator = ", ";
        }

        os << ");";
        return os;
    }

    // Variable: "<type> <name>;" with no trailing newline.
    static std::ostream& operator<<(std::ostream& os, VariableDeclaration const& vd) {
        os << vd.type << " " << vd.name << ";";
        return os;
    }

    // Struct: "struct <name>[ : <base>, ...] {", one member per line, then "};".
    static std::ostream& operator<<(std::ostream& os, StructDeclaration const& sd) {
        os << "struct " << sd.name;

        // " : " introduces the first base, ", " every later one; an empty
        // base list prints nothing.
        char const* separator = " : ";
        for (auto const& base : sd.bases) {
            os << separator << base;
            separator = ", ";
        }

        os << " {\n";
        for (auto const& member : sd.members) {
            os << member << "\n";
        }
        os << "};\n";
        return os;
    }
}
///// END DEBUG OUTPUT FACILITIES

namespace qi = boost::spirit::qi;

/// Qi grammar for a small C++-like subset: namespaces (optionally named),
/// structs (optionally with a base list), function declarations and variable
/// declarations. The synthesized attribute is an Ast::Program.
///
/// The grammar is exposed without a skipper: `start` installs `skipper_`
/// (whitespace plus `//` and `/* */` comments) itself through `skip(...)[]`,
/// so callers invoke it with `qi::parse` rather than `qi::phrase_parse`.
/// `start` uses an expectation point before `eoi`, so Qi throws
/// qi::expectation_failure when the input is not fully consumed.
template <typename It> struct ProgramParser : qi::grammar<It, Ast::Program()> {
    ProgramParser() : ProgramParser::base_type(start) {
        using namespace qi;

        // Reserved words that must not be accepted as identifiers.
        keywords_ += "if", "do", "for", "else", "while", "not", "and", "or",
            "xor", "continue", "break", "case", "goto", "struct", "class",
            "enum", "namespace";

        // A keyword only counts when it is not merely the prefix of a longer
        // word (e.g. "format" must not be rejected because of "for").
        kw_lexeme = keywords_ >> !(alnum|'_');

        skipper_ = space                             //
            | "//" >> *~char_("\r\n") >> (eol | eoi) //
            | "/*" >> *(char_ - "*/") >> "*/"        //
            ;

        identifier_ = !kw_lexeme >> raw[(alpha | '_') >> *(alnum | '_')];

        // "<type> <name>" without a terminator, because function argument
        // lists reuse this rule; the ';' of a standalone declaration is
        // required in decl_ below.
        vardecl_    = identifier_ >> identifier_;
        fundecl_    = identifier_ >> identifier_ >> //
            '(' >> -(vardecl_ % ',') >> ')' >> ';';

        // fundecl_ is tried first: it shares its "<type> <name>" prefix with
        // vardecl_ and fails at the missing '(' for a variable. A variable
        // declaration used as a member must end in ';' (previously the ';'
        // was never consumed, so "int x;" could not be parsed as a member).
        decl_ = fundecl_ | vardecl_ >> ';' | struct_;

        // Without a ':' clause, succeed without consuming input and yield an
        // empty base list.
        Ast::Bases const no_bases;
        baselist_ = ':' >> identifier_ % ',' | attr(no_bases);

        // The keyword boundary is a non-consuming look-ahead, the same test
        // kw_lexeme uses.
        struct_ =                                               //
            lexeme["struct" >> !(alnum | '_')] >> identifier_   //
            >> baselist_ >> '{'                                 //
            >> *decl_                                           //
            >> '}' >> ';';

        nsmember_ = namespace_ | decl_;

        // The keyword boundary must not consume the '{' of an unnamed
        // namespace written without a space ("namespace{}"); the previous
        // `(!graph) | '{'` consumed it, so the '{' required below could never
        // match for that input.
        namespace_ = lexeme["namespace" >> !(alnum | '_')] >>
            -identifier_ >> '{' >> *nsmember_ >> '}';

        program_  = *nsmember_;
        start     = skip(skipper_.alias())[program_ > eoi];

        BOOST_SPIRIT_DEBUG_NODES((start)(program_)(nsmember_)(namespace_)(
            struct_)(decl_)(vardecl_)(fundecl_)(baselist_)(identifier_))
    }

  private:
    qi::symbols<char> keywords_;
    qi::rule<It>      kw_lexeme;

    // Rules declared without a skipper: their characters must be contiguous.
    qi::rule<It, Ast::Program()> start;
    qi::rule<It, Ast::Id()>      identifier_;

    using Skip = qi::rule<It>;
    Skip skipper_;

    // Rules declared with the skipper: it is applied between their tokens.
    qi::rule<It, Ast::Bases(),               Skip> baselist_;
    qi::rule<It, Ast::Declaration(),         Skip> decl_;
    qi::rule<It, Ast::FunctionDeclaration(), Skip> fundecl_;
    qi::rule<It, Ast::Namespace(),           Skip> namespace_;
    qi::rule<It, Ast::NsMember(),            Skip> nsmember_;
    qi::rule<It, Ast::Program(),             Skip> program_;
    qi::rule<It, Ast::StructDeclaration(),   Skip> struct_;
    qi::rule<It, Ast::VariableDeclaration(), Skip> vardecl_;
};

// Parses `input` with ProgramParser and returns the resulting AST.
// The grammar object is built once, on first call, and reused afterwards.
// The boolean result of parse() is not inspected.
Ast::Program parse_program(std::string_view input) {
    using Iterator = std::string_view::const_iterator;
    static ProgramParser<Iterator> const grammar;

    Ast::Program result;
    parse(input.begin(), input.end(), grammar, result);
    return result;
}

// Parses a sample mixing namespaces, structs and function declarations, then
// prints every top-level member followed by a newline.
int main() {
    std::string_view const source = R"(
            namespace Math {
                long factorial(int x);
            }

            struct GlobalA {
                int foo();
                double bar(string stuff, int i, bool flag);
                struct Nested {
                    /* todo implementation */
                };
            };

            namespace X { namespace Y {
                struct Mixin{};
                namespace Z1 {
                    struct Derived : GlobalA, Mixin {
                        void qux();
                    };
                }
                namespace Z2 {
                }
            }} // namespace X::Y
            namespace { }
        )";

    auto const program = parse_program(source);
    for (auto const& member : program) {
        std::cout << member << '\n';
    }
}

The output (not pretty-printed):

namespace Math {
long factorial(int x);}

struct GlobalA {
int foo();
double bar(string stuff, int i, bool flag);
struct Nested {
};

};

namespace X {
namespace Y {
struct Mixin {
};
namespace Z1 {
struct Derived : GlobalA, Mixin {
void qux();
};
}
namespace Z2 {
}
}
}

namespace /*anonymous*/ {
}

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1