RegEx.hpp
Basic regular expression handler.
A fully (well, mostly) functional regular expression processor.
Special chars: ‘|’ - or ‘*’ - zero or more of previous ‘+’ - one or more of previous ‘?’ - previous is optional ‘.’ - Match any character except newline (\n)
Plus the following group contents (and may change translation rules) ‘(’ and ‘)’ - group contents ‘"’ - Ignore special characters in contents (quotes still need to be escaped) ‘[’ and ‘]’ - character set
— choose ONE character ‘^’ as first char negates contents ‘-’ indicates range UNLESS first or last.
Additional overloads for functions in lexer_utils.h:
static NFA to_NFA(const RegEx & regex, size_t stop_id=1); static DFA to_DFA(const RegEx & regex);
-
Todo:
Implement ^ and $ (beginning and end of line)
Implement {n}, {n,} and {n,m} (exactly n, at least n, and n-m copies, respectively)
Implement \d (for digits), \s (for whitespace), etc.
Consider a separator (maybe backtick?) to divide up a regex expression; the result can be returned by each section as a vector of strings.
Functions
-
static NFA to_NFA(const RegEx &regex, size_t stop_id = 1)
Simple conversion of RegEx to NFA (mostly implemented in RegEx)
-
static DFA to_DFA(const RegEx &regex)
Conversion of RegEx to DFA, via NFA intermediate.
-
class RegEx
#include <RegEx.hpp>
A basic regular expression handler.
Public Functions
-
RegEx() = delete
-
inline RegEx(const std::string &r)
-
inline RegEx(const RegEx &r)
-
inline ~RegEx()
-
inline RegEx &operator=(const RegEx &r)
Set this RegEx equal to another.
-
inline std::string AsString() const
Convert the RegEx to a standard string, readable from outside this class.
-
inline void AddToNFA(NFA &nfa, size_t start, size_t stop) const
Add this regex to an NFA being built.
-
void Generate() const
Assume the RegEx is ready and setup processing for it.
-
inline bool Test(const std::string &str) const
Test if a string satisfies this regex.
-
inline void PrintInternal() const
For debugging: print the internal representation of the regex.
-
inline void PrintNotes() const
For debugging: print any internal notes generated about this regex.
-
inline void PrintDebug() const
Print general debugging information about this regex.
Private Functions
-
template<typename ...T>
inline void Error(T&&... args)
-
inline bool EnsureNext(char x)
Make sure that there is another element in the RegEx (e.g., after a ‘|’) or else trigger an error to report the problem.
-
inline Ptr<re_charset> ConstructSet()
Construct a character range.
-
inline Ptr<re_string> ConstructString()
Construct a string, loading everything needed.
-
inline Ptr<re_base> ConstructSegment()
Should only be called when we know we have a single unit to produce. Build and return it.
-
inline Ptr<re_block> Process(Ptr<re_block> cur_block = nullptr)
Process the input regex into a tree representation.
Private Members
-
std::string regex
Original string to define this RegEx.
-
vector<std::string> notes
Any warnings or errors would be provided here.
-
bool valid = true
Set to false if regex cannot be processed.
-
size_t pos = 0
Position being read in regex.
-
mutable DFA dfa
DFA that this RegEx translates to.
-
mutable bool dfa_ready = false
Is the dfa ready? (or does it need to be generated?)
-
re_block head
Private Static Attributes
-
static constexpr size_t NUM_SYMBOLS = 128
Maximum number of symbols the RegEx can handle.
-
struct re_base
Internal base representation of a portion of a regex.
Public Functions
-
inline virtual ~re_base()
-
inline virtual void Print(std::ostream &os) const
-
inline virtual Ptr<re_block> AsBlock()
-
inline virtual Ptr<re_charset> AsCharSet()
-
inline virtual Ptr<re_parent> AsParent()
-
inline virtual Ptr<re_string> AsString()
-
inline virtual size_t GetSize() const
-
inline virtual bool Simplify()
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const
-
struct re_block : public RegEx::re_parent
Representation of a series of components…
Public Functions
-
inline void Print(std::ostream &os) const override
-
inline Ptr<re_block> AsBlock() override
-
inline bool Simplify() override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
-
struct re_charset : public RegEx::re_base
Representation of a character set e.g., [abc].
Public Functions
-
inline re_charset()
-
inline re_charset(char x, bool neg = false)
-
inline re_charset(const std::string &s, bool neg = false)
-
inline void Print(std::ostream &os) const override
-
inline Ptr<re_charset> AsCharSet() override
-
inline size_t GetSize() const override
-
inline char First() const
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
Public Members
-
opts_t char_set
-
struct re_or : public RegEx::re_parent
Representation of two options in a regex, e.g., a|b.
Public Functions
-
inline re_or(Ptr<re_base> l, Ptr<re_base> r)
-
inline void Print(std::ostream &os) const override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
-
struct re_parent : public RegEx::re_base
Intermediate base class for RegEx components that have children (such as “and” and “or”)
Public Functions
-
inline re_parent()
-
inline ~re_parent()
-
inline void Clear()
-
inline virtual void push(Ptr<re_base> x)
-
inline Ptr<re_base> pop()
-
inline size_t GetSize() const override
-
inline Ptr<re_parent> AsParent() override
-
inline bool Simplify() override
-
struct re_plus : public RegEx::re_parent
Representation of one-or-more instances of a component, e.g., a+.
Public Functions
-
inline re_plus(Ptr<re_base> c)
-
inline void Print(std::ostream &os) const override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
-
struct re_qm : public RegEx::re_parent
Representation of zero-or-one instances of a component, e.g., a?
Public Functions
-
inline re_qm(Ptr<re_base> c)
-
inline void Print(std::ostream &os) const override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
-
struct re_star : public RegEx::re_parent
Representation of zero-or-more instances of a component, e.g., a*.
Public Functions
-
inline re_star(Ptr<re_base> c)
-
inline void Print(std::ostream &os) const override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
-
struct re_string : public RegEx::re_base
Representation of strings stored in a RegEx.
Public Functions
-
inline re_string()
-
inline re_string(char c)
-
inline re_string(const std::string &s)
-
inline void Print(std::ostream &os) const override
-
inline Ptr<re_string> AsString() override
-
inline size_t GetSize() const override
-
inline virtual void AddToNFA(NFA &nfa, size_t start, size_t stop) const override
Public Members
-
std::string str