| 1 | // |
|---|
| 2 | // regex.hpp 1.0 Copyright (c) 2003 Peter Petersen (pp@on-time.de) |
|---|
| 3 | // Simple C++ wrapper for PCRE |
|---|
| 4 | // |
|---|
| 5 | // This source file is freeware. You may use it for any purpose without |
|---|
| 6 | // restriction except that the copyright notice as the top of this file as |
|---|
| 7 | // well as this paragraph may not be removed or altered. |
|---|
| 8 | // |
|---|
| 9 | // This header file declares class RegEx, a simple and small API wrapper |
|---|
| 10 | // for PCRE. |
|---|
| 11 | // |
|---|
| 12 | // RegEx::RegEx(const char * regex, int options = 0) |
|---|
| 13 | // |
|---|
| 14 | // The constructor's first parameter is the regular expression the |
|---|
| 15 | // created object shall implement. Optional parameter options can be |
|---|
| 16 | // any combination of PCRE options accepted by pcre_compile(). If |
|---|
| 17 | // compiling the regular expression fails, an error message string is |
|---|
| 18 | // thrown as an exception. |
|---|
| 19 | // |
|---|
| 20 | // RegEx::~RegEx() |
|---|
| 21 | // |
|---|
| 22 | // The destructor frees all resources held by the RegEx object. |
|---|
| 23 | // |
|---|
| 24 | // int RegEx::SubStrings(void) const |
|---|
| 25 | // |
|---|
| 26 | // Method SubStrings() returns the number of substrings defined by |
|---|
| 27 | // the regular expression. The match of the entire expression is also |
|---|
| 28 | // considered a substring, so the return value will always be >= 1. |
|---|
| 29 | // |
|---|
| 30 | // bool RegEx::Search(const char * subject, int len = -1, int options = 0) |
|---|
| 31 | // |
|---|
| 32 | // Method Search() applies the regular expression to parameter subject. |
|---|
| 33 | // Optional parameter len can be used to pass the subject's length to |
|---|
| 34 | // Search(). If not specified (or less than 0), strlen() is used |
|---|
| 35 | // internally to determine the length. Parameter options can contain |
|---|
| 36 | // any combination of options PCRE_ANCHORED, PCRE_NOTBOL, PCRE_NOTEOL, |
|---|
| 37 | // and PCRE_NOTEMPTY. Search() returns true if a match is found. |
|---|
| 38 | // |
|---|
| 39 | // bool RegEx::SearchAgain(int options = 0) |
|---|
| 40 | // |
|---|
| 41 | // SearchAgain() again applies the regular expression to parameter |
|---|
| 42 | // subject last passed to a successful call of Search(). It returns |
|---|
| 43 | // true if a further match is found. Subsequent calls to SearchAgain() |
|---|
| 44 | // will find all matches in subject. Example: |
|---|
| 45 | // |
|---|
| 46 | // if (Pattern.Search(astring)) { |
|---|
| 47 | // do { |
|---|
| 48 | // printf("%s\n", Pattern.Match()); |
|---|
| 49 | // } while (Pattern.SearchAgain()); |
|---|
| 50 | // } |
|---|
| 51 | // |
|---|
| 52 | // Parameter options is interpreted as for method Search(). |
|---|
| 53 | // |
|---|
| 54 | // const char * RegEx::Match(int i = 1) |
|---|
| 55 | // |
|---|
| 56 | // Method Match() returns a pointer to the matched substring specified |
|---|
| 57 | // with parameter i. Match() may only be called after a successful |
|---|
| 58 | // call to Search() or SearchAgain() and applies to that last |
|---|
| 59 | // Search()/SearchAgain() call. Parameter i must be less than |
|---|
| 60 | // SubStrings(). Match(-1) returns the last searched subject. |
|---|
| 61 | // Match(0) returns the match of the complete regular expression. |
|---|
| 62 | // Match(1) returns $1, etc. |
|---|
| 63 | // |
|---|
| 64 | // The bottom of this file contains an example using class RegEx. It's |
|---|
| 65 | // the simplest version of grep I could come up with. You can compile it by |
|---|
| 66 | // defining REGEX_DEMO on the compiler command line. |
|---|
| 67 | // |
|---|
| 68 | |
|---|
| 69 | #ifndef _REGEX_H |
|---|
| 70 | #define _REGEX_H |
|---|
| 71 | |
|---|
| 72 | #include <string.h> |
|---|
| 73 | |
|---|
| 74 | #ifndef _PCRE_H |
|---|
| 75 | #include "pcre.h" |
|---|
| 76 | #endif |
|---|
| 77 | |
|---|
| 78 | class RegEx |
|---|
| 79 | { |
|---|
| 80 | public: |
|---|
| 81 | ///////////////////////////////// |
|---|
| 82 | RegEx(const char * regex, int options = 0) |
|---|
| 83 | { |
|---|
| 84 | const char * error; |
|---|
| 85 | int erroffset; |
|---|
| 86 | |
|---|
| 87 | re = pcre_compile(regex, options, &error, &erroffset, NULL); |
|---|
| 88 | if (re == NULL) |
|---|
| 89 | throw error; |
|---|
| 90 | pe = pcre_study(re, 0, &error); |
|---|
| 91 | pcre_fullinfo(re, pe, PCRE_INFO_CAPTURECOUNT, &substrcount); |
|---|
| 92 | substrcount++; |
|---|
| 93 | ovector = new int[3*substrcount]; |
|---|
| 94 | matchlist = NULL; |
|---|
| 95 | }; |
|---|
| 96 | |
|---|
| 97 | ///////////////////////////////// |
|---|
| 98 | ~RegEx() |
|---|
| 99 | { |
|---|
| 100 | ClearMatchList(); |
|---|
| 101 | delete ovector; |
|---|
| 102 | if (pe) |
|---|
| 103 | pcre_free(pe); |
|---|
| 104 | pcre_free(re); |
|---|
| 105 | } |
|---|
| 106 | |
|---|
| 107 | ///////////////////////////////// |
|---|
| 108 | inline int SubStrings(void) const |
|---|
| 109 | { |
|---|
| 110 | return substrcount; |
|---|
| 111 | } |
|---|
| 112 | |
|---|
| 113 | ///////////////////////////////// |
|---|
| 114 | bool Search(const char * subject, int len = -1, int options = 0) |
|---|
| 115 | { |
|---|
| 116 | ClearMatchList(); |
|---|
| 117 | return pcre_exec(re, pe, lastsubject = subject, slen = (len >= 0) ? len : strlen(subject), 0, options, ovector, 3*substrcount) > 0; |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | ///////////////////////////////// |
|---|
| 121 | bool SearchAgain(int options = 0) |
|---|
| 122 | { |
|---|
| 123 | ClearMatchList(); |
|---|
| 124 | return pcre_exec(re, pe, lastsubject, slen, ovector[1], options, ovector, 3*substrcount) > 0; |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | ///////////////////////////////// |
|---|
| 128 | const char * Match(int i = 1) |
|---|
| 129 | { |
|---|
| 130 | if (i < 0) |
|---|
| 131 | return lastsubject; |
|---|
| 132 | if (matchlist == NULL) |
|---|
| 133 | pcre_get_substring_list(lastsubject, ovector, substrcount, &matchlist); |
|---|
| 134 | return matchlist[i]; |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | private: |
|---|
| 138 | inline void ClearMatchList(void) |
|---|
| 139 | { |
|---|
| 140 | if (matchlist) |
|---|
| 141 | pcre_free_substring_list(matchlist), |
|---|
| 142 | matchlist = NULL; |
|---|
| 143 | } |
|---|
| 144 | pcre * re; |
|---|
| 145 | pcre_extra * pe; |
|---|
| 146 | int substrcount; |
|---|
| 147 | int * ovector; |
|---|
| 148 | const char * lastsubject; |
|---|
| 149 | int slen; |
|---|
| 150 | const char * * matchlist; |
|---|
| 151 | }; |
|---|
| 152 | |
|---|
| 153 | // Below is a little demo/test program using class RegEx |
|---|
| 154 | |
|---|
| 155 | #ifdef REGEX_DEMO |
|---|
| 156 | |
|---|
| 157 | #include <stdio.h> |
|---|
| 158 | #include "regex.hpp" |
|---|
| 159 | |
|---|
| 160 | /////////////////////////////////////// |
|---|
| 161 | int main(int argc, char * argv[]) |
|---|
| 162 | { |
|---|
| 163 | if (argc != 2) |
|---|
| 164 | { |
|---|
| 165 | fprintf(stderr, "Usage: grep pattern\n\n" |
|---|
| 166 | "Reads stdin, searches 'pattern', writes to stdout\n"); |
|---|
| 167 | return 2; |
|---|
| 168 | } |
|---|
| 169 | try |
|---|
| 170 | { |
|---|
| 171 | RegEx Pattern(argv[1]); |
|---|
| 172 | int count = 0; |
|---|
| 173 | char buffer[1024]; |
|---|
| 174 | |
|---|
| 175 | while (fgets(buffer, sizeof(buffer), stdin)) |
|---|
| 176 | if (Pattern.Search(buffer)) |
|---|
| 177 | fputs(buffer, stdout), |
|---|
| 178 | count++; |
|---|
| 179 | return count == 0; |
|---|
| 180 | } |
|---|
| 181 | catch (const char * ErrorMsg) |
|---|
| 182 | { |
|---|
| 183 | fprintf(stderr, "error in regex '%s': %s\n", argv[1], ErrorMsg); |
|---|
| 184 | return 2; |
|---|
| 185 | } |
|---|
| 186 | } |
|---|
| 187 | |
|---|
| 188 | #endif // REGEX_DEMO |
|---|
| 189 | |
|---|
| 190 | #endif // _REGEX_H |
|---|