#include <cmt_regexp.h>
Collaboration diagram for cmt_regexp:
Public Methods | |
cmt_regexp (const cmt_string& expression) | |
~cmt_regexp () | |
bool | is_valid () const |
iterator | begin (const cmt_string& text, int pos = 0) |
iterator | end () |
iterator | begin (const cmt_string& text, int pos = 0) const |
iterator | end () const |
bool | match (const cmt_string& text) const |
Private Attributes | |
cmt_node* | _root |
|
Definition at line 949 of file cmt_regexp.cxx. 00950 { 00951 _root = 0; 00952 00953 // 00954 // The root is the cmt_or_node which will be returned. It is 00955 // the top of the hierarchy. 00956 // 00957 // top is the running cmt_and_node. 00958 // 00959 cmt_node_set* or_root = 0; 00960 cmt_node_set* top_and = 0; 00961 00962 // abcd 00963 // ab|cd 00964 // a|b|cd 00965 // a|b*|cd 00966 // a|b*|cd?e 00967 // 00968 // exp : and 00969 // | exp '|' and 00970 // 00971 // and : unary 00972 // | unary and 00973 // 00974 // unary : primary '*' 00975 // | primary '?' 00976 // 00977 // primary : '[' opt_begin opt_chars opt_end ']' 00978 // | '^' 00979 // | '$' 00980 // | char 00981 // | '(' exp ')' 00982 // 00983 00984 { 00985 // 00986 // First we build an cmt_or_node (corresponding to the 00987 // first grammatical rule) 00988 // 00989 // Then cmt_and_nodes are pushed into it. 00990 // and standard nodes are pushed into the running (top_and) cmt_and_node 00991 // 00992 or_root = new cmt_or_node (0); 00993 top_and = new cmt_and_node (or_root); 00994 } 00995 00996 int i; 00997 00998 for (i = 0; i < expression.size (); i++) 00999 { 01000 char c = expression[i]; 01001 switch (c) 01002 { 01003 case '[': 01004 { 01005 // 01006 // The case is 01007 // 01008 // exp : '[' char ... ']' 01009 // exp : '[' '^' char ... ']' 01010 // 01011 01012 if (i >= expression.size ()) 01013 { 01014 // syntax error : unbalanced '[' 01015 delete or_root; 01016 return; 01017 } 01018 i++; 01019 01020 int i0 = i; 01021 01022 bool done = false; 01023 bool has_not = false; 01024 01025 cmt_string choices = ""; 01026 01027 for (; i < expression.size (); i++) 01028 { 01029 c = expression[i]; 01030 switch (c) 01031 { 01032 case ']': 01033 done = true; 01034 break; 01035 case '^': 01036 if (i == i0) has_not = true; 01037 else choices += c; 01038 break; 01039 case '\\': 01040 choices += c; 01041 if (i >= expression.size ()) 01042 { 01043 // syntax error : unbalanced '[' and unfinished 01044 // escape sequence 01045 delete or_root; 01046 return; 01047 } 01048 i++; 01049 c = expression[i]; 01050 choices += c; 01051 break; 01052 default: 01053 choices += c; 01054 break; 01055 } 01056 if (done) break; 01057 } 01058 01059 if (!done) 01060 { 01061 // syntax error : unbalanced '[' 01062 delete or_root; 01063 return; 01064 } 01065 if (has_not) 01066 top_and->push (new cmt_not_char_list_node (choices)); 01067 else 01068 top_and->push (new cmt_char_list_node (choices)); 01069 } 01070 break; 01071 case '*': 01072 { 01073 // 01074 // exp : exp '*' 01075 // 01076 if (top_and->nodes () == 0) 01077 { 01078 // Syntax error : '*' is not preceded by an expression 01079 delete or_root; 01080 return; 01081 } 01082 01083 cmt_node* n = top_and->pop (); 01084 top_and->push (new cmt_zero_more (n)); 01085 } 01086 break; 01087 case '+': 01088 { 01089 // 01090 // exp : exp '+' 01091 // 01092 if (top_and->nodes () == 0) 01093 { 01094 // Syntax error : '+' is not preceded by an expression 01095 delete or_root; 01096 return; 01097 } 01098 01099 cmt_node* n = top_and->pop (); 01100 top_and->push (new cmt_one_more (n)); 01101 } 01102 break; 01103 case '?': 01104 { 01105 // 01106 // exp : exp '?' 01107 // 01108 if (top_and->nodes () == 0) 01109 { 01110 // Syntax error : '?' is not preceded by an expression 01111 delete or_root; 01112 return; 01113 } 01114 01115 cmt_node* n = top_and->pop (); 01116 top_and->push (new cmt_zero_one (n)); 01117 } 01118 break; 01119 case '.': 01120 // 01121 // exp : '.' 01122 // 01123 top_and->push (new cmt_any_node ()); 01124 break; 01125 case '(': 01126 { 01127 // 01128 // exp : '(' exp ')' 01129 // 01130 if (top_and->parentheses ()) 01131 { 01132 // This should never happen. 01133 delete or_root; 01134 return; 01135 } 01136 01137 top_and->set_parentheses (true); 01138 01139 // 01140 // A new complete expression is started. 01141 // -> do as for top_and parsing. 01142 // 01143 01144 top_and = new cmt_and_node (new cmt_or_node (top_and)); 01145 } 01146 break; 01147 case ')': 01148 { 01149 // 01150 // exp : '(' exp ')' 01151 // 01152 01153 // top_and is the cmt_and_node into which new nodes are pushed. 01154 cmt_node_set* or_node = top_and->father (); 01155 if (or_node == 0) 01156 { 01157 // This should never happen : top_and should always be 01158 // at least an cmt_and_node hanging at an cmt_or_node 01159 delete or_root; 01160 return; 01161 } 01162 01163 // 01164 // The last cmt_and_node was empty, thus we had either '()' or '(...|)' 01165 // 01166 01167 if (top_and->nodes () == 0) 01168 { 01169 delete (or_node->pop ()); 01170 } 01171 else 01172 { 01173 top_and->reduce (); 01174 } 01175 01176 top_and = or_node->father (); 01177 01178 if (top_and == 0) 01179 { 01180 // Syntax error : too many ')' 01181 delete or_root; 01182 return; 01183 } 01184 01185 // 01186 // top_and is now the previous running cmt_and_node where the '(' 01187 // was originally met its top_and node contains the parenthesized 01188 // sub expression If this one is empty, (due to an empty '()' 01189 // expression) then it may simply be discarded. 01190 // 01191 01192 if (!top_and->parentheses ()) 01193 { 01194 // Syntax error : too many ')' 01195 delete or_root; 01196 return; 01197 } 01198 01199 top_and->set_parentheses (false); 01200 01201 cmt_node* unique = 0; 01202 if (or_node->nodes () == 1) 01203 { 01204 cmt_node_set* and_node = (cmt_node_set*) or_node->top (); 01205 if (and_node->nodes () == 1) 01206 { 01207 unique = and_node->pop (); 01208 delete (or_node->pop ()); 01209 } 01210 else if (and_node->nodes () == 0) 01211 { 01212 delete (or_node->pop ()); 01213 } 01214 } 01215 01216 if (or_node->nodes () == 0) delete (top_and->pop ()); 01217 if (unique != 0) top_and->push (unique); 01218 } 01219 01220 break; 01221 case '|': 01222 { 01223 // 01224 // exp : exp '|' exp 01225 // 01226 01227 cmt_node_set* or_node = top_and->father (); 01228 01229 top_and->reduce (); 01230 01231 // 01232 // or is the father cmt_or_node, which only contains cmt_and_nodes 01233 // 01234 01235 const cmt_node_set* and_node = (cmt_node_set*) or_node->top (); 01236 if (and_node->nodes () == 0) 01237 { 01238 // the previous node was empty. 01239 // we may discard it 01240 or_node->pop (); 01241 } 01242 01243 top_and = new cmt_and_node (or_node); 01244 } 01245 break; 01246 case '^': 01247 // 01248 // exp : '^' 01249 // 01250 top_and->push (new cmt_begin_node ()); 01251 break; 01252 case '$': 01253 // 01254 // exp : '$' 01255 // 01256 top_and->push (new cmt_end_node ()); 01257 break; 01258 case '\\': 01259 if (i >= expression.size ()) 01260 { 01261 delete or_root; 01262 return; 01263 } 01264 i++; 01265 c = expression[i]; 01266 switch (c) 01267 { 01268 case '[': 01269 case ']': 01270 case '(': 01271 case ')': 01272 case '.': 01273 case '*': 01274 case '?': 01275 case '^': 01276 case '$': 01277 case '\\': 01278 break; 01279 case 'r': 01280 c = '\r'; 01281 break; 01282 case 't': 01283 c = '\t'; 01284 break; 01285 case 'n': 01286 c = '\n'; 01287 break; 01288 default: 01289 break; 01290 } 01291 default: 01292 top_and->push (new cmt_char_node (c)); 01293 break; 01294 } 01295 } 01296 01297 if (or_root != 0) 01298 { 01299 cmt_node_set* and_node = (cmt_node_set*) or_root->top (); 01300 01301 if (or_root->nodes () == 1) 01302 { 01303 // 01304 // Check whether there is at least one non-empty 01305 // cmt_and_node 01306 // 01307 if (and_node->nodes () == 0) 01308 { 01309 delete or_root; 01310 return; 01311 } 01312 } 01313 01314 if (and_node != 0) 01315 { 01316 and_node->reduce (); 01317 01318 if (and_node->parentheses ()) 01319 { 01320 delete or_root; 01321 return; 01322 } 01323 } 01324 } 01325 01326 _root = or_root; 01327 } |
|
Definition at line 1329 of file cmt_regexp.cxx. 01330 { 01331 if (_root != 0) 01332 { 01333 delete _root; 01334 } 01335 } |
|
Definition at line 1364 of file cmt_regexp.cxx. 01365 { 01366 if (_root != 0) 01367 { 01368 int i; 01369 01370 for (i = pos; i < text.size (); i++) 01371 { 01372 cmt_regexp::iterator it = _root->match (text, i); 01373 if (it != end ()) return (it); 01374 } 01375 } 01376 01377 return (end ()); 01378 } |
|
Definition at line 1343 of file cmt_regexp.cxx. Referenced by match(). 01344 { 01345 if (_root != 0) 01346 { 01347 int i; 01348 01349 for (i = pos; i < text.size (); i++) 01350 { 01351 cmt_regexp::iterator it = _root->match (text, i); 01352 if (it != end ()) return (it); 01353 } 01354 } 01355 01356 return (end ()); 01357 } |
|
Definition at line 1380 of file cmt_regexp.cxx. 01381 { 01382 return (cmt_regexp::iterator::null ()); 01383 } |
|
Definition at line 1359 of file cmt_regexp.cxx. Referenced by begin(), and match(). 01360 { 01361 return (cmt_regexp::iterator::null ()); 01362 } |
|
Definition at line 1337 of file cmt_regexp.cxx. 01338 { 01339 if (_root != 0) return (true); 01340 else return (false); 01341 } |
|
Definition at line 1385 of file cmt_regexp.cxx. Referenced by CvsImplementation::filter_list(), CvsImplementation::match_version_request(), Parser::parse_line(), PAwk::run(), Awk::run(), and CmtSystem::scan_dir(). |
|
Definition at line 43 of file cmt_regexp.h. |