Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

cmt_regexp Class Reference

#include <cmt_regexp.h>

Collaboration diagram for cmt_regexp:

Collaboration graph
[legend]
List of all members.

Public Methods

 cmt_regexp (const cmt_string& expression)
 ~cmt_regexp ()
bool is_valid () const
iterator begin (const cmt_string& text, int pos = 0)
iterator end ()
iterator begin (const cmt_string& text, int pos = 0) const
iterator end () const
bool match (const cmt_string& text) const

Private Attributes

cmt_node_root

Constructor & Destructor Documentation

cmt_regexp::cmt_regexp ( const cmt_string & expression )
 

Definition at line 949 of file cmt_regexp.cxx.

00950 {
00951   _root = 0;
00952 
00953     //
00954     // The root is the cmt_or_node which will be returned. It is
00955     // the top of the hierarchy.
00956     //
00957     //  top is the running cmt_and_node.
00958     //
00959   cmt_node_set* or_root = 0;
00960   cmt_node_set* top_and = 0;
00961   
00962     // abcd
00963     // ab|cd
00964     // a|b|cd
00965     // a|b*|cd
00966     // a|b*|cd?e
00967     //
00968     // exp     : and
00969     //         | exp '|' and
00970     //
00971     // and     : unary 
00972     //         | unary and
00973     //
00974     // unary   : primary '*'
00975     //         | primary '?'
00976     //
00977     // primary : '[' opt_begin opt_chars opt_end ']'
00978     //         | '^'
00979     //         | '$'
00980     //         | char
00981     //         | '(' exp ')'
00982     //
00983   
00984   {
00985       //
00986       // First we build an cmt_or_node (corresponding to the
00987       // first grammatical rule)
00988       //
00989       //  Then cmt_and_nodes are pushed into it.
00990       //  and standard nodes are pushed into the running (top_and) cmt_and_node
00991       //
00992     or_root = new cmt_or_node (0);
00993     top_and = new cmt_and_node (or_root);
00994   }
00995   
00996   int i;
00997   
00998   for (i = 0; i < expression.size (); i++)
00999     {
01000       char c = expression[i];
01001       switch (c)
01002         {
01003           case '[':
01004           {
01005               //
01006               // The case is 
01007               //
01008               //  exp   : '['     char ... ']'
01009               //  exp   : '[' '^' char ... ']'
01010               //
01011 
01012             if (i >= expression.size ()) 
01013               {
01014                   // syntax error : unbalanced '['
01015                 delete or_root;
01016                 return;
01017               }
01018             i++;
01019             
01020             int i0 = i;
01021             
01022             bool done = false;
01023             bool has_not = false;
01024             
01025             cmt_string choices = "";
01026             
01027             for (; i < expression.size (); i++)
01028               {
01029                 c = expression[i];
01030                 switch (c)
01031                   {
01032                     case ']':
01033                       done = true;
01034                       break;
01035                     case '^':
01036                       if (i == i0) has_not = true;
01037                       else choices += c;
01038                       break;
01039                     case '\\':
01040                       choices += c;
01041                       if (i >= expression.size ())
01042                         {
01043                             // syntax error : unbalanced '[' and unfinished
01044                             // escape sequence
01045                           delete or_root;
01046                           return;
01047                         }
01048                       i++;
01049                       c = expression[i];
01050                       choices += c;
01051                       break;
01052                     default:
01053                       choices += c;
01054                       break;
01055                   }
01056                 if (done) break;
01057               }
01058             
01059             if (!done)
01060               {
01061                   // syntax error : unbalanced '['
01062                 delete or_root;
01063                 return;
01064               }
01065             if (has_not)
01066               top_and->push (new cmt_not_char_list_node (choices));
01067             else        
01068               top_and->push (new cmt_char_list_node (choices));
01069           }
01070           break;
01071           case '*':
01072           {
01073               //
01074               //  exp : exp '*'
01075               //
01076             if (top_and->nodes () == 0)
01077               {
01078                   // Syntax error : '*' is not preceded by an expression
01079                 delete or_root;
01080                 return;
01081               }
01082             
01083             cmt_node* n = top_and->pop ();
01084             top_and->push (new cmt_zero_more (n));
01085           }
01086           break;
01087           case '+':
01088           {
01089               //
01090               //  exp : exp '+'
01091               //
01092             if (top_and->nodes () == 0)
01093               {
01094                   // Syntax error : '+' is not preceded by an expression
01095                 delete or_root;
01096                 return;
01097               }
01098             
01099             cmt_node* n = top_and->pop ();
01100             top_and->push (new cmt_one_more (n));
01101           }
01102           break;
01103           case '?':
01104           {
01105               //
01106               //  exp : exp '?'
01107               //
01108             if (top_and->nodes () == 0)
01109               {
01110                   // Syntax error : '?' is not preceded by an expression
01111                 delete or_root;
01112                 return;
01113               }
01114             
01115             cmt_node* n = top_and->pop ();
01116             top_and->push (new cmt_zero_one (n));
01117           }
01118           break;
01119           case '.':
01120               //
01121               //  exp : '.'
01122               //
01123             top_and->push (new cmt_any_node ());
01124             break;
01125           case '(':
01126           {
01127               //
01128               //  exp : '(' exp ')'
01129               //
01130             if (top_and->parentheses ())
01131               {
01132                   // This should never happen.
01133                 delete or_root;
01134                 return;
01135               }
01136             
01137             top_and->set_parentheses (true);
01138             
01139               //
01140               // A new complete expression is started.
01141               //  -> do as for top_and parsing.
01142               //
01143             
01144             top_and = new cmt_and_node (new cmt_or_node (top_and));
01145           }
01146           break;
01147           case ')':
01148           {
01149               //
01150               //  exp : '(' exp ')'
01151               //
01152             
01153               // top_and is the cmt_and_node into which new nodes are pushed.
01154             cmt_node_set* or_node = top_and->father ();
01155             if (or_node == 0) 
01156               {
01157                   // This should never happen : top_and should always be
01158                   // at least an cmt_and_node hanging at an cmt_or_node
01159                 delete or_root;
01160                 return;
01161               }
01162             
01163               //
01164               // The last cmt_and_node was empty, thus we had either '()' or '(...|)'
01165               //
01166             
01167             if (top_and->nodes () == 0) 
01168               {
01169                 delete (or_node->pop ());
01170               }
01171             else
01172               {
01173                 top_and->reduce ();
01174               }
01175             
01176             top_and = or_node->father ();
01177             
01178             if (top_and == 0)
01179               {
01180                   // Syntax error : too many ')'
01181                 delete or_root;
01182                 return;
01183               }
01184             
01185               //
01186               // top_and is now the previous running cmt_and_node where the '(' 
01187               // was originally met its top_and node contains the parenthesized 
01188               // sub expression  If this one is empty, (due to an empty '()' 
01189               // expression) then it may simply be discarded.
01190               //
01191             
01192             if (!top_and->parentheses ())
01193               {
01194                   // Syntax error : too many ')'
01195                 delete or_root;
01196                 return;
01197               }
01198             
01199             top_and->set_parentheses (false);
01200             
01201             cmt_node* unique = 0;
01202             if (or_node->nodes () == 1)
01203               {
01204                 cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01205                 if (and_node->nodes () == 1)
01206                   {
01207                     unique = and_node->pop ();
01208                     delete (or_node->pop ());
01209                   }
01210                 else if (and_node->nodes () == 0)
01211                   {
01212                     delete (or_node->pop ());
01213                   }
01214               }
01215             
01216             if (or_node->nodes () == 0) delete (top_and->pop ());
01217             if (unique != 0) top_and->push (unique);
01218           }
01219           
01220           break;
01221           case '|':
01222           {
01223               //
01224               //  exp : exp '|' exp
01225               //
01226 
01227             cmt_node_set* or_node = top_and->father ();
01228             
01229             top_and->reduce ();
01230             
01231               //
01232               // or is the father cmt_or_node, which only contains cmt_and_nodes
01233               //
01234             
01235             const cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01236             if (and_node->nodes () == 0)
01237               {
01238                   // the previous node was empty.
01239                   // we may discard it
01240                 or_node->pop ();
01241               }
01242             
01243             top_and = new cmt_and_node (or_node);
01244           }
01245           break;
01246           case '^':
01247               //
01248               //  exp : '^'
01249               //
01250             top_and->push (new cmt_begin_node ());
01251             break;
01252           case '$':
01253               //
01254               //  exp : '$'
01255               //
01256             top_and->push (new cmt_end_node ());
01257             break;
01258           case '\\':
01259             if (i >= expression.size ())
01260               {
01261                 delete or_root;
01262                 return;
01263               }
01264             i++;
01265             c = expression[i];
01266             switch (c)
01267               {
01268                 case '[':
01269                 case ']':
01270                 case '(':
01271                 case ')':
01272                 case '.':
01273                 case '*':
01274                 case '?':
01275                 case '^':
01276                 case '$':
01277                 case '\\':
01278                   break;
01279                 case 'r':
01280                   c = '\r';
01281                   break;
01282                 case 't':
01283                   c = '\t';
01284                   break;
01285                 case 'n':
01286                   c = '\n';
01287                   break;
01288                 default:
01289                   break;
01290               }
01291           default:
01292             top_and->push (new cmt_char_node (c));
01293             break;
01294         }
01295     }
01296   
01297   if (or_root != 0)
01298     {
01299       cmt_node_set* and_node = (cmt_node_set*) or_root->top ();
01300       
01301       if (or_root->nodes () == 1)
01302         {
01303             //
01304             // Check whether there is at least one non-empty
01305             // cmt_and_node
01306             //
01307           if (and_node->nodes () == 0)
01308             {
01309               delete or_root;
01310               return;
01311             }
01312         }
01313       
01314       if (and_node != 0)
01315         {
01316           and_node->reduce ();
01317           
01318           if (and_node->parentheses ())
01319             {
01320               delete or_root;
01321               return;
01322             }
01323         }
01324     }
01325   
01326   _root = or_root;
01327 }

cmt_regexp::~cmt_regexp ( )
 

Definition at line 1329 of file cmt_regexp.cxx.

01330 {
01331   if (_root != 0)
01332     {
01333       delete _root;
01334     }
01335 }


Member Function Documentation

cmt_regexp::iterator cmt_regexp::begin ( const cmt_string & text,
int pos = 0 ) const
 

Definition at line 1364 of file cmt_regexp.cxx.

01365 {
01366   if (_root != 0)
01367     {
01368       int i;
01369       
01370       for (i = pos; i < text.size (); i++)
01371         {
01372           cmt_regexp::iterator it = _root->match (text, i);
01373           if (it != end ()) return (it);
01374         }
01375     }
01376   
01377   return (end ());
01378 }

cmt_regexp::iterator cmt_regexp::begin ( const cmt_string & text,
int pos = 0 )
 

Definition at line 1343 of file cmt_regexp.cxx.

Referenced by match().

01344 {
01345   if (_root != 0)
01346     {
01347       int i;
01348       
01349       for (i = pos; i < text.size (); i++)
01350         {
01351           cmt_regexp::iterator it = _root->match (text, i);
01352           if (it != end ()) return (it);
01353         }
01354     }
01355   
01356   return (end ());
01357 }

cmt_regexp::iterator cmt_regexp::end ( ) const
 

Definition at line 1380 of file cmt_regexp.cxx.

01381 {
01382   return (cmt_regexp::iterator::null ());
01383 }

cmt_regexp::iterator cmt_regexp::end ( )
 

Definition at line 1359 of file cmt_regexp.cxx.

Referenced by begin(), and match().

01360 {
01361   return (cmt_regexp::iterator::null ());
01362 }

bool cmt_regexp::is_valid ( ) const
 

Definition at line 1337 of file cmt_regexp.cxx.

01338 {
01339   if (_root != 0) return (true);
01340   else return (false);
01341 }

bool cmt_regexp::match ( const cmt_string & text ) const
 

Definition at line 1385 of file cmt_regexp.cxx.

Referenced by CvsImplementation::filter_list(), CvsImplementation::match_version_request(), Parser::parse_line(), PAwk::run(), Awk::run(), and CmtSystem::scan_dir().

01386 {
01387   iterator it = begin (text);
01388   if (it == end ()) return (false);
01389   else return (true);
01390 }


Member Data Documentation

cmt_node * cmt_regexp::_root [private]
 

Definition at line 43 of file cmt_regexp.h.


The documentation for this class was generated from the following files:
Generated at Thu May 16 16:27:43 2002 for CMT by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000