00001
00002 #include "cmt_std.h"
00003 #include "cmt_regexp.h"
00004 #include "cmt_vector.h"
00005
00006
00007
00008
00009
00010
00011
00012
00013 class cmt_node
00014 {
00015 public:
00016 static cmt_node& null ();
00017 static int node_count ();
00018
00019 cmt_node ();
00020 virtual ~cmt_node ();
00021
00022 virtual const cmt_regexp::iterator match (const cmt_string& text,
00023 int pos) const;
00024 virtual bool is_char () const;
00025 virtual bool is_many_node () const;
00026
00027 private:
00028 static int _node_count;
00029 };
00030
00031
00032
00033 class cmt_char_node : public cmt_node
00034 {
00035 public:
00036 cmt_char_node (char c);
00037
00038 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00039
00040 bool is_char () const;
00041 operator char ();
00042
00043 private:
00044 char _c;
00045 };
00046
00047
00048
00049 class cmt_string_node : public cmt_node
00050 {
00051 public:
00052 cmt_string_node (const cmt_string& s);
00053
00054 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00055
00056 private:
00057 cmt_string _s;
00058 };
00059
00060
00061
00062 class cmt_char_list_node : public cmt_node
00063 {
00064 public:
00065 cmt_char_list_node (cmt_string list);
00066
00067 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00068
00069 protected:
00070 cmt_string _list;
00071 cmt_string _choices;
00072 };
00073
00074
00075
00076 class cmt_not_char_list_node : public cmt_char_list_node
00077 {
00078 public:
00079 cmt_not_char_list_node (cmt_string list);
00080
00081 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00082 };
00083
00084
00085
00086 class cmt_any_node : public cmt_node
00087 {
00088 public:
00089 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00090 };
00091
00092
00093
00094 class cmt_zero_one : public cmt_node
00095 {
00096 public:
00097 cmt_zero_one (cmt_node* n);
00098 ~cmt_zero_one ();
00099
00100 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00101
00102 protected:
00103 cmt_node* _node;
00104 };
00105
00106
00107
00108 class cmt_begin_node : public cmt_node
00109 {
00110 public:
00111 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00112 };
00113
00114
00115
00116 class cmt_end_node : public cmt_node
00117 {
00118 public:
00119 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00120 };
00121
00122
00123
00124 class cmt_node_set : public cmt_node
00125 {
00126 public:
00127 cmt_node_set ();
00128 cmt_node_set (cmt_node_set* father);
00129 ~cmt_node_set ();
00130
00131 cmt_node_set* father ();
00132 void clear ();
00133 void push (cmt_node* n);
00134 cmt_node* pop ();
00135 cmt_node* top () const;
00136 int nodes () const;
00137 const cmt_node* nodeAt (int index) const;
00138 bool parentheses () const;
00139 void set_parentheses (bool value);
00140 virtual void reduce ();
00141
00142 protected:
00143 cmt_node_set* _father;
00144 cmt_vector<cmt_node*> _nodes;
00145 bool _parentheses;
00146 };
00147
00148
00149
00150 class cmt_and_node : public cmt_node_set
00151 {
00152 public:
00153 cmt_and_node ();
00154 cmt_and_node (cmt_node_set* father);
00155
00156 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00157 void reduce ();
00158 void fill (cmt_and_node& other, int start_index);
00159 };
00160
00161
00162
00163 class cmt_or_node : public cmt_node_set
00164 {
00165 public:
00166 cmt_or_node (cmt_node_set* father);
00167
00168 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00169 };
00170
00171
00172
00173 class cmt_many_node : public cmt_node
00174 {
00175 public:
00176 bool is_many_node () const;
00177 void install (cmt_and_node& other, int index);
00178 void reduce ();
00179
00180 protected:
00181 cmt_many_node (cmt_node* n);
00182 virtual ~cmt_many_node ();
00183 cmt_node* _node;
00184 cmt_and_node _follower;
00185 };
00186
00187
00188
00189 class cmt_zero_more : public cmt_many_node
00190 {
00191 public:
00192 cmt_zero_more (cmt_node* n);
00193
00194 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00195 };
00196
00197
00198
00199 class cmt_one_more : public cmt_many_node
00200 {
00201 public:
00202 cmt_one_more (cmt_node* n);
00203
00204 const cmt_regexp::iterator match (const cmt_string& text, int pos) const;
00205 };
00206
00207
00208
00209 cmt_node& cmt_node::null ()
00210 {
00211 static cmt_node null_instance;
00212
00213 return (null_instance);
00214 }
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226 cmt_node::cmt_node ()
00227 {
00228 _node_count++;
00229 }
00230
00231 cmt_node::~cmt_node ()
00232 {
00233 _node_count--;
00234 }
00235
00236 int cmt_node::node_count ()
00237 {
00238 return (_node_count);
00239 }
00240
00241 const cmt_regexp::iterator cmt_node::match (const cmt_string& ,
00242 int ) const
00243 {
00244 return (cmt_regexp::iterator::null());
00245 }
00246
00247 bool cmt_node::is_char () const
00248 {
00249 return (false);
00250 }
00251
00252 bool cmt_node::is_many_node () const
00253 {
00254 return (false);
00255 }
00256
00257 int cmt_node::_node_count = 0;
00258
00259
00260
00261 cmt_char_node::cmt_char_node (char c)
00262 {
00263 _c = c;
00264 }
00265
00266 const cmt_regexp::iterator cmt_char_node::match (const cmt_string& text,
00267 int pos) const
00268 {
00269 if ((pos < 0) || (pos > text.size ()))
00270 {
00271 return (cmt_regexp::iterator::null ());
00272 }
00273
00274 char c = text[pos];
00275
00276 if (c == _c)
00277 {
00278 return (cmt_regexp::iterator (pos, 1));
00279 }
00280
00281 return (cmt_regexp::iterator::null ());
00282 }
00283
00284 bool cmt_char_node::is_char () const
00285 {
00286 return (true);
00287 }
00288
00289 cmt_char_node::operator char ()
00290 {
00291 return (_c);
00292 }
00293
00294
00295
00296 cmt_string_node::cmt_string_node (const cmt_string& s)
00297 {
00298 _s = s;
00299 }
00300
00301 const cmt_regexp::iterator cmt_string_node::match (const cmt_string& text,
00302 int pos) const
00303 {
00304 if ((pos < 0) || (pos > text.size ()))
00305 {
00306 return (cmt_regexp::iterator::null ());
00307 }
00308
00309 int length = _s.size ();
00310
00311 cmt_string s = text.substr (pos, length);
00312
00313 if ((length == 0) || (s == _s))
00314 {
00315 return (cmt_regexp::iterator (pos, length));
00316 }
00317
00318 return (cmt_regexp::iterator::null ());
00319 }
00320
00321
00322
00323 cmt_char_list_node::cmt_char_list_node (cmt_string list)
00324 {
00325 _list = list;
00326
00327 _choices = "";
00328
00329 char c;
00330 int i;
00331
00332 for (i = 0; i < list.size (); i++)
00333 {
00334 c = list[i];
00335
00336 switch (c)
00337 {
00338 case '-':
00339 i++;
00340 {
00341 char c1 = _choices[_choices.size () - 1];
00342 char c2 = list[i];
00343 int j;
00344 int j0 = (c1 < c2) ? c1 : c2;
00345 int j1 = (c1 > c2) ? c1 : c2;
00346 for (j = j0; j <= j1; j++)
00347 {
00348 _choices += j;
00349 }
00350 }
00351 break;
00352 case '\\':
00353 i++;
00354 c = list[i];
00355 switch (c)
00356 {
00357 case '[':
00358 case ']':
00359 case '(':
00360 case ')':
00361 case '.':
00362 case '*':
00363 case '?':
00364 case '^':
00365 case '$':
00366 case '\\':
00367 break;
00368 case 'r':
00369 c = '\r';
00370 break;
00371 case 't':
00372 c = '\t';
00373 break;
00374 case 'n':
00375 c = '\n';
00376 break;
00377 default:
00378 break;
00379 }
00380 default:
00381 _choices += c;
00382 break;
00383 }
00384 }
00385 }
00386
00387 const cmt_regexp::iterator cmt_char_list_node::match (const cmt_string& text,
00388 int pos) const
00389 {
00390 if ((pos < 0) || (pos > text.size ()))
00391 {
00392 return (cmt_regexp::iterator::null ());
00393 }
00394
00395 char c = text[pos];
00396
00397 int i;
00398
00399 for (i = 0; i < _choices.size (); i++)
00400 {
00401 if (c == _choices[i]) return (cmt_regexp::iterator (pos, 1));
00402 }
00403
00404 return (cmt_regexp::iterator::null ());
00405 }
00406
00407
00408
00409 cmt_not_char_list_node::cmt_not_char_list_node (cmt_string list) :
00410 cmt_char_list_node (list)
00411 {
00412 }
00413
00414 const cmt_regexp::iterator cmt_not_char_list_node::match (const cmt_string& text,
00415 int pos) const
00416 {
00417 if ((pos < 0) || (pos > text.size ()))
00418 {
00419 return (cmt_regexp::iterator::null ());
00420 }
00421
00422 char c = text[pos];
00423
00424 int i;
00425
00426 for (i = 0; i < _choices.size (); i++)
00427 {
00428 if (c == _choices[i]) return (cmt_regexp::iterator::null ());
00429 }
00430
00431 return (cmt_regexp::iterator (pos, 1));
00432 }
00433
00434
00435
00436 const cmt_regexp::iterator cmt_any_node::match (const cmt_string& text,
00437 int pos) const
00438 {
00439 if ((pos < 0) | (pos >= text.size ()))
00440 {
00441 return (cmt_regexp::iterator::null ());
00442 }
00443
00444 return (cmt_regexp::iterator (pos, 1));
00445 }
00446
00447
00448
00449 cmt_zero_one::cmt_zero_one (cmt_node* n) : _node (n)
00450 {
00451 }
00452
00453 cmt_zero_one::~cmt_zero_one ()
00454 {
00455 delete _node;
00456 }
00457
00458 const cmt_regexp::iterator cmt_zero_one::match (const cmt_string& text,
00459 int pos) const
00460 {
00461 if ((pos < 0) || (pos > text.size ()))
00462 {
00463 return (cmt_regexp::iterator::null ());
00464 }
00465
00466 int total = 0;
00467
00468 if (pos < text.size ())
00469 {
00470 const cmt_regexp::iterator it = _node->match (text, pos);
00471 if (it != cmt_regexp::iterator::null ())
00472 {
00473 total += it._length;
00474 pos += it._length;
00475 }
00476 }
00477
00478 return (cmt_regexp::iterator (pos, total));
00479 }
00480
00481
00482
00483 cmt_many_node::cmt_many_node (cmt_node* n) : _node (n)
00484 {
00485 }
00486
00487 bool cmt_many_node::is_many_node () const
00488 {
00489 return (true);
00490 }
00491
00492 cmt_many_node::~cmt_many_node ()
00493 {
00494 delete _node;
00495 }
00496
00497 void cmt_many_node::install (cmt_and_node& other, int start_index)
00498 {
00499 _follower.fill (other, start_index);
00500 }
00501
00502 void cmt_many_node::reduce ()
00503 {
00504 _follower.reduce ();
00505 }
00506
00507
00508
00509
00510
00511 cmt_zero_more::cmt_zero_more (cmt_node* n) : cmt_many_node (n)
00512 {
00513 }
00514
00515 const cmt_regexp::iterator cmt_zero_more::match (const cmt_string& text,
00516 int pos) const
00517 {
00518 if ((pos < 0) || (pos > text.size ()))
00519 {
00520 return (cmt_regexp::iterator::null ());
00521 }
00522
00523 int total = 0;
00524
00525
00526
00527
00528
00529 int saved_pos = -1;
00530 int saved_total = -1;
00531
00532 do
00533 {
00534 const cmt_regexp::iterator itx = _node->match (text, pos);
00535 const cmt_regexp::iterator ity = _follower.match (text, pos);
00536
00537 if ((itx == cmt_regexp::iterator::null ()) &&
00538 (ity == cmt_regexp::iterator::null ()))
00539 {
00540
00541
00542
00543
00544 if (saved_pos >= 0)
00545 {
00546
00547
00548
00549 pos = saved_pos;
00550 total = saved_total;
00551 }
00552 else
00553 {
00554
00555
00556
00557 return (cmt_regexp::iterator::null ());
00558 }
00559
00560 break;
00561 }
00562
00563 if (itx == cmt_regexp::iterator::null ())
00564 {
00565
00566
00567
00568 total += ity._length;
00569 pos += ity._length;
00570 break;
00571 }
00572
00573 if (ity != cmt_regexp::iterator::null ())
00574 {
00575
00576
00577
00578
00579 saved_total = total + ity._length;
00580 saved_pos = pos + ity._length;
00581 }
00582 total += itx._length;
00583 pos += itx._length;
00584 } while (true);
00585
00586 return (cmt_regexp::iterator (pos, total));
00587 }
00588
00589
00590
00591 cmt_one_more::cmt_one_more (cmt_node* n) : cmt_many_node (n)
00592 {
00593 }
00594
00595 const cmt_regexp::iterator cmt_one_more::match (const cmt_string& text,
00596 int pos) const
00597 {
00598 if ((pos < 0) || (pos > text.size ()))
00599 {
00600 return (cmt_regexp::iterator::null ());
00601 }
00602
00603 int total = 0;
00604
00605
00606
00607
00608
00609 int saved_pos = -1;
00610 int saved_total = -1;
00611 bool at_least_one = false;
00612
00613 do
00614 {
00615 const cmt_regexp::iterator itx = _node->match (text, pos);
00616 const cmt_regexp::iterator ity = _follower.match (text, pos);
00617
00618 if ((itx == cmt_regexp::iterator::null ()) &&
00619 (ity == cmt_regexp::iterator::null ()))
00620 {
00621
00622
00623
00624
00625 if (saved_pos >= 0)
00626 {
00627
00628
00629
00630 pos = saved_pos;
00631 total = saved_total;
00632 }
00633 else
00634 {
00635
00636
00637
00638 return (cmt_regexp::iterator::null ());
00639 }
00640
00641 break;
00642 }
00643
00644 if (itx == cmt_regexp::iterator::null ())
00645 {
00646
00647
00648
00649 total += ity._length;
00650 pos += ity._length;
00651 break;
00652 }
00653
00654 if (ity != cmt_regexp::iterator::null ())
00655 {
00656
00657
00658
00659
00660 saved_total = total + ity._length;
00661 saved_pos = pos + ity._length;
00662 }
00663
00664 total += itx._length;
00665 pos += itx._length;
00666
00667 at_least_one = true;
00668 } while (true);
00669
00670 if (!at_least_one) return (cmt_regexp::iterator::null ());
00671
00672 return (cmt_regexp::iterator (pos, total));
00673 }
00674
00675
00676
00677
00678
00679 const cmt_regexp::iterator cmt_begin_node::match (const cmt_string& ,
00680 int pos) const
00681 {
00682 if (pos == 0) return (cmt_regexp::iterator (pos, 0));
00683 return (cmt_regexp::iterator::null ());
00684 }
00685
00686
00687
00688 const cmt_regexp::iterator cmt_end_node::match (const cmt_string& text,
00689 int pos) const
00690 {
00691 if (pos == text.size ()) return (cmt_regexp::iterator (pos, 0));
00692 return (cmt_regexp::iterator::null ());
00693 }
00694
00695
00696
00697 cmt_node_set::cmt_node_set () : _father (0)
00698 {
00699 _parentheses = false;
00700 }
00701
00702 cmt_node_set::cmt_node_set (cmt_node_set* father) : _father (father)
00703 {
00704 if (father != 0) father->push (this);
00705 _parentheses = false;
00706 }
00707
00708 cmt_node_set::~cmt_node_set ()
00709 {
00710 clear ();
00711 }
00712
00713 cmt_node_set* cmt_node_set::father ()
00714 {
00715 return (_father);
00716 }
00717
00718 void cmt_node_set::clear ()
00719 {
00720 int i;
00721
00722 for (i = 0; i < _nodes.size (); i++)
00723 {
00724 cmt_node* n = _nodes[i];
00725 delete n;
00726 }
00727 _nodes.clear ();
00728 }
00729
00730 void cmt_node_set::push (cmt_node* n)
00731 {
00732 _nodes.push_back (n);
00733 }
00734
00735 cmt_node* cmt_node_set::pop ()
00736 {
00737 if (_nodes.size () == 0) return (&cmt_node::null ());
00738
00739 int index = _nodes.size () - 1;
00740
00741 cmt_node* n = _nodes[index];
00742 _nodes.erase (index);
00743
00744 return (n);
00745 }
00746
00747 cmt_node* cmt_node_set::top () const
00748 {
00749 if (_nodes.size () == 0) return (&cmt_node::null ());
00750
00751 int index = _nodes.size () - 1;
00752
00753 cmt_node* n = _nodes[index];
00754
00755 return (n);
00756 }
00757
00758 int cmt_node_set::nodes () const
00759 {
00760 return (_nodes.size ());
00761 }
00762
00763 const cmt_node* cmt_node_set::nodeAt (int index) const
00764 {
00765 return (_nodes[index]);
00766 }
00767
00768 bool cmt_node_set::parentheses () const
00769 {
00770 return (_parentheses);
00771 }
00772
00773 void cmt_node_set::set_parentheses (bool value)
00774 {
00775 _parentheses = value;
00776 }
00777
00778 void cmt_node_set::reduce ()
00779 {
00780 }
00781
00782
00783
00784 cmt_and_node::cmt_and_node () : cmt_node_set ()
00785 {
00786 }
00787
00788 cmt_and_node::cmt_and_node (cmt_node_set* father) : cmt_node_set (father)
00789 {
00790 }
00791
00792 const cmt_regexp::iterator cmt_and_node::match (const cmt_string& text,
00793 int pos) const
00794 {
00795 if ((pos < 0) || (pos > text.size ()))
00796 {
00797 return (cmt_regexp::iterator::null ());
00798 }
00799
00800 if (_nodes.size () == 0) return (cmt_regexp::iterator (pos, 0));
00801
00802 int i;
00803 int total = 0;
00804 int p = pos;
00805
00806 for (i = 0; i < _nodes.size (); i++)
00807 {
00808 cmt_node* n = _nodes[i];
00809
00810 const cmt_regexp::iterator it = n->match (text, p);
00811
00812 if (it == cmt_regexp::iterator::null ()) return (it);
00813
00814 total += it._length;
00815 p += it._length;
00816 }
00817
00818
00819
00820 return (cmt_regexp::iterator (pos, total));
00821 }
00822
00823 void cmt_and_node::reduce ()
00824 {
00825 if (_nodes.size () < 2) return;
00826
00827 char c = ' ';
00828 cmt_string s = "";
00829 cmt_vector<cmt_node*> new_nodes;
00830
00831
00832
00833
00834
00835 for (int i = 0; i <= _nodes.size (); i++)
00836 {
00837 cmt_node* n = 0;
00838
00839 if (i < _nodes.size ()) n = _nodes[i];
00840
00841 if ((i >= _nodes.size ()) || (!n->is_char ()))
00842 {
00843 if (s.size () == 1)
00844 {
00845
00846
00847
00848
00849 new_nodes.push_back (new cmt_char_node (c));
00850 s = "";
00851 }
00852 else if (s.size () > 1)
00853 {
00854
00855
00856
00857
00858 new_nodes.push_back (new cmt_string_node (s));
00859 s = "";
00860 }
00861
00862 if (i >= _nodes.size ()) break;
00863 }
00864
00865 if (n->is_char ())
00866 {
00867
00868
00869
00870 cmt_char_node& cn = *((cmt_char_node*) n);
00871 c = (char) cn;
00872 s += c;
00873 delete n;
00874 _nodes[i] = 0;
00875 }
00876 else if (n->is_many_node ())
00877 {
00878 cmt_many_node& mn = *((cmt_many_node*) n);
00879 mn.install (*this, i + 1);
00880 mn.reduce ();
00881 new_nodes.push_back (n);
00882 break;
00883 }
00884 else
00885 {
00886 new_nodes.push_back (n);
00887 }
00888 }
00889
00890 _nodes = new_nodes;
00891 }
00892
00893 void cmt_and_node::fill (cmt_and_node& other, int start_index)
00894 {
00895 if ((start_index < 0) || (start_index > other.nodes ())) return;
00896
00897 for (int i = start_index; i < other.nodes (); i++)
00898 {
00899 cmt_node* n = other._nodes[i];
00900 push (n);
00901 }
00902 }
00903
00904
00905
00906 cmt_or_node::cmt_or_node (cmt_node_set* father) : cmt_node_set (father)
00907 {
00908 }
00909
00910 const cmt_regexp::iterator cmt_or_node::match (const cmt_string& text,
00911 int pos) const
00912 {
00913 if ((pos < 0) || (pos >= text.size ()))
00914 {
00915 return (cmt_regexp::iterator::null ());
00916 }
00917
00918 if (_nodes.size () == 0) return (cmt_regexp::iterator (pos, 0));
00919
00920 int i;
00921
00922 for (i = 0; i < _nodes.size (); i++)
00923 {
00924 const cmt_node* n = _nodes[i];
00925
00926 const cmt_regexp::iterator it = n->match (text, pos);
00927
00928
00929 if (it != cmt_regexp::iterator::null ()) return (it);
00930 }
00931
00932 return (cmt_regexp::iterator::null ());
00933 }
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00946
00947
00948
00949 cmt_regexp::cmt_regexp (const cmt_string& expression)
00950 {
00951 _root = 0;
00952
00953
00954
00955
00956
00957
00958
00959 cmt_node_set* or_root = 0;
00960 cmt_node_set* top_and = 0;
00961
00962
00963
00964
00965
00966
00967
00968
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984 {
00985
00986
00987
00988
00989
00990
00991
00992 or_root = new cmt_or_node (0);
00993 top_and = new cmt_and_node (or_root);
00994 }
00995
00996 int i;
00997
00998 for (i = 0; i < expression.size (); i++)
00999 {
01000 char c = expression[i];
01001 switch (c)
01002 {
01003 case '[':
01004 {
01005
01006
01007
01008
01009
01010
01011
01012 if (i >= expression.size ())
01013 {
01014
01015 delete or_root;
01016 return;
01017 }
01018 i++;
01019
01020 int i0 = i;
01021
01022 bool done = false;
01023 bool has_not = false;
01024
01025 cmt_string choices = "";
01026
01027 for (; i < expression.size (); i++)
01028 {
01029 c = expression[i];
01030 switch (c)
01031 {
01032 case ']':
01033 done = true;
01034 break;
01035 case '^':
01036 if (i == i0) has_not = true;
01037 else choices += c;
01038 break;
01039 case '\\':
01040 choices += c;
01041 if (i >= expression.size ())
01042 {
01043
01044
01045 delete or_root;
01046 return;
01047 }
01048 i++;
01049 c = expression[i];
01050 choices += c;
01051 break;
01052 default:
01053 choices += c;
01054 break;
01055 }
01056 if (done) break;
01057 }
01058
01059 if (!done)
01060 {
01061
01062 delete or_root;
01063 return;
01064 }
01065 if (has_not)
01066 top_and->push (new cmt_not_char_list_node (choices));
01067 else
01068 top_and->push (new cmt_char_list_node (choices));
01069 }
01070 break;
01071 case '*':
01072 {
01073
01074
01075
01076 if (top_and->nodes () == 0)
01077 {
01078
01079 delete or_root;
01080 return;
01081 }
01082
01083 cmt_node* n = top_and->pop ();
01084 top_and->push (new cmt_zero_more (n));
01085 }
01086 break;
01087 case '+':
01088 {
01089
01090
01091
01092 if (top_and->nodes () == 0)
01093 {
01094
01095 delete or_root;
01096 return;
01097 }
01098
01099 cmt_node* n = top_and->pop ();
01100 top_and->push (new cmt_one_more (n));
01101 }
01102 break;
01103 case '?':
01104 {
01105
01106
01107
01108 if (top_and->nodes () == 0)
01109 {
01110
01111 delete or_root;
01112 return;
01113 }
01114
01115 cmt_node* n = top_and->pop ();
01116 top_and->push (new cmt_zero_one (n));
01117 }
01118 break;
01119 case '.':
01120
01121
01122
01123 top_and->push (new cmt_any_node ());
01124 break;
01125 case '(':
01126 {
01127
01128
01129
01130 if (top_and->parentheses ())
01131 {
01132
01133 delete or_root;
01134 return;
01135 }
01136
01137 top_and->set_parentheses (true);
01138
01139
01140
01141
01142
01143
01144 top_and = new cmt_and_node (new cmt_or_node (top_and));
01145 }
01146 break;
01147 case ')':
01148 {
01149
01150
01151
01152
01153
01154 cmt_node_set* or_node = top_and->father ();
01155 if (or_node == 0)
01156 {
01157
01158
01159 delete or_root;
01160 return;
01161 }
01162
01163
01164
01165
01166
01167 if (top_and->nodes () == 0)
01168 {
01169 delete (or_node->pop ());
01170 }
01171 else
01172 {
01173 top_and->reduce ();
01174 }
01175
01176 top_and = or_node->father ();
01177
01178 if (top_and == 0)
01179 {
01180
01181 delete or_root;
01182 return;
01183 }
01184
01185
01186
01187
01188
01189
01190
01191
01192 if (!top_and->parentheses ())
01193 {
01194
01195 delete or_root;
01196 return;
01197 }
01198
01199 top_and->set_parentheses (false);
01200
01201 cmt_node* unique = 0;
01202 if (or_node->nodes () == 1)
01203 {
01204 cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01205 if (and_node->nodes () == 1)
01206 {
01207 unique = and_node->pop ();
01208 delete (or_node->pop ());
01209 }
01210 else if (and_node->nodes () == 0)
01211 {
01212 delete (or_node->pop ());
01213 }
01214 }
01215
01216 if (or_node->nodes () == 0) delete (top_and->pop ());
01217 if (unique != 0) top_and->push (unique);
01218 }
01219
01220 break;
01221 case '|':
01222 {
01223
01224
01225
01226
01227 cmt_node_set* or_node = top_and->father ();
01228
01229 top_and->reduce ();
01230
01231
01232
01233
01234
01235 const cmt_node_set* and_node = (cmt_node_set*) or_node->top ();
01236 if (and_node->nodes () == 0)
01237 {
01238
01239
01240 or_node->pop ();
01241 }
01242
01243 top_and = new cmt_and_node (or_node);
01244 }
01245 break;
01246 case '^':
01247
01248
01249
01250 top_and->push (new cmt_begin_node ());
01251 break;
01252 case '$':
01253
01254
01255
01256 top_and->push (new cmt_end_node ());
01257 break;
01258 case '\\':
01259 if (i >= expression.size ())
01260 {
01261 delete or_root;
01262 return;
01263 }
01264 i++;
01265 c = expression[i];
01266 switch (c)
01267 {
01268 case '[':
01269 case ']':
01270 case '(':
01271 case ')':
01272 case '.':
01273 case '*':
01274 case '?':
01275 case '^':
01276 case '$':
01277 case '\\':
01278 break;
01279 case 'r':
01280 c = '\r';
01281 break;
01282 case 't':
01283 c = '\t';
01284 break;
01285 case 'n':
01286 c = '\n';
01287 break;
01288 default:
01289 break;
01290 }
01291 default:
01292 top_and->push (new cmt_char_node (c));
01293 break;
01294 }
01295 }
01296
01297 if (or_root != 0)
01298 {
01299 cmt_node_set* and_node = (cmt_node_set*) or_root->top ();
01300
01301 if (or_root->nodes () == 1)
01302 {
01303
01304
01305
01306
01307 if (and_node->nodes () == 0)
01308 {
01309 delete or_root;
01310 return;
01311 }
01312 }
01313
01314 if (and_node != 0)
01315 {
01316 and_node->reduce ();
01317
01318 if (and_node->parentheses ())
01319 {
01320 delete or_root;
01321 return;
01322 }
01323 }
01324 }
01325
01326 _root = or_root;
01327 }
01328
01329 cmt_regexp::~cmt_regexp ()
01330 {
01331 if (_root != 0)
01332 {
01333 delete _root;
01334 }
01335 }
01336
01337 bool cmt_regexp::is_valid () const
01338 {
01339 if (_root != 0) return (true);
01340 else return (false);
01341 }
01342
01343 cmt_regexp::iterator cmt_regexp::begin (const cmt_string& text, int pos)
01344 {
01345 if (_root != 0)
01346 {
01347 int i;
01348
01349 for (i = pos; i < text.size (); i++)
01350 {
01351 cmt_regexp::iterator it = _root->match (text, i);
01352 if (it != end ()) return (it);
01353 }
01354 }
01355
01356 return (end ());
01357 }
01358
01359 cmt_regexp::iterator cmt_regexp::end ()
01360 {
01361 return (cmt_regexp::iterator::null ());
01362 }
01363
01364 cmt_regexp::iterator cmt_regexp::begin (const cmt_string& text, int pos) const
01365 {
01366 if (_root != 0)
01367 {
01368 int i;
01369
01370 for (i = pos; i < text.size (); i++)
01371 {
01372 cmt_regexp::iterator it = _root->match (text, i);
01373 if (it != end ()) return (it);
01374 }
01375 }
01376
01377 return (end ());
01378 }
01379
01380 cmt_regexp::iterator cmt_regexp::end () const
01381 {
01382 return (cmt_regexp::iterator::null ());
01383 }
01384
01385 bool cmt_regexp::match (const cmt_string& text) const
01386 {
01387 iterator it = begin (text);
01388 if (it == end ()) return (false);
01389 else return (true);
01390 }
01391
01392
01393
01394 const cmt_regexp::iterator cmt_regexp::iterator::null ()
01395 {
01396 static const iterator null_instance (-1, -1);
01397
01398 return (null_instance);
01399 }
01400
01401 cmt_regexp::iterator::iterator ()
01402 {
01403 _pos = 0;
01404 _length = 0;
01405 }
01406
01407 cmt_regexp::iterator::iterator (int pos, int length)
01408 {
01409 _pos = pos;
01410 _length = length;
01411 }
01412
01413 cmt_regexp::iterator::iterator (const iterator& other)
01414 {
01415 _pos = other._pos;
01416 _length = other._length;
01417 }
01418
01419 int cmt_regexp::iterator::operator != (const iterator& other) const
01420 {
01421 return ((this->_pos != other._pos) ||
01422 (this->_length != other._length));
01423 }
01424
01425 int cmt_regexp::iterator::operator == (const iterator& other) const
01426 {
01427 return ((this->_pos == other._pos) &&
01428 (this->_length == other._length));
01429 }
01430
01431