libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2013-2018 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex.tcc 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00034 00035 namespace __detail 00036 { 00037 // Result of merging regex_match and regex_search. 00038 // 00039 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 00040 // the other one if possible, for test purpose). 00041 // 00042 // That __match_mode is true means regex_match, else regex_search. 00043 template<typename _BiIter, typename _Alloc, 00044 typename _CharT, typename _TraitsT, 00045 _RegexExecutorPolicy __policy, 00046 bool __match_mode> 00047 bool 00048 __regex_algo_impl(_BiIter __s, 00049 _BiIter __e, 00050 match_results<_BiIter, _Alloc>& __m, 00051 const basic_regex<_CharT, _TraitsT>& __re, 00052 regex_constants::match_flag_type __flags) 00053 { 00054 if (__re._M_automaton == nullptr) 00055 return false; 00056 00057 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 00058 __m._M_begin = __s; 00059 __m._M_resize(__re._M_automaton->_M_sub_count()); 00060 for (auto& __it : __res) 00061 __it.matched = false; 00062 00063 bool __ret; 00064 if ((__re.flags() & regex_constants::__polynomial) 00065 || (__policy == _RegexExecutorPolicy::_S_alternate 00066 && !__re._M_automaton->_M_has_backref)) 00067 { 00068 _Executor<_BiIter, _Alloc, _TraitsT, false> 00069 __executor(__s, __e, __m, __re, __flags); 00070 if (__match_mode) 00071 __ret = __executor._M_match(); 00072 else 00073 __ret = __executor._M_search(); 00074 } 00075 else 00076 { 00077 _Executor<_BiIter, _Alloc, _TraitsT, true> 00078 __executor(__s, __e, __m, __re, __flags); 00079 if (__match_mode) 00080 __ret = __executor._M_match(); 00081 else 00082 __ret = __executor._M_search(); 00083 } 00084 if (__ret) 00085 { 00086 for (auto& __it : __res) 00087 if (!__it.matched) 00088 __it.first = __it.second = __e; 00089 auto& __pre = __m._M_prefix(); 00090 auto& __suf = __m._M_suffix(); 00091 if (__match_mode) 00092 { 00093 __pre.matched = false; 00094 __pre.first = __s; 00095 __pre.second = __s; 00096 __suf.matched = false; 00097 __suf.first = __e; 00098 __suf.second = __e; 00099 } 00100 else 00101 { 00102 __pre.first = __s; 00103 __pre.second = __res[0].first; 00104 __pre.matched = (__pre.first != __pre.second); 00105 __suf.first = __res[0].second; 00106 __suf.second = __e; 00107 __suf.matched = (__suf.first != __suf.second); 00108 } 00109 } 00110 else 00111 { 00112 __m._M_resize(0); 00113 for (auto& __it : __res) 00114 { 00115 __it.matched = false; 00116 __it.first = __it.second = __e; 00117 } 00118 } 00119 return __ret; 00120 } 00121 } 00122 00123 template<typename _Ch_type> 00124 template<typename _Fwd_iter> 00125 typename regex_traits<_Ch_type>::string_type 00126 regex_traits<_Ch_type>:: 00127 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 00128 { 00129 typedef std::ctype<char_type> __ctype_type; 00130 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00131 00132 static const char* __collatenames[] = 00133 { 00134 "NUL", 00135 "SOH", 00136 "STX", 00137 "ETX", 00138 "EOT", 00139 "ENQ", 00140 "ACK", 00141 "alert", 00142 "backspace", 00143 "tab", 00144 "newline", 00145 "vertical-tab", 00146 "form-feed", 00147 "carriage-return", 00148 "SO", 00149 "SI", 00150 "DLE", 00151 "DC1", 00152 "DC2", 00153 "DC3", 00154 "DC4", 00155 "NAK", 00156 "SYN", 00157 "ETB", 00158 "CAN", 00159 "EM", 00160 "SUB", 00161 "ESC", 00162 "IS4", 00163 "IS3", 00164 "IS2", 00165 "IS1", 00166 "space", 00167 "exclamation-mark", 00168 "quotation-mark", 00169 "number-sign", 00170 "dollar-sign", 00171 "percent-sign", 00172 "ampersand", 00173 "apostrophe", 00174 "left-parenthesis", 00175 "right-parenthesis", 00176 "asterisk", 00177 "plus-sign", 00178 "comma", 00179 "hyphen", 00180 "period", 00181 "slash", 00182 "zero", 00183 "one", 00184 "two", 00185 "three", 00186 "four", 00187 "five", 00188 "six", 00189 "seven", 00190 "eight", 00191 "nine", 00192 "colon", 00193 "semicolon", 00194 "less-than-sign", 00195 "equals-sign", 00196 "greater-than-sign", 00197 "question-mark", 00198 "commercial-at", 00199 "A", 00200 "B", 00201 "C", 00202 "D", 00203 "E", 00204 "F", 00205 "G", 00206 "H", 00207 "I", 00208 "J", 00209 "K", 00210 "L", 00211 "M", 00212 "N", 00213 "O", 00214 "P", 00215 "Q", 00216 "R", 00217 "S", 00218 "T", 00219 "U", 00220 "V", 00221 "W", 00222 "X", 00223 "Y", 00224 "Z", 00225 "left-square-bracket", 00226 "backslash", 00227 "right-square-bracket", 00228 "circumflex", 00229 "underscore", 00230 "grave-accent", 00231 "a", 00232 "b", 00233 "c", 00234 "d", 00235 "e", 00236 "f", 00237 "g", 00238 "h", 00239 "i", 00240 "j", 00241 "k", 00242 "l", 00243 "m", 00244 "n", 00245 "o", 00246 "p", 00247 "q", 00248 "r", 00249 "s", 00250 "t", 00251 "u", 00252 "v", 00253 "w", 00254 "x", 00255 "y", 00256 "z", 00257 "left-curly-bracket", 00258 "vertical-line", 00259 "right-curly-bracket", 00260 "tilde", 00261 "DEL", 00262 }; 00263 00264 string __s; 00265 for (; __first != __last; ++__first) 00266 __s += __fctyp.narrow(*__first, 0); 00267 00268 for (const auto& __it : __collatenames) 00269 if (__s == __it) 00270 return string_type(1, __fctyp.widen( 00271 static_cast<char>(&__it - __collatenames))); 00272 00273 // TODO Add digraph support: 00274 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 00275 00276 return string_type(); 00277 } 00278 00279 template<typename _Ch_type> 00280 template<typename _Fwd_iter> 00281 typename regex_traits<_Ch_type>::char_class_type 00282 regex_traits<_Ch_type>:: 00283 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 00284 { 00285 typedef std::ctype<char_type> __ctype_type; 00286 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00287 00288 // Mappings from class name to class mask. 00289 static const pair<const char*, char_class_type> __classnames[] = 00290 { 00291 {"d", ctype_base::digit}, 00292 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 00293 {"s", ctype_base::space}, 00294 {"alnum", ctype_base::alnum}, 00295 {"alpha", ctype_base::alpha}, 00296 {"blank", ctype_base::blank}, 00297 {"cntrl", ctype_base::cntrl}, 00298 {"digit", ctype_base::digit}, 00299 {"graph", ctype_base::graph}, 00300 {"lower", ctype_base::lower}, 00301 {"print", ctype_base::print}, 00302 {"punct", ctype_base::punct}, 00303 {"space", ctype_base::space}, 00304 {"upper", ctype_base::upper}, 00305 {"xdigit", ctype_base::xdigit}, 00306 }; 00307 00308 string __s; 00309 for (; __first != __last; ++__first) 00310 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 00311 00312 for (const auto& __it : __classnames) 00313 if (__s == __it.first) 00314 { 00315 if (__icase 00316 && ((__it.second 00317 & (ctype_base::lower | ctype_base::upper)) != 0)) 00318 return ctype_base::alpha; 00319 return __it.second; 00320 } 00321 return 0; 00322 } 00323 00324 template<typename _Ch_type> 00325 bool 00326 regex_traits<_Ch_type>:: 00327 isctype(_Ch_type __c, char_class_type __f) const 00328 { 00329 typedef std::ctype<char_type> __ctype_type; 00330 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 00331 00332 return __fctyp.is(__f._M_base, __c) 00333 // [[:w:]] 00334 || ((__f._M_extended & _RegexMask::_S_under) 00335 && __c == __fctyp.widen('_')); 00336 } 00337 00338 template<typename _Ch_type> 00339 int 00340 regex_traits<_Ch_type>:: 00341 value(_Ch_type __ch, int __radix) const 00342 { 00343 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 00344 long __v; 00345 if (__radix == 8) 00346 __is >> std::oct; 00347 else if (__radix == 16) 00348 __is >> std::hex; 00349 __is >> __v; 00350 return __is.fail() ? -1 : __v; 00351 } 00352 00353 template<typename _Bi_iter, typename _Alloc> 00354 template<typename _Out_iter> 00355 _Out_iter match_results<_Bi_iter, _Alloc>:: 00356 format(_Out_iter __out, 00357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 00358 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 00359 match_flag_type __flags) const 00360 { 00361 __glibcxx_assert( ready() ); 00362 regex_traits<char_type> __traits; 00363 typedef std::ctype<char_type> __ctype_type; 00364 const __ctype_type& 00365 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 00366 00367 auto __output = [&](size_t __idx) 00368 { 00369 auto& __sub = (*this)[__idx]; 00370 if (__sub.matched) 00371 __out = std::copy(__sub.first, __sub.second, __out); 00372 }; 00373 00374 if (__flags & regex_constants::format_sed) 00375 { 00376 bool __escaping = false; 00377 for (; __fmt_first != __fmt_last; __fmt_first++) 00378 { 00379 if (__escaping) 00380 { 00381 __escaping = false; 00382 if (__fctyp.is(__ctype_type::digit, *__fmt_first)) 00383 __output(__traits.value(*__fmt_first, 10)); 00384 else 00385 *__out++ = *__fmt_first; 00386 continue; 00387 } 00388 if (*__fmt_first == '\\') 00389 { 00390 __escaping = true; 00391 continue; 00392 } 00393 if (*__fmt_first == '&') 00394 { 00395 __output(0); 00396 continue; 00397 } 00398 *__out++ = *__fmt_first; 00399 } 00400 if (__escaping) 00401 *__out++ = '\\'; 00402 } 00403 else 00404 { 00405 while (1) 00406 { 00407 auto __next = std::find(__fmt_first, __fmt_last, '$'); 00408 if (__next == __fmt_last) 00409 break; 00410 00411 __out = std::copy(__fmt_first, __next, __out); 00412 00413 auto __eat = [&](char __ch) -> bool 00414 { 00415 if (*__next == __ch) 00416 { 00417 ++__next; 00418 return true; 00419 } 00420 return false; 00421 }; 00422 00423 if (++__next == __fmt_last) 00424 *__out++ = '$'; 00425 else if (__eat('$')) 00426 *__out++ = '$'; 00427 else if (__eat('&')) 00428 __output(0); 00429 else if (__eat('`')) 00430 { 00431 auto& __sub = _M_prefix(); 00432 if (__sub.matched) 00433 __out = std::copy(__sub.first, __sub.second, __out); 00434 } 00435 else if (__eat('\'')) 00436 { 00437 auto& __sub = _M_suffix(); 00438 if (__sub.matched) 00439 __out = std::copy(__sub.first, __sub.second, __out); 00440 } 00441 else if (__fctyp.is(__ctype_type::digit, *__next)) 00442 { 00443 long __num = __traits.value(*__next, 10); 00444 if (++__next != __fmt_last 00445 && __fctyp.is(__ctype_type::digit, *__next)) 00446 { 00447 __num *= 10; 00448 __num += __traits.value(*__next++, 10); 00449 } 00450 if (0 <= __num && __num < this->size()) 00451 __output(__num); 00452 } 00453 else 00454 *__out++ = '$'; 00455 __fmt_first = __next; 00456 } 00457 __out = std::copy(__fmt_first, __fmt_last, __out); 00458 } 00459 return __out; 00460 } 00461 00462 template<typename _Out_iter, typename _Bi_iter, 00463 typename _Rx_traits, typename _Ch_type> 00464 _Out_iter 00465 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 00466 const basic_regex<_Ch_type, _Rx_traits>& __e, 00467 const _Ch_type* __fmt, 00468 regex_constants::match_flag_type __flags) 00469 { 00470 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 00471 _IterT __i(__first, __last, __e, __flags); 00472 _IterT __end; 00473 if (__i == __end) 00474 { 00475 if (!(__flags & regex_constants::format_no_copy)) 00476 __out = std::copy(__first, __last, __out); 00477 } 00478 else 00479 { 00480 sub_match<_Bi_iter> __last; 00481 auto __len = char_traits<_Ch_type>::length(__fmt); 00482 for (; __i != __end; ++__i) 00483 { 00484 if (!(__flags & regex_constants::format_no_copy)) 00485 __out = std::copy(__i->prefix().first, __i->prefix().second, 00486 __out); 00487 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 00488 __last = __i->suffix(); 00489 if (__flags & regex_constants::format_first_only) 00490 break; 00491 } 00492 if (!(__flags & regex_constants::format_no_copy)) 00493 __out = std::copy(__last.first, __last.second, __out); 00494 } 00495 return __out; 00496 } 00497 00498 template<typename _Bi_iter, 00499 typename _Ch_type, 00500 typename _Rx_traits> 00501 bool 00502 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00503 operator==(const regex_iterator& __rhs) const 00504 { 00505 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 00506 return true; 00507 return _M_pregex == __rhs._M_pregex 00508 && _M_begin == __rhs._M_begin 00509 && _M_end == __rhs._M_end 00510 && _M_flags == __rhs._M_flags 00511 && _M_match[0] == __rhs._M_match[0]; 00512 } 00513 00514 template<typename _Bi_iter, 00515 typename _Ch_type, 00516 typename _Rx_traits> 00517 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00518 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00519 operator++() 00520 { 00521 // In all cases in which the call to regex_search returns true, 00522 // match.prefix().first shall be equal to the previous value of 00523 // match[0].second, and for each index i in the half-open range 00524 // [0, match.size()) for which match[i].matched is true, 00525 // match[i].position() shall return distance(begin, match[i].first). 00526 // [28.12.1.4.5] 00527 if (_M_match[0].matched) 00528 { 00529 auto __start = _M_match[0].second; 00530 auto __prefix_first = _M_match[0].second; 00531 if (_M_match[0].first == _M_match[0].second) 00532 { 00533 if (__start == _M_end) 00534 { 00535 _M_pregex = nullptr; 00536 return *this; 00537 } 00538 else 00539 { 00540 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 00541 _M_flags 00542 | regex_constants::match_not_null 00543 | regex_constants::match_continuous)) 00544 { 00545 __glibcxx_assert(_M_match[0].matched); 00546 auto& __prefix = _M_match._M_prefix(); 00547 __prefix.first = __prefix_first; 00548 __prefix.matched = __prefix.first != __prefix.second; 00549 // [28.12.1.4.5] 00550 _M_match._M_begin = _M_begin; 00551 return *this; 00552 } 00553 else 00554 ++__start; 00555 } 00556 } 00557 _M_flags |= regex_constants::match_prev_avail; 00558 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 00559 { 00560 __glibcxx_assert(_M_match[0].matched); 00561 auto& __prefix = _M_match._M_prefix(); 00562 __prefix.first = __prefix_first; 00563 __prefix.matched = __prefix.first != __prefix.second; 00564 // [28.12.1.4.5] 00565 _M_match._M_begin = _M_begin; 00566 } 00567 else 00568 _M_pregex = nullptr; 00569 } 00570 return *this; 00571 } 00572 00573 template<typename _Bi_iter, 00574 typename _Ch_type, 00575 typename _Rx_traits> 00576 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00578 operator=(const regex_token_iterator& __rhs) 00579 { 00580 _M_position = __rhs._M_position; 00581 _M_subs = __rhs._M_subs; 00582 _M_n = __rhs._M_n; 00583 _M_suffix = __rhs._M_suffix; 00584 _M_has_m1 = __rhs._M_has_m1; 00585 _M_normalize_result(); 00586 return *this; 00587 } 00588 00589 template<typename _Bi_iter, 00590 typename _Ch_type, 00591 typename _Rx_traits> 00592 bool 00593 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00594 operator==(const regex_token_iterator& __rhs) const 00595 { 00596 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 00597 return true; 00598 if (_M_suffix.matched && __rhs._M_suffix.matched 00599 && _M_suffix == __rhs._M_suffix) 00600 return true; 00601 if (_M_end_of_seq() || _M_suffix.matched 00602 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 00603 return false; 00604 return _M_position == __rhs._M_position 00605 && _M_n == __rhs._M_n 00606 && _M_subs == __rhs._M_subs; 00607 } 00608 00609 template<typename _Bi_iter, 00610 typename _Ch_type, 00611 typename _Rx_traits> 00612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 00613 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00614 operator++() 00615 { 00616 _Position __prev = _M_position; 00617 if (_M_suffix.matched) 00618 *this = regex_token_iterator(); 00619 else if (_M_n + 1 < _M_subs.size()) 00620 { 00621 _M_n++; 00622 _M_result = &_M_current_match(); 00623 } 00624 else 00625 { 00626 _M_n = 0; 00627 ++_M_position; 00628 if (_M_position != _Position()) 00629 _M_result = &_M_current_match(); 00630 else if (_M_has_m1 && __prev->suffix().length() != 0) 00631 { 00632 _M_suffix.matched = true; 00633 _M_suffix.first = __prev->suffix().first; 00634 _M_suffix.second = __prev->suffix().second; 00635 _M_result = &_M_suffix; 00636 } 00637 else 00638 *this = regex_token_iterator(); 00639 } 00640 return *this; 00641 } 00642 00643 template<typename _Bi_iter, 00644 typename _Ch_type, 00645 typename _Rx_traits> 00646 void 00647 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: 00648 _M_init(_Bi_iter __a, _Bi_iter __b) 00649 { 00650 _M_has_m1 = false; 00651 for (auto __it : _M_subs) 00652 if (__it == -1) 00653 { 00654 _M_has_m1 = true; 00655 break; 00656 } 00657 if (_M_position != _Position()) 00658 _M_result = &_M_current_match(); 00659 else if (_M_has_m1) 00660 { 00661 _M_suffix.matched = true; 00662 _M_suffix.first = __a; 00663 _M_suffix.second = __b; 00664 _M_result = &_M_suffix; 00665 } 00666 else 00667 _M_result = nullptr; 00668 } 00669 00670 _GLIBCXX_END_NAMESPACE_VERSION 00671 } // namespace