libstdc++
|
00001 // wstring_convert implementation -*- C++ -*- 00002 00003 // Copyright (C) 2015-2018 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** @file bits/locale_conv.h 00026 * This is an internal header file, included by other library headers. 00027 * Do not attempt to use it directly. @headername{locale} 00028 */ 00029 00030 #ifndef _LOCALE_CONV_H 00031 #define _LOCALE_CONV_H 1 00032 00033 #if __cplusplus < 201103L 00034 # include <bits/c++0x_warning.h> 00035 #else 00036 00037 #include <streambuf> 00038 #include "stringfwd.h" 00039 #include "allocator.h" 00040 #include "codecvt.h" 00041 #include "unique_ptr.h" 00042 00043 namespace std _GLIBCXX_VISIBILITY(default) 00044 { 00045 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00046 00047 /** 00048 * @addtogroup locales 00049 * @{ 00050 */ 00051 00052 template<typename _OutStr, typename _InChar, typename _Codecvt, 00053 typename _State, typename _Fn> 00054 bool 00055 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 00056 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 00057 size_t& __count, _Fn __fn) 00058 { 00059 if (__first == __last) 00060 { 00061 __outstr.clear(); 00062 __count = 0; 00063 return true; 00064 } 00065 00066 size_t __outchars = 0; 00067 auto __next = __first; 00068 const auto __maxlen = __cvt.max_length() + 1; 00069 00070 codecvt_base::result __result; 00071 do 00072 { 00073 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 00074 auto __outnext = &__outstr.front() + __outchars; 00075 auto const __outlast = &__outstr.back() + 1; 00076 __result = (__cvt.*__fn)(__state, __next, __last, __next, 00077 __outnext, __outlast, __outnext); 00078 __outchars = __outnext - &__outstr.front(); 00079 } 00080 while (__result == codecvt_base::partial && __next != __last 00081 && (__outstr.size() - __outchars) < __maxlen); 00082 00083 if (__result == codecvt_base::error) 00084 { 00085 __count = __next - __first; 00086 return false; 00087 } 00088 00089 if (__result == codecvt_base::noconv) 00090 { 00091 __outstr.assign(__first, __last); 00092 __count = __last - __first; 00093 } 00094 else 00095 { 00096 __outstr.resize(__outchars); 00097 __count = __next - __first; 00098 } 00099 00100 return true; 00101 } 00102 00103 // Convert narrow character string to wide. 00104 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00105 inline bool 00106 __str_codecvt_in(const char* __first, const char* __last, 00107 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00108 const codecvt<_CharT, char, _State>& __cvt, 00109 _State& __state, size_t& __count) 00110 { 00111 using _Codecvt = codecvt<_CharT, char, _State>; 00112 using _ConvFn 00113 = codecvt_base::result 00114 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 00115 _CharT*, _CharT*, _CharT*&) const; 00116 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 00117 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00118 __count, __fn); 00119 } 00120 00121 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00122 inline bool 00123 __str_codecvt_in(const char* __first, const char* __last, 00124 basic_string<_CharT, _Traits, _Alloc>& __outstr, 00125 const codecvt<_CharT, char, _State>& __cvt) 00126 { 00127 _State __state = {}; 00128 size_t __n; 00129 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 00130 } 00131 00132 // Convert wide character string to narrow. 00133 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00134 inline bool 00135 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00136 basic_string<char, _Traits, _Alloc>& __outstr, 00137 const codecvt<_CharT, char, _State>& __cvt, 00138 _State& __state, size_t& __count) 00139 { 00140 using _Codecvt = codecvt<_CharT, char, _State>; 00141 using _ConvFn 00142 = codecvt_base::result 00143 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 00144 char*, char*, char*&) const; 00145 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 00146 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 00147 __count, __fn); 00148 } 00149 00150 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 00151 inline bool 00152 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 00153 basic_string<char, _Traits, _Alloc>& __outstr, 00154 const codecvt<_CharT, char, _State>& __cvt) 00155 { 00156 _State __state = {}; 00157 size_t __n; 00158 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 00159 } 00160 00161 #ifdef _GLIBCXX_USE_WCHAR_T 00162 00163 _GLIBCXX_BEGIN_NAMESPACE_CXX11 00164 00165 /// String conversions 00166 template<typename _Codecvt, typename _Elem = wchar_t, 00167 typename _Wide_alloc = allocator<_Elem>, 00168 typename _Byte_alloc = allocator<char>> 00169 class wstring_convert 00170 { 00171 public: 00172 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 00173 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 00174 typedef typename _Codecvt::state_type state_type; 00175 typedef typename wide_string::traits_type::int_type int_type; 00176 00177 /** Default constructor. 00178 * 00179 * @param __pcvt The facet to use for conversions. 00180 * 00181 * Takes ownership of @p __pcvt and will delete it in the destructor. 00182 */ 00183 explicit 00184 wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) 00185 { 00186 if (!_M_cvt) 00187 __throw_logic_error("wstring_convert"); 00188 } 00189 00190 /** Construct with an initial converstion state. 00191 * 00192 * @param __pcvt The facet to use for conversions. 00193 * @param __state Initial conversion state. 00194 * 00195 * Takes ownership of @p __pcvt and will delete it in the destructor. 00196 * The object's conversion state will persist between conversions. 00197 */ 00198 wstring_convert(_Codecvt* __pcvt, state_type __state) 00199 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 00200 { 00201 if (!_M_cvt) 00202 __throw_logic_error("wstring_convert"); 00203 } 00204 00205 /** Construct with error strings. 00206 * 00207 * @param __byte_err A string to return on failed conversions. 00208 * @param __wide_err A wide string to return on failed conversions. 00209 */ 00210 explicit 00211 wstring_convert(const byte_string& __byte_err, 00212 const wide_string& __wide_err = wide_string()) 00213 : _M_cvt(new _Codecvt), 00214 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 00215 _M_with_strings(true) 00216 { 00217 if (!_M_cvt) 00218 __throw_logic_error("wstring_convert"); 00219 } 00220 00221 ~wstring_convert() = default; 00222 00223 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00224 // 2176. Special members for wstring_convert and wbuffer_convert 00225 wstring_convert(const wstring_convert&) = delete; 00226 wstring_convert& operator=(const wstring_convert&) = delete; 00227 00228 /// @{ Convert from bytes. 00229 wide_string 00230 from_bytes(char __byte) 00231 { 00232 char __bytes[2] = { __byte }; 00233 return from_bytes(__bytes, __bytes+1); 00234 } 00235 00236 wide_string 00237 from_bytes(const char* __ptr) 00238 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 00239 00240 wide_string 00241 from_bytes(const byte_string& __str) 00242 { 00243 auto __ptr = __str.data(); 00244 return from_bytes(__ptr, __ptr + __str.size()); 00245 } 00246 00247 wide_string 00248 from_bytes(const char* __first, const char* __last) 00249 { 00250 if (!_M_with_cvtstate) 00251 _M_state = state_type(); 00252 wide_string __out{ _M_wide_err_string.get_allocator() }; 00253 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 00254 _M_count)) 00255 return __out; 00256 if (_M_with_strings) 00257 return _M_wide_err_string; 00258 __throw_range_error("wstring_convert::from_bytes"); 00259 } 00260 /// @} 00261 00262 /// @{ Convert to bytes. 00263 byte_string 00264 to_bytes(_Elem __wchar) 00265 { 00266 _Elem __wchars[2] = { __wchar }; 00267 return to_bytes(__wchars, __wchars+1); 00268 } 00269 00270 byte_string 00271 to_bytes(const _Elem* __ptr) 00272 { 00273 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 00274 } 00275 00276 byte_string 00277 to_bytes(const wide_string& __wstr) 00278 { 00279 auto __ptr = __wstr.data(); 00280 return to_bytes(__ptr, __ptr + __wstr.size()); 00281 } 00282 00283 byte_string 00284 to_bytes(const _Elem* __first, const _Elem* __last) 00285 { 00286 if (!_M_with_cvtstate) 00287 _M_state = state_type(); 00288 byte_string __out{ _M_byte_err_string.get_allocator() }; 00289 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 00290 _M_count)) 00291 return __out; 00292 if (_M_with_strings) 00293 return _M_byte_err_string; 00294 __throw_range_error("wstring_convert::to_bytes"); 00295 } 00296 /// @} 00297 00298 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00299 // 2174. wstring_convert::converted() should be noexcept 00300 /// The number of elements successfully converted in the last conversion. 00301 size_t converted() const noexcept { return _M_count; } 00302 00303 /// The final conversion state of the last conversion. 00304 state_type state() const { return _M_state; } 00305 00306 private: 00307 unique_ptr<_Codecvt> _M_cvt; 00308 byte_string _M_byte_err_string; 00309 wide_string _M_wide_err_string; 00310 state_type _M_state = state_type(); 00311 size_t _M_count = 0; 00312 bool _M_with_cvtstate = false; 00313 bool _M_with_strings = false; 00314 }; 00315 00316 _GLIBCXX_END_NAMESPACE_CXX11 00317 00318 /// Buffer conversions 00319 template<typename _Codecvt, typename _Elem = wchar_t, 00320 typename _Tr = char_traits<_Elem>> 00321 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 00322 { 00323 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 00324 00325 public: 00326 typedef typename _Codecvt::state_type state_type; 00327 00328 /** Default constructor. 00329 * 00330 * @param __bytebuf The underlying byte stream buffer. 00331 * @param __pcvt The facet to use for conversions. 00332 * @param __state Initial conversion state. 00333 * 00334 * Takes ownership of @p __pcvt and will delete it in the destructor. 00335 */ 00336 explicit 00337 wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, 00338 state_type __state = state_type()) 00339 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 00340 { 00341 if (!_M_cvt) 00342 __throw_logic_error("wbuffer_convert"); 00343 00344 _M_always_noconv = _M_cvt->always_noconv(); 00345 00346 if (_M_buf) 00347 { 00348 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 00349 this->setg(_M_get_area + _S_putback_length, 00350 _M_get_area + _S_putback_length, 00351 _M_get_area + _S_putback_length); 00352 } 00353 } 00354 00355 ~wbuffer_convert() = default; 00356 00357 // _GLIBCXX_RESOLVE_LIB_DEFECTS 00358 // 2176. Special members for wstring_convert and wbuffer_convert 00359 wbuffer_convert(const wbuffer_convert&) = delete; 00360 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 00361 00362 streambuf* rdbuf() const noexcept { return _M_buf; } 00363 00364 streambuf* 00365 rdbuf(streambuf *__bytebuf) noexcept 00366 { 00367 auto __prev = _M_buf; 00368 _M_buf = __bytebuf; 00369 return __prev; 00370 } 00371 00372 /// The conversion state following the last conversion. 00373 state_type state() const noexcept { return _M_state; } 00374 00375 protected: 00376 int 00377 sync() 00378 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; } 00379 00380 typename _Wide_streambuf::int_type 00381 overflow(typename _Wide_streambuf::int_type __out) 00382 { 00383 if (!_M_buf || !_M_conv_put()) 00384 return _Tr::eof(); 00385 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 00386 return this->sputc(__out); 00387 return _Tr::not_eof(__out); 00388 } 00389 00390 typename _Wide_streambuf::int_type 00391 underflow() 00392 { 00393 if (!_M_buf) 00394 return _Tr::eof(); 00395 00396 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 00397 return _Tr::to_int_type(*this->gptr()); 00398 else 00399 return _Tr::eof(); 00400 } 00401 00402 streamsize 00403 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 00404 { 00405 if (!_M_buf || __n == 0) 00406 return 0; 00407 streamsize __done = 0; 00408 do 00409 { 00410 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 00411 __n - __done); 00412 _Tr::copy(this->pptr(), __s + __done, __nn); 00413 this->pbump(__nn); 00414 __done += __nn; 00415 } while (__done < __n && _M_conv_put()); 00416 return __done; 00417 } 00418 00419 private: 00420 // fill the get area from converted contents of the byte stream buffer 00421 bool 00422 _M_conv_get() 00423 { 00424 const streamsize __pb1 = this->gptr() - this->eback(); 00425 const streamsize __pb2 = _S_putback_length; 00426 const streamsize __npb = std::min(__pb1, __pb2); 00427 00428 _Tr::move(_M_get_area + _S_putback_length - __npb, 00429 this->gptr() - __npb, __npb); 00430 00431 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 00432 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 00433 if (__nbytes < 1) 00434 __nbytes = 1; 00435 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 00436 if (__nbytes < 1) 00437 return false; 00438 __nbytes += _M_unconv; 00439 00440 // convert _M_get_buf into _M_get_area 00441 00442 _Elem* __outbuf = _M_get_area + _S_putback_length; 00443 _Elem* __outnext = __outbuf; 00444 const char* __bnext = _M_get_buf; 00445 00446 codecvt_base::result __result; 00447 if (_M_always_noconv) 00448 __result = codecvt_base::noconv; 00449 else 00450 { 00451 _Elem* __outend = _M_get_area + _S_buffer_length; 00452 00453 __result = _M_cvt->in(_M_state, 00454 __bnext, __bnext + __nbytes, __bnext, 00455 __outbuf, __outend, __outnext); 00456 } 00457 00458 if (__result == codecvt_base::noconv) 00459 { 00460 // cast is safe because noconv means _Elem is same type as char 00461 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 00462 _Tr::copy(__outbuf, __get_buf, __nbytes); 00463 _M_unconv = 0; 00464 return true; 00465 } 00466 00467 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 00468 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 00469 00470 this->setg(__outbuf, __outbuf, __outnext); 00471 00472 return __result != codecvt_base::error; 00473 } 00474 00475 // unused 00476 bool 00477 _M_put(...) 00478 { return false; } 00479 00480 bool 00481 _M_put(const char* __p, streamsize __n) 00482 { 00483 if (_M_buf->sputn(__p, __n) < __n) 00484 return false; 00485 return true; 00486 } 00487 00488 // convert the put area and write to the byte stream buffer 00489 bool 00490 _M_conv_put() 00491 { 00492 _Elem* const __first = this->pbase(); 00493 const _Elem* const __last = this->pptr(); 00494 const streamsize __pending = __last - __first; 00495 00496 if (_M_always_noconv) 00497 return _M_put(__first, __pending); 00498 00499 char __outbuf[2 * _S_buffer_length]; 00500 00501 const _Elem* __next = __first; 00502 const _Elem* __start; 00503 do 00504 { 00505 __start = __next; 00506 char* __outnext = __outbuf; 00507 char* const __outlast = __outbuf + sizeof(__outbuf); 00508 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 00509 __outnext, __outlast, __outnext); 00510 if (__result == codecvt_base::error) 00511 return false; 00512 else if (__result == codecvt_base::noconv) 00513 return _M_put(__next, __pending); 00514 00515 if (!_M_put(__outbuf, __outnext - __outbuf)) 00516 return false; 00517 } 00518 while (__next != __last && __next != __start); 00519 00520 if (__next != __last) 00521 _Tr::move(__first, __next, __last - __next); 00522 00523 this->pbump(__first - __next); 00524 return __next != __first; 00525 } 00526 00527 streambuf* _M_buf; 00528 unique_ptr<_Codecvt> _M_cvt; 00529 state_type _M_state; 00530 00531 static const streamsize _S_buffer_length = 32; 00532 static const streamsize _S_putback_length = 3; 00533 _Elem _M_put_area[_S_buffer_length]; 00534 _Elem _M_get_area[_S_buffer_length]; 00535 streamsize _M_unconv = 0; 00536 char _M_get_buf[_S_buffer_length-_S_putback_length]; 00537 bool _M_always_noconv; 00538 }; 00539 00540 #endif // _GLIBCXX_USE_WCHAR_T 00541 00542 /// @} group locales 00543 00544 _GLIBCXX_END_NAMESPACE_VERSION 00545 } // namespace 00546 00547 #endif // __cplusplus 00548 00549 #endif /* _LOCALE_CONV_H */