Parent Directory
|
Revision Log
committing r3113 initial commit again...
1 | william | 31 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: src/common/strconv.cpp | ||
3 | // Purpose: Unicode conversion classes | ||
4 | // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik, | ||
5 | // Ryan Norton, Fredrik Roubert (UTF7) | ||
6 | // Modified by: | ||
7 | // Created: 29/01/98 | ||
8 | // RCS-ID: $Id: strconv.cpp 56394 2008-10-17 11:31:22Z VZ $ | ||
9 | // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik | ||
10 | // (c) 2000-2003 Vadim Zeitlin | ||
11 | // (c) 2004 Ryan Norton, Fredrik Roubert | ||
12 | // Licence: wxWindows licence | ||
13 | ///////////////////////////////////////////////////////////////////////////// | ||
14 | |||
15 | // For compilers that support precompilation, includes "wx.h". | ||
16 | #include "wx/wxprec.h" | ||
17 | |||
18 | #ifndef WX_PRECOMP | ||
19 | #ifdef __WXMSW__ | ||
20 | #include "wx/msw/missing.h" | ||
21 | #endif | ||
22 | #include "wx/intl.h" | ||
23 | #include "wx/log.h" | ||
24 | #include "wx/utils.h" | ||
25 | #include "wx/hashmap.h" | ||
26 | #endif | ||
27 | |||
28 | #include "wx/strconv.h" | ||
29 | |||
30 | #if wxUSE_WCHAR_T | ||
31 | |||
32 | #ifdef __WINDOWS__ | ||
33 | #include "wx/msw/private.h" | ||
34 | #endif | ||
35 | |||
36 | #ifndef __WXWINCE__ | ||
37 | #include <errno.h> | ||
38 | #endif | ||
39 | |||
40 | #include <ctype.h> | ||
41 | #include <string.h> | ||
42 | #include <stdlib.h> | ||
43 | |||
44 | #if defined(__WIN32__) && !defined(__WXMICROWIN__) | ||
45 | #define wxHAVE_WIN32_MB2WC | ||
46 | #endif | ||
47 | |||
48 | #ifdef __SALFORDC__ | ||
49 | #include <clib.h> | ||
50 | #endif | ||
51 | |||
52 | #ifdef HAVE_ICONV | ||
53 | #include <iconv.h> | ||
54 | #include "wx/thread.h" | ||
55 | #endif | ||
56 | |||
57 | #include "wx/encconv.h" | ||
58 | #include "wx/fontmap.h" | ||
59 | |||
60 | #ifdef __WXMAC__ | ||
61 | #ifndef __DARWIN__ | ||
62 | #include <ATSUnicode.h> | ||
63 | #include <TextCommon.h> | ||
64 | #include <TextEncodingConverter.h> | ||
65 | #endif | ||
66 | |||
67 | // includes Mac headers | ||
68 | #include "wx/mac/private.h" | ||
69 | #include "wx/thread.h" | ||
70 | |||
71 | #endif | ||
72 | |||
73 | |||
74 | #define TRACE_STRCONV _T("strconv") | ||
75 | |||
76 | // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to | ||
77 | // be 4 bytes | ||
78 | #if SIZEOF_WCHAR_T == 2 | ||
79 | #define WC_UTF16 | ||
80 | #endif | ||
81 | |||
82 | |||
83 | // ============================================================================ | ||
84 | // implementation | ||
85 | // ============================================================================ | ||
86 | |||
// Helper of cMB2WC(): checks whether the n bytes starting at p contain at
// least one byte which is not NUL.
//
// Returns true as soon as a non-NUL byte is found among the first n bytes and
// false if all of them are NUL (this includes the n == 0 case, for which p is
// never dereferenced).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( ; n != 0; n--, p++ )
    {
        if ( *p != '\0' )
            return true;
    }

    return false;
}
95 | |||
96 | // ---------------------------------------------------------------------------- | ||
97 | // UTF-16 en/decoding to/from UCS-4 with surrogates handling | ||
98 | // ---------------------------------------------------------------------------- | ||
99 | |||
100 | static size_t encode_utf16(wxUint32 input, wxUint16 *output) | ||
101 | { | ||
102 | if (input <= 0xffff) | ||
103 | { | ||
104 | if (output) | ||
105 | *output = (wxUint16) input; | ||
106 | |||
107 | return 1; | ||
108 | } | ||
109 | else if (input >= 0x110000) | ||
110 | { | ||
111 | return wxCONV_FAILED; | ||
112 | } | ||
113 | else | ||
114 | { | ||
115 | if (output) | ||
116 | { | ||
117 | *output++ = (wxUint16) ((input >> 10) + 0xd7c0); | ||
118 | *output = (wxUint16) ((input & 0x3ff) + 0xdc00); | ||
119 | } | ||
120 | |||
121 | return 2; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | static size_t decode_utf16(const wxUint16* input, wxUint32& output) | ||
126 | { | ||
127 | if ((*input < 0xd800) || (*input > 0xdfff)) | ||
128 | { | ||
129 | output = *input; | ||
130 | return 1; | ||
131 | } | ||
132 | else if ((input[1] < 0xdc00) || (input[1] > 0xdfff)) | ||
133 | { | ||
134 | output = *input; | ||
135 | return wxCONV_FAILED; | ||
136 | } | ||
137 | else | ||
138 | { | ||
139 | output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00); | ||
140 | return 2; | ||
141 | } | ||
142 | } | ||
143 | |||
144 | #ifdef WC_UTF16 | ||
145 | typedef wchar_t wxDecodeSurrogate_t; | ||
146 | #else // !WC_UTF16 | ||
147 | typedef wxUint16 wxDecodeSurrogate_t; | ||
148 | #endif // WC_UTF16/!WC_UTF16 | ||
149 | |||
150 | // returns the next UTF-32 character from the wchar_t buffer and advances the | ||
151 | // pointer to the character after this one | ||
152 | // | ||
153 | // if an invalid character is found, *pSrc is set to NULL, the caller must | ||
154 | // check for this | ||
155 | static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc) | ||
156 | { | ||
157 | wxUint32 out; | ||
158 | const size_t | ||
159 | n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out); | ||
160 | if ( n == wxCONV_FAILED ) | ||
161 | *pSrc = NULL; | ||
162 | else | ||
163 | *pSrc += n; | ||
164 | |||
165 | return out; | ||
166 | } | ||
167 | |||
168 | // ---------------------------------------------------------------------------- | ||
169 | // wxMBConv | ||
170 | // ---------------------------------------------------------------------------- | ||
171 | |||
172 | size_t | ||
173 | wxMBConv::ToWChar(wchar_t *dst, size_t dstLen, | ||
174 | const char *src, size_t srcLen) const | ||
175 | { | ||
176 | // although new conversion classes are supposed to implement this function | ||
177 | // directly, the existins ones only implement the old MB2WC() and so, to | ||
178 | // avoid to have to rewrite all conversion classes at once, we provide a | ||
179 | // default (but not efficient) implementation of this one in terms of the | ||
180 | // old function by copying the input to ensure that it's NUL-terminated and | ||
181 | // then using MB2WC() to convert it | ||
182 | |||
183 | // the number of chars [which would be] written to dst [if it were not NULL] | ||
184 | size_t dstWritten = 0; | ||
185 | |||
186 | // the number of NULs terminating this string | ||
187 | size_t nulLen = 0; // not really needed, but just to avoid warnings | ||
188 | |||
189 | // if we were not given the input size we just have to assume that the | ||
190 | // string is properly terminated as we have no way of knowing how long it | ||
191 | // is anyhow, but if we do have the size check whether there are enough | ||
192 | // NULs at the end | ||
193 | wxCharBuffer bufTmp; | ||
194 | const char *srcEnd; | ||
195 | if ( srcLen != wxNO_LEN ) | ||
196 | { | ||
197 | // we need to know how to find the end of this string | ||
198 | nulLen = GetMBNulLen(); | ||
199 | if ( nulLen == wxCONV_FAILED ) | ||
200 | return wxCONV_FAILED; | ||
201 | |||
202 | // if there are enough NULs we can avoid the copy | ||
203 | if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) ) | ||
204 | { | ||
205 | // make a copy in order to properly NUL-terminate the string | ||
206 | bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */); | ||
207 | char * const p = bufTmp.data(); | ||
208 | memcpy(p, src, srcLen); | ||
209 | for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ ) | ||
210 | *s = '\0'; | ||
211 | |||
212 | src = bufTmp; | ||
213 | } | ||
214 | |||
215 | srcEnd = src + srcLen; | ||
216 | } | ||
217 | else // quit after the first loop iteration | ||
218 | { | ||
219 | srcEnd = NULL; | ||
220 | } | ||
221 | |||
222 | for ( ;; ) | ||
223 | { | ||
224 | // try to convert the current chunk | ||
225 | size_t lenChunk = MB2WC(NULL, src, 0); | ||
226 | if ( lenChunk == wxCONV_FAILED ) | ||
227 | return wxCONV_FAILED; | ||
228 | |||
229 | lenChunk++; // for the L'\0' at the end of this chunk | ||
230 | |||
231 | dstWritten += lenChunk; | ||
232 | |||
233 | if ( lenChunk == 1 ) | ||
234 | { | ||
235 | // nothing left in the input string, conversion succeeded | ||
236 | break; | ||
237 | } | ||
238 | |||
239 | if ( dst ) | ||
240 | { | ||
241 | if ( dstWritten > dstLen ) | ||
242 | return wxCONV_FAILED; | ||
243 | |||
244 | if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED ) | ||
245 | return wxCONV_FAILED; | ||
246 | |||
247 | dst += lenChunk; | ||
248 | } | ||
249 | |||
250 | if ( !srcEnd ) | ||
251 | { | ||
252 | // we convert just one chunk in this case as this is the entire | ||
253 | // string anyhow | ||
254 | break; | ||
255 | } | ||
256 | |||
257 | // advance the input pointer past the end of this chunk | ||
258 | while ( NotAllNULs(src, nulLen) ) | ||
259 | { | ||
260 | // notice that we must skip over multiple bytes here as we suppose | ||
261 | // that if NUL takes 2 or 4 bytes, then all the other characters do | ||
262 | // too and so if advanced by a single byte we might erroneously | ||
263 | // detect sequences of NUL bytes in the middle of the input | ||
264 | src += nulLen; | ||
265 | } | ||
266 | |||
267 | src += nulLen; // skipping over its terminator as well | ||
268 | |||
269 | // note that ">=" (and not just "==") is needed here as the terminator | ||
270 | // we skipped just above could be inside or just after the buffer | ||
271 | // delimited by inEnd | ||
272 | if ( src >= srcEnd ) | ||
273 | break; | ||
274 | } | ||
275 | |||
276 | return dstWritten; | ||
277 | } | ||
278 | |||
279 | size_t | ||
280 | wxMBConv::FromWChar(char *dst, size_t dstLen, | ||
281 | const wchar_t *src, size_t srcLen) const | ||
282 | { | ||
283 | // the number of chars [which would be] written to dst [if it were not NULL] | ||
284 | size_t dstWritten = 0; | ||
285 | |||
286 | // make a copy of the input string unless it is already properly | ||
287 | // NUL-terminated | ||
288 | // | ||
289 | // if we don't know its length we have no choice but to assume that it is, | ||
290 | // indeed, properly terminated | ||
291 | wxWCharBuffer bufTmp; | ||
292 | if ( srcLen == wxNO_LEN ) | ||
293 | { | ||
294 | srcLen = wxWcslen(src) + 1; | ||
295 | } | ||
296 | else if ( srcLen != 0 && src[srcLen - 1] != L'\0' ) | ||
297 | { | ||
298 | // make a copy in order to properly NUL-terminate the string | ||
299 | bufTmp = wxWCharBuffer(srcLen); | ||
300 | memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t)); | ||
301 | src = bufTmp; | ||
302 | } | ||
303 | |||
304 | const size_t lenNul = GetMBNulLen(); | ||
305 | for ( const wchar_t * const srcEnd = src + srcLen; | ||
306 | src < srcEnd; | ||
307 | src += wxWcslen(src) + 1 /* skip L'\0' too */ ) | ||
308 | { | ||
309 | // try to convert the current chunk | ||
310 | size_t lenChunk = WC2MB(NULL, src, 0); | ||
311 | |||
312 | if ( lenChunk == wxCONV_FAILED ) | ||
313 | return wxCONV_FAILED; | ||
314 | |||
315 | lenChunk += lenNul; | ||
316 | dstWritten += lenChunk; | ||
317 | |||
318 | if ( dst ) | ||
319 | { | ||
320 | if ( dstWritten > dstLen ) | ||
321 | return wxCONV_FAILED; | ||
322 | |||
323 | if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED ) | ||
324 | return wxCONV_FAILED; | ||
325 | |||
326 | dst += lenChunk; | ||
327 | } | ||
328 | } | ||
329 | |||
330 | return dstWritten; | ||
331 | } | ||
332 | |||
333 | size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const | ||
334 | { | ||
335 | size_t rc = ToWChar(outBuff, outLen, inBuff); | ||
336 | if ( rc != wxCONV_FAILED ) | ||
337 | { | ||
338 | // ToWChar() returns the buffer length, i.e. including the trailing | ||
339 | // NUL, while this method doesn't take it into account | ||
340 | rc--; | ||
341 | } | ||
342 | |||
343 | return rc; | ||
344 | } | ||
345 | |||
346 | size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const | ||
347 | { | ||
348 | size_t rc = FromWChar(outBuff, outLen, inBuff); | ||
349 | if ( rc != wxCONV_FAILED ) | ||
350 | { | ||
351 | rc -= GetMBNulLen(); | ||
352 | } | ||
353 | |||
354 | return rc; | ||
355 | } | ||
356 | |||
357 | wxMBConv::~wxMBConv() | ||
358 | { | ||
359 | // nothing to do here (necessary for Darwin linking probably) | ||
360 | } | ||
361 | |||
362 | const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const | ||
363 | { | ||
364 | if ( psz ) | ||
365 | { | ||
366 | // calculate the length of the buffer needed first | ||
367 | const size_t nLen = MB2WC(NULL, psz, 0); | ||
368 | if ( nLen != wxCONV_FAILED ) | ||
369 | { | ||
370 | // now do the actual conversion | ||
371 | wxWCharBuffer buf(nLen /* +1 added implicitly */); | ||
372 | |||
373 | // +1 for the trailing NULL | ||
374 | if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED ) | ||
375 | return buf; | ||
376 | } | ||
377 | } | ||
378 | |||
379 | return wxWCharBuffer(); | ||
380 | } | ||
381 | |||
382 | const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const | ||
383 | { | ||
384 | if ( pwz ) | ||
385 | { | ||
386 | const size_t nLen = WC2MB(NULL, pwz, 0); | ||
387 | if ( nLen != wxCONV_FAILED ) | ||
388 | { | ||
389 | // extra space for trailing NUL(s) | ||
390 | static const size_t extraLen = GetMaxMBNulLen(); | ||
391 | |||
392 | wxCharBuffer buf(nLen + extraLen - 1); | ||
393 | if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED ) | ||
394 | return buf; | ||
395 | } | ||
396 | } | ||
397 | |||
398 | return wxCharBuffer(); | ||
399 | } | ||
400 | |||
401 | const wxWCharBuffer | ||
402 | wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const | ||
403 | { | ||
404 | const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen); | ||
405 | if ( dstLen != wxCONV_FAILED ) | ||
406 | { | ||
407 | wxWCharBuffer wbuf(dstLen - 1); | ||
408 | if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) | ||
409 | { | ||
410 | if ( outLen ) | ||
411 | { | ||
412 | *outLen = dstLen; | ||
413 | if ( wbuf[dstLen - 1] == L'\0' ) | ||
414 | (*outLen)--; | ||
415 | } | ||
416 | |||
417 | return wbuf; | ||
418 | } | ||
419 | } | ||
420 | |||
421 | if ( outLen ) | ||
422 | *outLen = 0; | ||
423 | |||
424 | return wxWCharBuffer(); | ||
425 | } | ||
426 | |||
427 | const wxCharBuffer | ||
428 | wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const | ||
429 | { | ||
430 | size_t dstLen = FromWChar(NULL, 0, inBuff, inLen); | ||
431 | if ( dstLen != wxCONV_FAILED ) | ||
432 | { | ||
433 | // special case of empty input: can't allocate 0 size buffer below as | ||
434 | // wxCharBuffer insists on NUL-terminating it | ||
435 | wxCharBuffer buf(dstLen ? dstLen - 1 : 1); | ||
436 | if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED ) | ||
437 | { | ||
438 | if ( outLen ) | ||
439 | { | ||
440 | *outLen = dstLen; | ||
441 | |||
442 | const size_t nulLen = GetMBNulLen(); | ||
443 | if ( dstLen >= nulLen && | ||
444 | !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) ) | ||
445 | { | ||
446 | // in this case the output is NUL-terminated and we're not | ||
447 | // supposed to count NUL | ||
448 | *outLen -= nulLen; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | return buf; | ||
453 | } | ||
454 | } | ||
455 | |||
456 | if ( outLen ) | ||
457 | *outLen = 0; | ||
458 | |||
459 | return wxCharBuffer(); | ||
460 | } | ||
461 | |||
462 | // ---------------------------------------------------------------------------- | ||
463 | // wxMBConvLibc | ||
464 | // ---------------------------------------------------------------------------- | ||
465 | |||
466 | size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const | ||
467 | { | ||
468 | return wxMB2WC(buf, psz, n); | ||
469 | } | ||
470 | |||
471 | size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const | ||
472 | { | ||
473 | return wxWC2MB(buf, psz, n); | ||
474 | } | ||
475 | |||
476 | // ---------------------------------------------------------------------------- | ||
477 | // wxConvBrokenFileNames | ||
478 | // ---------------------------------------------------------------------------- | ||
479 | |||
480 | #ifdef __UNIX__ | ||
481 | |||
482 | wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset) | ||
483 | { | ||
484 | if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0 | ||
485 | || wxStricmp(charset, _T("UTF8")) == 0 ) | ||
486 | m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA); | ||
487 | else | ||
488 | m_conv = new wxCSConv(charset); | ||
489 | } | ||
490 | |||
491 | #endif // __UNIX__ | ||
492 | |||
493 | // ---------------------------------------------------------------------------- | ||
494 | // UTF-7 | ||
495 | // ---------------------------------------------------------------------------- | ||
496 | |||
497 | // Implementation (C) 2004 Fredrik Roubert | ||
498 | |||
//
// BASE64 decoding table
//
// indexed by the value of the input byte: gives the 6 bit value encoded by
// this character or 0xff if it is not one of "A-Za-z0-9+/"
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x27: not BASE64 characters
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x28 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
    0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x5f: 'A'..'Z'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x7f: 'a'..'z'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: not BASE64 characters
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
537 | |||
538 | size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const | ||
539 | { | ||
540 | size_t len = 0; | ||
541 | |||
542 | while ( *psz && (!buf || (len < n)) ) | ||
543 | { | ||
544 | unsigned char cc = *psz++; | ||
545 | if (cc != '+') | ||
546 | { | ||
547 | // plain ASCII char | ||
548 | if (buf) | ||
549 | *buf++ = cc; | ||
550 | len++; | ||
551 | } | ||
552 | else if (*psz == '-') | ||
553 | { | ||
554 | // encoded plus sign | ||
555 | if (buf) | ||
556 | *buf++ = cc; | ||
557 | len++; | ||
558 | psz++; | ||
559 | } | ||
560 | else // start of BASE64 encoded string | ||
561 | { | ||
562 | bool lsb, ok; | ||
563 | unsigned int d, l; | ||
564 | for ( ok = lsb = false, d = 0, l = 0; | ||
565 | (cc = utf7unb64[(unsigned char)*psz]) != 0xff; | ||
566 | psz++ ) | ||
567 | { | ||
568 | d <<= 6; | ||
569 | d += cc; | ||
570 | for (l += 6; l >= 8; lsb = !lsb) | ||
571 | { | ||
572 | unsigned char c = (unsigned char)((d >> (l -= 8)) % 256); | ||
573 | if (lsb) | ||
574 | { | ||
575 | if (buf) | ||
576 | *buf++ |= c; | ||
577 | len ++; | ||
578 | } | ||
579 | else | ||
580 | { | ||
581 | if (buf) | ||
582 | *buf = (wchar_t)(c << 8); | ||
583 | } | ||
584 | |||
585 | ok = true; | ||
586 | } | ||
587 | } | ||
588 | |||
589 | if ( !ok ) | ||
590 | { | ||
591 | // in valid UTF7 we should have valid characters after '+' | ||
592 | return wxCONV_FAILED; | ||
593 | } | ||
594 | |||
595 | if (*psz == '-') | ||
596 | psz++; | ||
597 | } | ||
598 | } | ||
599 | |||
600 | if ( buf && (len < n) ) | ||
601 | *buf = '\0'; | ||
602 | |||
603 | return len; | ||
604 | } | ||
605 | |||
//
// BASE64 encoding table
//
// gives the character used to represent each of the 64 possible 6 bit values
//
static const unsigned char utf7enb64[] =
{
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
    'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
    'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
    'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    'w', 'x', 'y', 'z', '0', '1', '2', '3',
    '4', '5', '6', '7', '8', '9', '+', '/'
};
620 | |||
//
// UTF-7 encoding table
//
// indexed by the (7 bit) character code, the values are:
//
// 0 - Set D (directly encoded characters)
// 1 - Set O (optional direct characters)
// 2 - whitespace characters (optional)
// 3 - special characters
//
static const unsigned char utf7encode[128] =
{
    // 0x00 - 0x1f: control characters, including TAB, LF and CR
    3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 3, 2, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0x20 - 0x3f: space, punctuation and digits
    2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
    // 0x40 - 0x5f: '@', upper case letters and punctuation
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 1, 1,
    // 0x60 - 0x7f: '`', lower case letters and punctuation
    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 3, 3
};
640 | |||
641 | size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const | ||
642 | { | ||
643 | size_t len = 0; | ||
644 | |||
645 | while (*psz && ((!buf) || (len < n))) | ||
646 | { | ||
647 | wchar_t cc = *psz++; | ||
648 | if (cc < 0x80 && utf7encode[cc] < 1) | ||
649 | { | ||
650 | // plain ASCII char | ||
651 | if (buf) | ||
652 | *buf++ = (char)cc; | ||
653 | |||
654 | len++; | ||
655 | } | ||
656 | #ifndef WC_UTF16 | ||
657 | else if (((wxUint32)cc) > 0xffff) | ||
658 | { | ||
659 | // no surrogate pair generation (yet?) | ||
660 | return wxCONV_FAILED; | ||
661 | } | ||
662 | #endif | ||
663 | else | ||
664 | { | ||
665 | if (buf) | ||
666 | *buf++ = '+'; | ||
667 | |||
668 | len++; | ||
669 | if (cc != '+') | ||
670 | { | ||
671 | // BASE64 encode string | ||
672 | unsigned int lsb, d, l; | ||
673 | for (d = 0, l = 0; /*nothing*/; psz++) | ||
674 | { | ||
675 | for (lsb = 0; lsb < 2; lsb ++) | ||
676 | { | ||
677 | d <<= 8; | ||
678 | d += lsb ? cc & 0xff : (cc & 0xff00) >> 8; | ||
679 | |||
680 | for (l += 8; l >= 6; ) | ||
681 | { | ||
682 | l -= 6; | ||
683 | if (buf) | ||
684 | *buf++ = utf7enb64[(d >> l) % 64]; | ||
685 | len++; | ||
686 | } | ||
687 | } | ||
688 | |||
689 | cc = *psz; | ||
690 | if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1)) | ||
691 | break; | ||
692 | } | ||
693 | |||
694 | if (l != 0) | ||
695 | { | ||
696 | if (buf) | ||
697 | *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64]; | ||
698 | |||
699 | len++; | ||
700 | } | ||
701 | } | ||
702 | |||
703 | if (buf) | ||
704 | *buf++ = '-'; | ||
705 | len++; | ||
706 | } | ||
707 | } | ||
708 | |||
709 | if (buf && (len < n)) | ||
710 | *buf = 0; | ||
711 | |||
712 | return len; | ||
713 | } | ||
714 | |||
715 | // ---------------------------------------------------------------------------- | ||
716 | // UTF-8 | ||
717 | // ---------------------------------------------------------------------------- | ||
718 | |||
719 | static wxUint32 utf8_max[]= | ||
720 | { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff }; | ||
721 | |||
722 | // boundaries of the private use area we use to (temporarily) remap invalid | ||
723 | // characters invalid in a UTF-8 encoded string | ||
724 | const wxUint32 wxUnicodePUA = 0x100000; | ||
725 | const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256; | ||
726 | |||
727 | size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const | ||
728 | { | ||
729 | size_t len = 0; | ||
730 | |||
731 | while (*psz && ((!buf) || (len < n))) | ||
732 | { | ||
733 | const char *opsz = psz; | ||
734 | bool invalid = false; | ||
735 | unsigned char cc = *psz++, fc = cc; | ||
736 | unsigned cnt; | ||
737 | for (cnt = 0; fc & 0x80; cnt++) | ||
738 | fc <<= 1; | ||
739 | |||
740 | if (!cnt) | ||
741 | { | ||
742 | // plain ASCII char | ||
743 | if (buf) | ||
744 | *buf++ = cc; | ||
745 | len++; | ||
746 | |||
747 | // escape the escape character for octal escapes | ||
748 | if ((m_options & MAP_INVALID_UTF8_TO_OCTAL) | ||
749 | && cc == '\\' && (!buf || len < n)) | ||
750 | { | ||
751 | if (buf) | ||
752 | *buf++ = cc; | ||
753 | len++; | ||
754 | } | ||
755 | } | ||
756 | else | ||
757 | { | ||
758 | cnt--; | ||
759 | if (!cnt) | ||
760 | { | ||
761 | // invalid UTF-8 sequence | ||
762 | invalid = true; | ||
763 | } | ||
764 | else | ||
765 | { | ||
766 | unsigned ocnt = cnt - 1; | ||
767 | wxUint32 res = cc & (0x3f >> cnt); | ||
768 | while (cnt--) | ||
769 | { | ||
770 | cc = *psz; | ||
771 | if ((cc & 0xC0) != 0x80) | ||
772 | { | ||
773 | // invalid UTF-8 sequence | ||
774 | invalid = true; | ||
775 | break; | ||
776 | } | ||
777 | |||
778 | psz++; | ||
779 | res = (res << 6) | (cc & 0x3f); | ||
780 | } | ||
781 | |||
782 | if (invalid || res <= utf8_max[ocnt]) | ||
783 | { | ||
784 | // illegal UTF-8 encoding | ||
785 | invalid = true; | ||
786 | } | ||
787 | else if ((m_options & MAP_INVALID_UTF8_TO_PUA) && | ||
788 | res >= wxUnicodePUA && res < wxUnicodePUAEnd) | ||
789 | { | ||
790 | // if one of our PUA characters turns up externally | ||
791 | // it must also be treated as an illegal sequence | ||
792 | // (a bit like you have to escape an escape character) | ||
793 | invalid = true; | ||
794 | } | ||
795 | else | ||
796 | { | ||
797 | #ifdef WC_UTF16 | ||
798 | // cast is ok because wchar_t == wxUuint16 if WC_UTF16 | ||
799 | size_t pa = encode_utf16(res, (wxUint16 *)buf); | ||
800 | if (pa == wxCONV_FAILED) | ||
801 | { | ||
802 | invalid = true; | ||
803 | } | ||
804 | else | ||
805 | { | ||
806 | if (buf) | ||
807 | buf += pa; | ||
808 | len += pa; | ||
809 | } | ||
810 | #else // !WC_UTF16 | ||
811 | if (buf) | ||
812 | *buf++ = (wchar_t)res; | ||
813 | len++; | ||
814 | #endif // WC_UTF16/!WC_UTF16 | ||
815 | } | ||
816 | } | ||
817 | |||
818 | if (invalid) | ||
819 | { | ||
820 | if (m_options & MAP_INVALID_UTF8_TO_PUA) | ||
821 | { | ||
822 | while (opsz < psz && (!buf || len < n)) | ||
823 | { | ||
824 | #ifdef WC_UTF16 | ||
825 | // cast is ok because wchar_t == wxUuint16 if WC_UTF16 | ||
826 | size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf); | ||
827 | wxASSERT(pa != wxCONV_FAILED); | ||
828 | if (buf) | ||
829 | buf += pa; | ||
830 | opsz++; | ||
831 | len += pa; | ||
832 | #else | ||
833 | if (buf) | ||
834 | *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz); | ||
835 | opsz++; | ||
836 | len++; | ||
837 | #endif | ||
838 | } | ||
839 | } | ||
840 | else if (m_options & MAP_INVALID_UTF8_TO_OCTAL) | ||
841 | { | ||
842 | while (opsz < psz && (!buf || len < n)) | ||
843 | { | ||
844 | if ( buf && len + 3 < n ) | ||
845 | { | ||
846 | unsigned char on = *opsz; | ||
847 | *buf++ = L'\\'; | ||
848 | *buf++ = (wchar_t)( L'0' + on / 0100 ); | ||
849 | *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 ); | ||
850 | *buf++ = (wchar_t)( L'0' + on % 010 ); | ||
851 | } | ||
852 | |||
853 | opsz++; | ||
854 | len += 4; | ||
855 | } | ||
856 | } | ||
857 | else // MAP_INVALID_UTF8_NOT | ||
858 | { | ||
859 | return wxCONV_FAILED; | ||
860 | } | ||
861 | } | ||
862 | } | ||
863 | } | ||
864 | |||
865 | if (buf && (len < n)) | ||
866 | *buf = 0; | ||
867 | |||
868 | return len; | ||
869 | } | ||
870 | |||
// returns true if wch is one of the octal digits L'0' to L'7'
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
875 | |||
876 | size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const | ||
877 | { | ||
878 | size_t len = 0; | ||
879 | |||
880 | while (*psz && ((!buf) || (len < n))) | ||
881 | { | ||
882 | wxUint32 cc; | ||
883 | |||
884 | #ifdef WC_UTF16 | ||
885 | // cast is ok for WC_UTF16 | ||
886 | size_t pa = decode_utf16((const wxUint16 *)psz, cc); | ||
887 | psz += (pa == wxCONV_FAILED) ? 1 : pa; | ||
888 | #else | ||
889 | cc = (*psz++) & 0x7fffffff; | ||
890 | #endif | ||
891 | |||
892 | if ( (m_options & MAP_INVALID_UTF8_TO_PUA) | ||
893 | && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd ) | ||
894 | { | ||
895 | if (buf) | ||
896 | *buf++ = (char)(cc - wxUnicodePUA); | ||
897 | len++; | ||
898 | } | ||
899 | else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) | ||
900 | && cc == L'\\' && psz[0] == L'\\' ) | ||
901 | { | ||
902 | if (buf) | ||
903 | *buf++ = (char)cc; | ||
904 | psz++; | ||
905 | len++; | ||
906 | } | ||
907 | else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) && | ||
908 | cc == L'\\' && | ||
909 | isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) ) | ||
910 | { | ||
911 | if (buf) | ||
912 | { | ||
913 | *buf++ = (char) ((psz[0] - L'0') * 0100 + | ||
914 | (psz[1] - L'0') * 010 + | ||
915 | (psz[2] - L'0')); | ||
916 | } | ||
917 | |||
918 | psz += 3; | ||
919 | len++; | ||
920 | } | ||
921 | else | ||
922 | { | ||
923 | unsigned cnt; | ||
924 | for (cnt = 0; cc > utf8_max[cnt]; cnt++) | ||
925 | { | ||
926 | } | ||
927 | |||
928 | if (!cnt) | ||
929 | { | ||
930 | // plain ASCII char | ||
931 | if (buf) | ||
932 | *buf++ = (char) cc; | ||
933 | len++; | ||
934 | } | ||
935 | else | ||
936 | { | ||
937 | len += cnt + 1; | ||
938 | if (buf) | ||
939 | { | ||
940 | *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt))); | ||
941 | while (cnt--) | ||
942 | *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f)); | ||
943 | } | ||
944 | } | ||
945 | } | ||
946 | } | ||
947 | |||
948 | if (buf && (len < n)) | ||
949 | *buf = 0; | ||
950 | |||
951 | return len; | ||
952 | } | ||
953 | |||
954 | // ============================================================================ | ||
955 | // UTF-16 | ||
956 | // ============================================================================ | ||
957 | |||
// define aliases for the UTF-16 converter classes according to the byte order
// of this platform: "straight" is the one not changing the byte order and
// "swap" the one reversing it
#ifdef WORDS_BIGENDIAN
    #define wxMBConvUTF16straight wxMBConvUTF16BE
    #define wxMBConvUTF16swap     wxMBConvUTF16LE
#else
    #define wxMBConvUTF16swap     wxMBConvUTF16BE
    #define wxMBConvUTF16straight wxMBConvUTF16LE
#endif
965 | |||
966 | /* static */ | ||
967 | size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen) | ||
968 | { | ||
969 | if ( srcLen == wxNO_LEN ) | ||
970 | { | ||
971 | // count the number of bytes in input, including the trailing NULs | ||
972 | const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src); | ||
973 | for ( srcLen = 1; *inBuff++; srcLen++ ) | ||
974 | ; | ||
975 | |||
976 | srcLen *= BYTES_PER_CHAR; | ||
977 | } | ||
978 | else // we already have the length | ||
979 | { | ||
980 | // we can only convert an entire number of UTF-16 characters | ||
981 | if ( srcLen % BYTES_PER_CHAR ) | ||
982 | return wxCONV_FAILED; | ||
983 | } | ||
984 | |||
985 | return srcLen; | ||
986 | } | ||
987 | |||
988 | // case when in-memory representation is UTF-16 too | ||
989 | #ifdef WC_UTF16 | ||
990 | |||
991 | // ---------------------------------------------------------------------------- | ||
992 | // conversions without endianness change | ||
993 | // ---------------------------------------------------------------------------- | ||
994 | |||
995 | size_t | ||
996 | wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen, | ||
997 | const char *src, size_t srcLen) const | ||
998 | { | ||
999 | // set up the scene for using memcpy() (which is presumably more efficient | ||
1000 | // than copying the bytes one by one) | ||
1001 | srcLen = GetLength(src, srcLen); | ||
1002 | if ( srcLen == wxNO_LEN ) | ||
1003 | return wxCONV_FAILED; | ||
1004 | |||
1005 | const size_t inLen = srcLen / BYTES_PER_CHAR; | ||
1006 | if ( dst ) | ||
1007 | { | ||
1008 | if ( dstLen < inLen ) | ||
1009 | return wxCONV_FAILED; | ||
1010 | |||
1011 | memcpy(dst, src, srcLen); | ||
1012 | } | ||
1013 | |||
1014 | return inLen; | ||
1015 | } | ||
1016 | |||
1017 | size_t | ||
1018 | wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen, | ||
1019 | const wchar_t *src, size_t srcLen) const | ||
1020 | { | ||
1021 | if ( srcLen == wxNO_LEN ) | ||
1022 | srcLen = wxWcslen(src) + 1; | ||
1023 | |||
1024 | srcLen *= BYTES_PER_CHAR; | ||
1025 | |||
1026 | if ( dst ) | ||
1027 | { | ||
1028 | if ( dstLen < srcLen ) | ||
1029 | return wxCONV_FAILED; | ||
1030 | |||
1031 | memcpy(dst, src, srcLen); | ||
1032 | } | ||
1033 | |||
1034 | return srcLen; | ||
1035 | } | ||
1036 | |||
1037 | // ---------------------------------------------------------------------------- | ||
1038 | // endian-reversing conversions | ||
1039 | // ---------------------------------------------------------------------------- | ||
1040 | |||
1041 | size_t | ||
1042 | wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen, | ||
1043 | const char *src, size_t srcLen) const | ||
1044 | { | ||
1045 | srcLen = GetLength(src, srcLen); | ||
1046 | if ( srcLen == wxNO_LEN ) | ||
1047 | return wxCONV_FAILED; | ||
1048 | |||
1049 | srcLen /= BYTES_PER_CHAR; | ||
1050 | |||
1051 | if ( dst ) | ||
1052 | { | ||
1053 | if ( dstLen < srcLen ) | ||
1054 | return wxCONV_FAILED; | ||
1055 | |||
1056 | const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src); | ||
1057 | for ( size_t n = 0; n < srcLen; n++, inBuff++ ) | ||
1058 | { | ||
1059 | *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff); | ||
1060 | } | ||
1061 | } | ||
1062 | |||
1063 | return srcLen; | ||
1064 | } | ||
1065 | |||
1066 | size_t | ||
1067 | wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen, | ||
1068 | const wchar_t *src, size_t srcLen) const | ||
1069 | { | ||
1070 | if ( srcLen == wxNO_LEN ) | ||
1071 | srcLen = wxWcslen(src) + 1; | ||
1072 | |||
1073 | srcLen *= BYTES_PER_CHAR; | ||
1074 | |||
1075 | if ( dst ) | ||
1076 | { | ||
1077 | if ( dstLen < srcLen ) | ||
1078 | return wxCONV_FAILED; | ||
1079 | |||
1080 | wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst); | ||
1081 | for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ ) | ||
1082 | { | ||
1083 | *outBuff++ = wxUINT16_SWAP_ALWAYS(*src); | ||
1084 | } | ||
1085 | } | ||
1086 | |||
1087 | return srcLen; | ||
1088 | } | ||
1089 | |||
1090 | #else // !WC_UTF16: wchar_t is UTF-32 | ||
1091 | |||
1092 | // ---------------------------------------------------------------------------- | ||
1093 | // conversions without endianness change | ||
1094 | // ---------------------------------------------------------------------------- | ||
1095 | |||
1096 | size_t | ||
1097 | wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen, | ||
1098 | const char *src, size_t srcLen) const | ||
1099 | { | ||
1100 | srcLen = GetLength(src, srcLen); | ||
1101 | if ( srcLen == wxNO_LEN ) | ||
1102 | return wxCONV_FAILED; | ||
1103 | |||
1104 | const size_t inLen = srcLen / BYTES_PER_CHAR; | ||
1105 | if ( !dst ) | ||
1106 | { | ||
1107 | // optimization: return maximal space which could be needed for this | ||
1108 | // string even if the real size could be smaller if the buffer contains | ||
1109 | // any surrogates | ||
1110 | return inLen; | ||
1111 | } | ||
1112 | |||
1113 | size_t outLen = 0; | ||
1114 | const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src); | ||
1115 | for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; ) | ||
1116 | { | ||
1117 | const wxUint32 ch = wxDecodeSurrogate(&inBuff); | ||
1118 | if ( !inBuff ) | ||
1119 | return wxCONV_FAILED; | ||
1120 | |||
1121 | if ( ++outLen > dstLen ) | ||
1122 | return wxCONV_FAILED; | ||
1123 | |||
1124 | *dst++ = ch; | ||
1125 | } | ||
1126 | |||
1127 | |||
1128 | return outLen; | ||
1129 | } | ||
1130 | |||
1131 | size_t | ||
1132 | wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen, | ||
1133 | const wchar_t *src, size_t srcLen) const | ||
1134 | { | ||
1135 | if ( srcLen == wxNO_LEN ) | ||
1136 | srcLen = wxWcslen(src) + 1; | ||
1137 | |||
1138 | size_t outLen = 0; | ||
1139 | wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst); | ||
1140 | for ( size_t n = 0; n < srcLen; n++ ) | ||
1141 | { | ||
1142 | wxUint16 cc[2]; | ||
1143 | const size_t numChars = encode_utf16(*src++, cc); | ||
1144 | if ( numChars == wxCONV_FAILED ) | ||
1145 | return wxCONV_FAILED; | ||
1146 | |||
1147 | outLen += numChars * BYTES_PER_CHAR; | ||
1148 | if ( outBuff ) | ||
1149 | { | ||
1150 | if ( outLen > dstLen ) | ||
1151 | return wxCONV_FAILED; | ||
1152 | |||
1153 | *outBuff++ = cc[0]; | ||
1154 | if ( numChars == 2 ) | ||
1155 | { | ||
1156 | // second character of a surrogate | ||
1157 | *outBuff++ = cc[1]; | ||
1158 | } | ||
1159 | } | ||
1160 | } | ||
1161 | |||
1162 | return outLen; | ||
1163 | } | ||
1164 | |||
1165 | // ---------------------------------------------------------------------------- | ||
1166 | // endian-reversing conversions | ||
1167 | // ---------------------------------------------------------------------------- | ||
1168 | |||
1169 | size_t | ||
1170 | wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen, | ||
1171 | const char *src, size_t srcLen) const | ||
1172 | { | ||
1173 | srcLen = GetLength(src, srcLen); | ||
1174 | if ( srcLen == wxNO_LEN ) | ||
1175 | return wxCONV_FAILED; | ||
1176 | |||
1177 | const size_t inLen = srcLen / BYTES_PER_CHAR; | ||
1178 | if ( !dst ) | ||
1179 | { | ||
1180 | // optimization: return maximal space which could be needed for this | ||
1181 | // string even if the real size could be smaller if the buffer contains | ||
1182 | // any surrogates | ||
1183 | return inLen; | ||
1184 | } | ||
1185 | |||
1186 | size_t outLen = 0; | ||
1187 | const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src); | ||
1188 | for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; ) | ||
1189 | { | ||
1190 | wxUint32 ch; | ||
1191 | wxUint16 tmp[2]; | ||
1192 | |||
1193 | tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff); | ||
1194 | inBuff++; | ||
1195 | tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff); | ||
1196 | |||
1197 | const size_t numChars = decode_utf16(tmp, ch); | ||
1198 | if ( numChars == wxCONV_FAILED ) | ||
1199 | return wxCONV_FAILED; | ||
1200 | |||
1201 | if ( numChars == 2 ) | ||
1202 | inBuff++; | ||
1203 | |||
1204 | if ( ++outLen > dstLen ) | ||
1205 | return wxCONV_FAILED; | ||
1206 | |||
1207 | *dst++ = ch; | ||
1208 | } | ||
1209 | |||
1210 | |||
1211 | return outLen; | ||
1212 | } | ||
1213 | |||
1214 | size_t | ||
1215 | wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen, | ||
1216 | const wchar_t *src, size_t srcLen) const | ||
1217 | { | ||
1218 | if ( srcLen == wxNO_LEN ) | ||
1219 | srcLen = wxWcslen(src) + 1; | ||
1220 | |||
1221 | size_t outLen = 0; | ||
1222 | wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst); | ||
1223 | for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ ) | ||
1224 | { | ||
1225 | wxUint16 cc[2]; | ||
1226 | const size_t numChars = encode_utf16(*src, cc); | ||
1227 | if ( numChars == wxCONV_FAILED ) | ||
1228 | return wxCONV_FAILED; | ||
1229 | |||
1230 | outLen += numChars * BYTES_PER_CHAR; | ||
1231 | if ( outBuff ) | ||
1232 | { | ||
1233 | if ( outLen > dstLen ) | ||
1234 | return wxCONV_FAILED; | ||
1235 | |||
1236 | *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]); | ||
1237 | if ( numChars == 2 ) | ||
1238 | { | ||
1239 | // second character of a surrogate | ||
1240 | *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]); | ||
1241 | } | ||
1242 | } | ||
1243 | } | ||
1244 | |||
1245 | return outLen; | ||
1246 | } | ||
1247 | |||
1248 | #endif // WC_UTF16/!WC_UTF16 | ||
1249 | |||
1250 | |||
1251 | // ============================================================================ | ||
1252 | // UTF-32 | ||
1253 | // ============================================================================ | ||
1254 | |||
1255 | #ifdef WORDS_BIGENDIAN | ||
1256 | #define wxMBConvUTF32straight wxMBConvUTF32BE | ||
1257 | #define wxMBConvUTF32swap wxMBConvUTF32LE | ||
1258 | #else | ||
1259 | #define wxMBConvUTF32swap wxMBConvUTF32BE | ||
1260 | #define wxMBConvUTF32straight wxMBConvUTF32LE | ||
1261 | #endif | ||
1262 | |||
1263 | |||
1264 | WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE; | ||
1265 | WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE; | ||
1266 | |||
1267 | /* static */ | ||
1268 | size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen) | ||
1269 | { | ||
1270 | if ( srcLen == wxNO_LEN ) | ||
1271 | { | ||
1272 | // count the number of bytes in input, including the trailing NULs | ||
1273 | const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src); | ||
1274 | for ( srcLen = 1; *inBuff++; srcLen++ ) | ||
1275 | ; | ||
1276 | |||
1277 | srcLen *= BYTES_PER_CHAR; | ||
1278 | } | ||
1279 | else // we already have the length | ||
1280 | { | ||
1281 | // we can only convert an entire number of UTF-32 characters | ||
1282 | if ( srcLen % BYTES_PER_CHAR ) | ||
1283 | return wxCONV_FAILED; | ||
1284 | } | ||
1285 | |||
1286 | return srcLen; | ||
1287 | } | ||
1288 | |||
1289 | // case when in-memory representation is UTF-16 | ||
1290 | #ifdef WC_UTF16 | ||
1291 | |||
1292 | // ---------------------------------------------------------------------------- | ||
1293 | // conversions without endianness change | ||
1294 | // ---------------------------------------------------------------------------- | ||
1295 | |||
1296 | size_t | ||
1297 | wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen, | ||
1298 | const char *src, size_t srcLen) const | ||
1299 | { | ||
1300 | srcLen = GetLength(src, srcLen); | ||
1301 | if ( srcLen == wxNO_LEN ) | ||
1302 | return wxCONV_FAILED; | ||
1303 | |||
1304 | const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src); | ||
1305 | const size_t inLen = srcLen / BYTES_PER_CHAR; | ||
1306 | size_t outLen = 0; | ||
1307 | for ( size_t n = 0; n < inLen; n++ ) | ||
1308 | { | ||
1309 | wxUint16 cc[2]; | ||
1310 | const size_t numChars = encode_utf16(*inBuff++, cc); | ||
1311 | if ( numChars == wxCONV_FAILED ) | ||
1312 | return wxCONV_FAILED; | ||
1313 | |||
1314 | outLen += numChars; | ||
1315 | if ( dst ) | ||
1316 | { | ||
1317 | if ( outLen > dstLen ) | ||
1318 | return wxCONV_FAILED; | ||
1319 | |||
1320 | *dst++ = cc[0]; | ||
1321 | if ( numChars == 2 ) | ||
1322 | { | ||
1323 | // second character of a surrogate | ||
1324 | *dst++ = cc[1]; | ||
1325 | } | ||
1326 | } | ||
1327 | } | ||
1328 | |||
1329 | return outLen; | ||
1330 | } | ||
1331 | |||
1332 | size_t | ||
1333 | wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen, | ||
1334 | const wchar_t *src, size_t srcLen) const | ||
1335 | { | ||
1336 | if ( srcLen == wxNO_LEN ) | ||
1337 | srcLen = wxWcslen(src) + 1; | ||
1338 | |||
1339 | if ( !dst ) | ||
1340 | { | ||
1341 | // optimization: return maximal space which could be needed for this | ||
1342 | // string instead of the exact amount which could be less if there are | ||
1343 | // any surrogates in the input | ||
1344 | // | ||
1345 | // we consider that surrogates are rare enough to make it worthwhile to | ||
1346 | // avoid running the loop below at the cost of slightly extra memory | ||
1347 | // consumption | ||
1348 | return srcLen * BYTES_PER_CHAR; | ||
1349 | } | ||
1350 | |||
1351 | wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst); | ||
1352 | size_t outLen = 0; | ||
1353 | for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; ) | ||
1354 | { | ||
1355 | const wxUint32 ch = wxDecodeSurrogate(&src); | ||
1356 | if ( !src ) | ||
1357 | return wxCONV_FAILED; | ||
1358 | |||
1359 | outLen += BYTES_PER_CHAR; | ||
1360 | |||
1361 | if ( outLen > dstLen ) | ||
1362 | return wxCONV_FAILED; | ||
1363 | |||
1364 | *outBuff++ = ch; | ||
1365 | } | ||
1366 | |||
1367 | return outLen; | ||
1368 | } | ||
1369 | |||
1370 | // ---------------------------------------------------------------------------- | ||
1371 | // endian-reversing conversions | ||
1372 | // ---------------------------------------------------------------------------- | ||
1373 | |||
1374 | size_t | ||
1375 | wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen, | ||
1376 | const char *src, size_t srcLen) const | ||
1377 | { | ||
1378 | srcLen = GetLength(src, srcLen); | ||
1379 | if ( srcLen == wxNO_LEN ) | ||
1380 | return wxCONV_FAILED; | ||
1381 | |||
1382 | const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src); | ||
1383 | const size_t inLen = srcLen / BYTES_PER_CHAR; | ||
1384 | size_t outLen = 0; | ||
1385 | for ( size_t n = 0; n < inLen; n++, inBuff++ ) | ||
1386 | { | ||
1387 | wxUint16 cc[2]; | ||
1388 | const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc); | ||
1389 | if ( numChars == wxCONV_FAILED ) | ||
1390 | return wxCONV_FAILED; | ||
1391 | |||
1392 | outLen += numChars; | ||
1393 | if ( dst ) | ||
1394 | { | ||
1395 | if ( outLen > dstLen ) | ||
1396 | return wxCONV_FAILED; | ||
1397 | |||
1398 | *dst++ = cc[0]; | ||
1399 | if ( numChars == 2 ) | ||
1400 | { | ||
1401 | // second character of a surrogate | ||
1402 | *dst++ = cc[1]; | ||
1403 | } | ||
1404 | } | ||
1405 | } | ||
1406 | |||
1407 | return outLen; | ||
1408 | } | ||
1409 | |||
1410 | size_t | ||
1411 | wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen, | ||
1412 | const wchar_t *src, size_t srcLen) const | ||
1413 | { | ||
1414 | if ( srcLen == wxNO_LEN ) | ||
1415 | srcLen = wxWcslen(src) + 1; | ||
1416 | |||
1417 | if ( !dst ) | ||
1418 | { | ||
1419 | // optimization: return maximal space which could be needed for this | ||
1420 | // string instead of the exact amount which could be less if there are | ||
1421 | // any surrogates in the input | ||
1422 | // | ||
1423 | // we consider that surrogates are rare enough to make it worthwhile to | ||
1424 | // avoid running the loop below at the cost of slightly extra memory | ||
1425 | // consumption | ||
1426 | return srcLen*BYTES_PER_CHAR; | ||
1427 | } | ||
1428 | |||
1429 | wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst); | ||
1430 | size_t outLen = 0; | ||
1431 | for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; ) | ||
1432 | { | ||
1433 | const wxUint32 ch = wxDecodeSurrogate(&src); | ||
1434 | if ( !src ) | ||
1435 | return wxCONV_FAILED; | ||
1436 | |||
1437 | outLen += BYTES_PER_CHAR; | ||
1438 | |||
1439 | if ( outLen > dstLen ) | ||
1440 | return wxCONV_FAILED; | ||
1441 | |||
1442 | *outBuff++ = wxUINT32_SWAP_ALWAYS(ch); | ||
1443 | } | ||
1444 | |||
1445 | return outLen; | ||
1446 | } | ||
1447 | |||
1448 | #else // !WC_UTF16: wchar_t is UTF-32 | ||
1449 | |||
1450 | // ---------------------------------------------------------------------------- | ||
1451 | // conversions without endianness change | ||
1452 | // ---------------------------------------------------------------------------- | ||
1453 | |||
1454 | size_t | ||
1455 | wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen, | ||
1456 | const char *src, size_t srcLen) const | ||
1457 | { | ||
1458 | // use memcpy() as it should be much faster than hand-written loop | ||
1459 | srcLen = GetLength(src, srcLen); | ||
1460 | if ( srcLen == wxNO_LEN ) | ||
1461 | return wxCONV_FAILED; | ||
1462 | |||
1463 | const size_t inLen = srcLen/BYTES_PER_CHAR; | ||
1464 | if ( dst ) | ||
1465 | { | ||
1466 | if ( dstLen < inLen ) | ||
1467 | return wxCONV_FAILED; | ||
1468 | |||
1469 | memcpy(dst, src, srcLen); | ||
1470 | } | ||
1471 | |||
1472 | return inLen; | ||
1473 | } | ||
1474 | |||
1475 | size_t | ||
1476 | wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen, | ||
1477 | const wchar_t *src, size_t srcLen) const | ||
1478 | { | ||
1479 | if ( srcLen == wxNO_LEN ) | ||
1480 | srcLen = wxWcslen(src) + 1; | ||
1481 | |||
1482 | srcLen *= BYTES_PER_CHAR; | ||
1483 | |||
1484 | if ( dst ) | ||
1485 | { | ||
1486 | if ( dstLen < srcLen ) | ||
1487 | return wxCONV_FAILED; | ||
1488 | |||
1489 | memcpy(dst, src, srcLen); | ||
1490 | } | ||
1491 | |||
1492 | return srcLen; | ||
1493 | } | ||
1494 | |||
1495 | // ---------------------------------------------------------------------------- | ||
1496 | // endian-reversing conversions | ||
1497 | // ---------------------------------------------------------------------------- | ||
1498 | |||
1499 | size_t | ||
1500 | wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen, | ||
1501 | const char *src, size_t srcLen) const | ||
1502 | { | ||
1503 | srcLen = GetLength(src, srcLen); | ||
1504 | if ( srcLen == wxNO_LEN ) | ||
1505 | return wxCONV_FAILED; | ||
1506 | |||
1507 | srcLen /= BYTES_PER_CHAR; | ||
1508 | |||
1509 | if ( dst ) | ||
1510 | { | ||
1511 | if ( dstLen < srcLen ) | ||
1512 | return wxCONV_FAILED; | ||
1513 | |||
1514 | const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src); | ||
1515 | for ( size_t n = 0; n < srcLen; n++, inBuff++ ) | ||
1516 | { | ||
1517 | *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff); | ||
1518 | } | ||
1519 | } | ||
1520 | |||
1521 | return srcLen; | ||
1522 | } | ||
1523 | |||
1524 | size_t | ||
1525 | wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen, | ||
1526 | const wchar_t *src, size_t srcLen) const | ||
1527 | { | ||
1528 | if ( srcLen == wxNO_LEN ) | ||
1529 | srcLen = wxWcslen(src) + 1; | ||
1530 | |||
1531 | srcLen *= BYTES_PER_CHAR; | ||
1532 | |||
1533 | if ( dst ) | ||
1534 | { | ||
1535 | if ( dstLen < srcLen ) | ||
1536 | return wxCONV_FAILED; | ||
1537 | |||
1538 | wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst); | ||
1539 | for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ ) | ||
1540 | { | ||
1541 | *outBuff++ = wxUINT32_SWAP_ALWAYS(*src); | ||
1542 | } | ||
1543 | } | ||
1544 | |||
1545 | return srcLen; | ||
1546 | } | ||
1547 | |||
1548 | #endif // WC_UTF16/!WC_UTF16 | ||
1549 | |||
1550 | |||
1551 | // ============================================================================ | ||
1552 | // The classes doing conversion using the iconv_xxx() functions | ||
1553 | // ============================================================================ | ||
1554 | |||
1555 | #ifdef HAVE_ICONV | ||
1556 | |||
1557 | // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with | ||
1558 | // E2BIG if output buffer is _exactly_ as big as needed. Such case is | ||
1559 | // (unless there's yet another bug in glibc) the only case when iconv() | ||
1560 | // returns with (size_t)-1 (which means error) and says there are 0 bytes | ||
1561 | // left in the input buffer -- when _real_ error occurs, | ||
1562 | // bytes-left-in-input buffer is non-zero. Hence, this alternative test for | ||
1563 | // iconv() failure. | ||
1564 | // [This bug does not appear in glibc 2.2.] | ||
1565 | #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1 | ||
1566 | #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \ | ||
1567 | (errno != E2BIG || bufLeft != 0)) | ||
1568 | #else | ||
1569 | #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1) | ||
1570 | #endif | ||
1571 | |||
1572 | #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x)) | ||
1573 | |||
1574 | #define ICONV_T_INVALID ((iconv_t)-1) | ||
1575 | |||
1576 | #if SIZEOF_WCHAR_T == 4 | ||
1577 | #define WC_BSWAP wxUINT32_SWAP_ALWAYS | ||
1578 | #define WC_ENC wxFONTENCODING_UTF32 | ||
1579 | #elif SIZEOF_WCHAR_T == 2 | ||
1580 | #define WC_BSWAP wxUINT16_SWAP_ALWAYS | ||
1581 | #define WC_ENC wxFONTENCODING_UTF16 | ||
1582 | #else // sizeof(wchar_t) != 2 nor 4 | ||
1583 | // does this ever happen? | ||
1584 | #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org" | ||
1585 | #endif | ||
1586 | |||
1587 | // ---------------------------------------------------------------------------- | ||
1588 | // wxMBConv_iconv: encapsulates an iconv character set | ||
1589 | // ---------------------------------------------------------------------------- | ||
1590 | |||
1591 | class wxMBConv_iconv : public wxMBConv | ||
1592 | { | ||
1593 | public: | ||
1594 | wxMBConv_iconv(const wxChar *name); | ||
1595 | virtual ~wxMBConv_iconv(); | ||
1596 | |||
1597 | virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const; | ||
1598 | virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const; | ||
1599 | |||
1600 | // classify this encoding as explained in wxMBConv::GetMBNulLen() comment | ||
1601 | virtual size_t GetMBNulLen() const; | ||
1602 | |||
1603 | virtual wxMBConv *Clone() const | ||
1604 | { | ||
1605 | wxMBConv_iconv *p = new wxMBConv_iconv(m_name); | ||
1606 | p->m_minMBCharWidth = m_minMBCharWidth; | ||
1607 | return p; | ||
1608 | } | ||
1609 | |||
1610 | bool IsOk() const | ||
1611 | { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); } | ||
1612 | |||
1613 | protected: | ||
1614 | // the iconv handlers used to translate from multibyte | ||
1615 | // to wide char and in the other direction | ||
1616 | iconv_t m2w, | ||
1617 | w2m; | ||
1618 | |||
1619 | #if wxUSE_THREADS | ||
1620 | // guards access to m2w and w2m objects | ||
1621 | wxMutex m_iconvMutex; | ||
1622 | #endif | ||
1623 | |||
1624 | private: | ||
1625 | // the name (for iconv_open()) of a wide char charset -- if none is | ||
1626 | // available on this machine, it will remain NULL | ||
1627 | static wxString ms_wcCharsetName; | ||
1628 | |||
1629 | // true if the wide char encoding we use (i.e. ms_wcCharsetName) has | ||
1630 | // different endian-ness than the native one | ||
1631 | static bool ms_wcNeedsSwap; | ||
1632 | |||
1633 | |||
1634 | // name of the encoding handled by this conversion | ||
1635 | wxString m_name; | ||
1636 | |||
1637 | // cached result of GetMBNulLen(); set to 0 meaning "unknown" | ||
1638 | // initially | ||
1639 | size_t m_minMBCharWidth; | ||
1640 | }; | ||
1641 | |||
1642 | // make the constructor available for unit testing | ||
1643 | WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name ) | ||
1644 | { | ||
1645 | wxMBConv_iconv* result = new wxMBConv_iconv( name ); | ||
1646 | if ( !result->IsOk() ) | ||
1647 | { | ||
1648 | delete result; | ||
1649 | return 0; | ||
1650 | } | ||
1651 | |||
1652 | return result; | ||
1653 | } | ||
1654 | |||
1655 | wxString wxMBConv_iconv::ms_wcCharsetName; | ||
1656 | bool wxMBConv_iconv::ms_wcNeedsSwap = false; | ||
1657 | |||
1658 | wxMBConv_iconv::wxMBConv_iconv(const wxChar *name) | ||
1659 | : m_name(name) | ||
1660 | { | ||
1661 | m_minMBCharWidth = 0; | ||
1662 | |||
1663 | // iconv operates with chars, not wxChars, but luckily it uses only ASCII | ||
1664 | // names for the charsets | ||
1665 | const wxCharBuffer cname(wxString(name).ToAscii()); | ||
1666 | |||
1667 | // check for charset that represents wchar_t: | ||
1668 | if ( ms_wcCharsetName.empty() ) | ||
1669 | { | ||
1670 | wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:")); | ||
1671 | |||
1672 | #if wxUSE_FONTMAP | ||
1673 | const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC); | ||
1674 | #else // !wxUSE_FONTMAP | ||
1675 | static const wxChar *names_static[] = | ||
1676 | { | ||
1677 | #if SIZEOF_WCHAR_T == 4 | ||
1678 | _T("UCS-4"), | ||
1679 | #elif SIZEOF_WCHAR_T = 2 | ||
1680 | _T("UCS-2"), | ||
1681 | #endif | ||
1682 | NULL | ||
1683 | }; | ||
1684 | const wxChar **names = names_static; | ||
1685 | #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP | ||
1686 | |||
1687 | for ( ; *names && ms_wcCharsetName.empty(); ++names ) | ||
1688 | { | ||
1689 | const wxString nameCS(*names); | ||
1690 | |||
1691 | // first try charset with explicit bytesex info (e.g. "UCS-4LE"): | ||
1692 | wxString nameXE(nameCS); | ||
1693 | |||
1694 | #ifdef WORDS_BIGENDIAN | ||
1695 | nameXE += _T("BE"); | ||
1696 | #else // little endian | ||
1697 | nameXE += _T("LE"); | ||
1698 | #endif | ||
1699 | |||
1700 | wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), | ||
1701 | nameXE.c_str()); | ||
1702 | |||
1703 | m2w = iconv_open(nameXE.ToAscii(), cname); | ||
1704 | if ( m2w == ICONV_T_INVALID ) | ||
1705 | { | ||
1706 | // try charset w/o bytesex info (e.g. "UCS4") | ||
1707 | wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""), | ||
1708 | nameCS.c_str()); | ||
1709 | m2w = iconv_open(nameCS.ToAscii(), cname); | ||
1710 | |||
1711 | // and check for bytesex ourselves: | ||
1712 | if ( m2w != ICONV_T_INVALID ) | ||
1713 | { | ||
1714 | char buf[2], *bufPtr; | ||
1715 | wchar_t wbuf[2], *wbufPtr; | ||
1716 | size_t insz, outsz; | ||
1717 | size_t res; | ||
1718 | |||
1719 | buf[0] = 'A'; | ||
1720 | buf[1] = 0; | ||
1721 | wbuf[0] = 0; | ||
1722 | insz = 2; | ||
1723 | outsz = SIZEOF_WCHAR_T * 2; | ||
1724 | wbufPtr = wbuf; | ||
1725 | bufPtr = buf; | ||
1726 | |||
1727 | res = iconv( | ||
1728 | m2w, ICONV_CHAR_CAST(&bufPtr), &insz, | ||
1729 | (char**)&wbufPtr, &outsz); | ||
1730 | |||
1731 | if (ICONV_FAILED(res, insz)) | ||
1732 | { | ||
1733 | wxLogLastError(wxT("iconv")); | ||
1734 | wxLogError(_("Conversion to charset '%s' doesn't work."), | ||
1735 | nameCS.c_str()); | ||
1736 | } | ||
1737 | else // ok, can convert to this encoding, remember it | ||
1738 | { | ||
1739 | ms_wcCharsetName = nameCS; | ||
1740 | ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0]; | ||
1741 | } | ||
1742 | } | ||
1743 | } | ||
1744 | else // use charset not requiring byte swapping | ||
1745 | { | ||
1746 | ms_wcCharsetName = nameXE; | ||
1747 | } | ||
1748 | } | ||
1749 | |||
1750 | wxLogTrace(TRACE_STRCONV, | ||
1751 | wxT("iconv wchar_t charset is \"%s\"%s"), | ||
1752 | ms_wcCharsetName.empty() ? _T("<none>") | ||
1753 | : ms_wcCharsetName.c_str(), | ||
1754 | ms_wcNeedsSwap ? _T(" (needs swap)") | ||
1755 | : _T("")); | ||
1756 | } | ||
1757 | else // we already have ms_wcCharsetName | ||
1758 | { | ||
1759 | m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname); | ||
1760 | } | ||
1761 | |||
1762 | if ( ms_wcCharsetName.empty() ) | ||
1763 | { | ||
1764 | w2m = ICONV_T_INVALID; | ||
1765 | } | ||
1766 | else | ||
1767 | { | ||
1768 | w2m = iconv_open(cname, ms_wcCharsetName.ToAscii()); | ||
1769 | if ( w2m == ICONV_T_INVALID ) | ||
1770 | { | ||
1771 | wxLogTrace(TRACE_STRCONV, | ||
1772 | wxT("\"%s\" -> \"%s\" works but not the converse!?"), | ||
1773 | ms_wcCharsetName.c_str(), cname.data()); | ||
1774 | } | ||
1775 | } | ||
1776 | } | ||
1777 | |||
1778 | wxMBConv_iconv::~wxMBConv_iconv() | ||
1779 | { | ||
1780 | if ( m2w != ICONV_T_INVALID ) | ||
1781 | iconv_close(m2w); | ||
1782 | if ( w2m != ICONV_T_INVALID ) | ||
1783 | iconv_close(w2m); | ||
1784 | } | ||
1785 | |||
1786 | size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const | ||
1787 | { | ||
1788 | // find the string length: notice that must be done differently for | ||
1789 | // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs | ||
1790 | size_t inbuf; | ||
1791 | const size_t nulLen = GetMBNulLen(); | ||
1792 | switch ( nulLen ) | ||
1793 | { | ||
1794 | default: | ||
1795 | return wxCONV_FAILED; | ||
1796 | |||
1797 | case 1: | ||
1798 | inbuf = strlen(psz); // arguably more optimized than our version | ||
1799 | break; | ||
1800 | |||
1801 | case 2: | ||
1802 | case 4: | ||
1803 | // for UTF-16/32 not only we need to have 2/4 consecutive NULs but | ||
1804 | // they also have to start at character boundary and not span two | ||
1805 | // adjacent characters | ||
1806 | const char *p; | ||
1807 | for ( p = psz; NotAllNULs(p, nulLen); p += nulLen ) | ||
1808 | ; | ||
1809 | inbuf = p - psz; | ||
1810 | break; | ||
1811 | } | ||
1812 | |||
1813 | #if wxUSE_THREADS | ||
1814 | // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle. | ||
1815 | // Unfortunately there are a couple of global wxCSConv objects such as | ||
1816 | // wxConvLocal that are used all over wx code, so we have to make sure | ||
1817 | // the handle is used by at most one thread at the time. Otherwise | ||
1818 | // only a few wx classes would be safe to use from non-main threads | ||
1819 | // as MB<->WC conversion would fail "randomly". | ||
1820 | wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); | ||
1821 | #endif // wxUSE_THREADS | ||
1822 | |||
1823 | size_t outbuf = n * SIZEOF_WCHAR_T; | ||
1824 | size_t res, cres; | ||
1825 | // VS: Use these instead of psz, buf because iconv() modifies its arguments: | ||
1826 | wchar_t *bufPtr = buf; | ||
1827 | const char *pszPtr = psz; | ||
1828 | |||
1829 | if (buf) | ||
1830 | { | ||
1831 | // have destination buffer, convert there | ||
1832 | cres = iconv(m2w, | ||
1833 | ICONV_CHAR_CAST(&pszPtr), &inbuf, | ||
1834 | (char**)&bufPtr, &outbuf); | ||
1835 | res = n - (outbuf / SIZEOF_WCHAR_T); | ||
1836 | |||
1837 | if (ms_wcNeedsSwap) | ||
1838 | { | ||
1839 | // convert to native endianness | ||
1840 | for ( unsigned i = 0; i < res; i++ ) | ||
1841 | buf[n] = WC_BSWAP(buf[i]); | ||
1842 | } | ||
1843 | |||
1844 | // NUL-terminate the string if there is any space left | ||
1845 | if (res < n) | ||
1846 | buf[res] = 0; | ||
1847 | } | ||
1848 | else | ||
1849 | { | ||
1850 | // no destination buffer... convert using temp buffer | ||
1851 | // to calculate destination buffer requirement | ||
1852 | wchar_t tbuf[8]; | ||
1853 | res = 0; | ||
1854 | |||
1855 | do | ||
1856 | { | ||
1857 | bufPtr = tbuf; | ||
1858 | outbuf = 8 * SIZEOF_WCHAR_T; | ||
1859 | |||
1860 | cres = iconv(m2w, | ||
1861 | ICONV_CHAR_CAST(&pszPtr), &inbuf, | ||
1862 | (char**)&bufPtr, &outbuf ); | ||
1863 | |||
1864 | res += 8 - (outbuf / SIZEOF_WCHAR_T); | ||
1865 | } | ||
1866 | while ((cres == (size_t)-1) && (errno == E2BIG)); | ||
1867 | } | ||
1868 | |||
1869 | if (ICONV_FAILED(cres, inbuf)) | ||
1870 | { | ||
1871 | //VS: it is ok if iconv fails, hence trace only | ||
1872 | wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); | ||
1873 | return wxCONV_FAILED; | ||
1874 | } | ||
1875 | |||
1876 | return res; | ||
1877 | } | ||
1878 | |||
1879 | size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const | ||
1880 | { | ||
1881 | #if wxUSE_THREADS | ||
1882 | // NB: explained in MB2WC | ||
1883 | wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex); | ||
1884 | #endif | ||
1885 | |||
1886 | size_t inlen = wxWcslen(psz); | ||
1887 | size_t inbuf = inlen * SIZEOF_WCHAR_T; | ||
1888 | size_t outbuf = n; | ||
1889 | size_t res, cres; | ||
1890 | |||
1891 | wchar_t *tmpbuf = 0; | ||
1892 | |||
1893 | if (ms_wcNeedsSwap) | ||
1894 | { | ||
1895 | // need to copy to temp buffer to switch endianness | ||
1896 | // (doing WC_BSWAP twice on the original buffer won't help, as it | ||
1897 | // could be in read-only memory, or be accessed in some other thread) | ||
1898 | tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T); | ||
1899 | for ( size_t i = 0; i < inlen; i++ ) | ||
1900 | tmpbuf[n] = WC_BSWAP(psz[i]); | ||
1901 | |||
1902 | tmpbuf[inlen] = L'\0'; | ||
1903 | psz = tmpbuf; | ||
1904 | } | ||
1905 | |||
1906 | if (buf) | ||
1907 | { | ||
1908 | // have destination buffer, convert there | ||
1909 | cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf ); | ||
1910 | |||
1911 | res = n - outbuf; | ||
1912 | |||
1913 | // NB: iconv was given only wcslen(psz) characters on input, and so | ||
1914 | // it couldn't convert the trailing zero. Let's do it ourselves | ||
1915 | // if there's some room left for it in the output buffer. | ||
1916 | if (res < n) | ||
1917 | buf[0] = 0; | ||
1918 | } | ||
1919 | else | ||
1920 | { | ||
1921 | // no destination buffer: convert using temp buffer | ||
1922 | // to calculate destination buffer requirement | ||
1923 | char tbuf[16]; | ||
1924 | res = 0; | ||
1925 | do | ||
1926 | { | ||
1927 | buf = tbuf; | ||
1928 | outbuf = 16; | ||
1929 | |||
1930 | cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf ); | ||
1931 | |||
1932 | res += 16 - outbuf; | ||
1933 | } | ||
1934 | while ((cres == (size_t)-1) && (errno == E2BIG)); | ||
1935 | } | ||
1936 | |||
1937 | if (ms_wcNeedsSwap) | ||
1938 | { | ||
1939 | free(tmpbuf); | ||
1940 | } | ||
1941 | |||
1942 | if (ICONV_FAILED(cres, inbuf)) | ||
1943 | { | ||
1944 | wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode())); | ||
1945 | return wxCONV_FAILED; | ||
1946 | } | ||
1947 | |||
1948 | return res; | ||
1949 | } | ||
1950 | |||
1951 | size_t wxMBConv_iconv::GetMBNulLen() const | ||
1952 | { | ||
1953 | if ( m_minMBCharWidth == 0 ) | ||
1954 | { | ||
1955 | wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv); | ||
1956 | |||
1957 | #if wxUSE_THREADS | ||
1958 | // NB: explained in MB2WC | ||
1959 | wxMutexLocker lock(self->m_iconvMutex); | ||
1960 | #endif | ||
1961 | |||
1962 | const wchar_t *wnul = L""; | ||
1963 | char buf[8]; // should be enough for NUL in any encoding | ||
1964 | size_t inLen = sizeof(wchar_t), | ||
1965 | outLen = WXSIZEOF(buf); | ||
1966 | char *inBuff = (char *)wnul; | ||
1967 | char *outBuff = buf; | ||
1968 | if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 ) | ||
1969 | { | ||
1970 | self->m_minMBCharWidth = (size_t)-1; | ||
1971 | } | ||
1972 | else // ok | ||
1973 | { | ||
1974 | self->m_minMBCharWidth = outBuff - buf; | ||
1975 | } | ||
1976 | } | ||
1977 | |||
1978 | return m_minMBCharWidth; | ||
1979 | } | ||
1980 | |||
1981 | #endif // HAVE_ICONV | ||
1982 | |||
1983 | |||
1984 | // ============================================================================ | ||
1985 | // Win32 conversion classes | ||
1986 | // ============================================================================ | ||
1987 | |||
1988 | #ifdef wxHAVE_WIN32_MB2WC | ||
1989 | |||
1990 | // from utils.cpp | ||
1991 | #if wxUSE_FONTMAP | ||
1992 | extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset); | ||
1993 | extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding); | ||
1994 | #endif | ||
1995 | |||
1996 | class wxMBConv_win32 : public wxMBConv | ||
1997 | { | ||
1998 | public: | ||
1999 | wxMBConv_win32() | ||
2000 | { | ||
2001 | m_CodePage = CP_ACP; | ||
2002 | m_minMBCharWidth = 0; | ||
2003 | } | ||
2004 | |||
2005 | wxMBConv_win32(const wxMBConv_win32& conv) | ||
2006 | : wxMBConv() | ||
2007 | { | ||
2008 | m_CodePage = conv.m_CodePage; | ||
2009 | m_minMBCharWidth = conv.m_minMBCharWidth; | ||
2010 | } | ||
2011 | |||
2012 | #if wxUSE_FONTMAP | ||
2013 | wxMBConv_win32(const wxChar* name) | ||
2014 | { | ||
2015 | m_CodePage = wxCharsetToCodepage(name); | ||
2016 | m_minMBCharWidth = 0; | ||
2017 | } | ||
2018 | |||
2019 | wxMBConv_win32(wxFontEncoding encoding) | ||
2020 | { | ||
2021 | m_CodePage = wxEncodingToCodepage(encoding); | ||
2022 | m_minMBCharWidth = 0; | ||
2023 | } | ||
2024 | #endif // wxUSE_FONTMAP | ||
2025 | |||
2026 | virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const | ||
2027 | { | ||
2028 | // note that we have to use MB_ERR_INVALID_CHARS flag as it without it | ||
2029 | // the behaviour is not compatible with the Unix version (using iconv) | ||
2030 | // and break the library itself, e.g. wxTextInputStream::NextChar() | ||
2031 | // wouldn't work if reading an incomplete MB char didn't result in an | ||
2032 | // error | ||
2033 | // | ||
2034 | // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or | ||
2035 | // Win XP or newer and it is not supported for UTF-[78] so we always | ||
2036 | // use our own conversions in this case. See | ||
2037 | // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx | ||
2038 | // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp | ||
2039 | if ( m_CodePage == CP_UTF8 ) | ||
2040 | { | ||
2041 | return wxConvUTF8.MB2WC(buf, psz, n); | ||
2042 | } | ||
2043 | |||
2044 | if ( m_CodePage == CP_UTF7 ) | ||
2045 | { | ||
2046 | return wxConvUTF7.MB2WC(buf, psz, n); | ||
2047 | } | ||
2048 | |||
2049 | int flags = 0; | ||
2050 | if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) && | ||
2051 | IsAtLeastWin2kSP4() ) | ||
2052 | { | ||
2053 | flags = MB_ERR_INVALID_CHARS; | ||
2054 | } | ||
2055 | |||
2056 | const size_t len = ::MultiByteToWideChar | ||
2057 | ( | ||
2058 | m_CodePage, // code page | ||
2059 | flags, // flags: fall on error | ||
2060 | psz, // input string | ||
2061 | -1, // its length (NUL-terminated) | ||
2062 | buf, // output string | ||
2063 | buf ? n : 0 // size of output buffer | ||
2064 | ); | ||
2065 | if ( !len ) | ||
2066 | { | ||
2067 | // function totally failed | ||
2068 | return wxCONV_FAILED; | ||
2069 | } | ||
2070 | |||
2071 | // if we were really converting and didn't use MB_ERR_INVALID_CHARS, | ||
2072 | // check if we succeeded, by doing a double trip: | ||
2073 | if ( !flags && buf ) | ||
2074 | { | ||
2075 | const size_t mbLen = strlen(psz); | ||
2076 | wxCharBuffer mbBuf(mbLen); | ||
2077 | if ( ::WideCharToMultiByte | ||
2078 | ( | ||
2079 | m_CodePage, | ||
2080 | 0, | ||
2081 | buf, | ||
2082 | -1, | ||
2083 | mbBuf.data(), | ||
2084 | mbLen + 1, // size in bytes, not length | ||
2085 | NULL, | ||
2086 | NULL | ||
2087 | ) == 0 || | ||
2088 | strcmp(mbBuf, psz) != 0 ) | ||
2089 | { | ||
2090 | // we didn't obtain the same thing we started from, hence | ||
2091 | // the conversion was lossy and we consider that it failed | ||
2092 | return wxCONV_FAILED; | ||
2093 | } | ||
2094 | } | ||
2095 | |||
2096 | // note that it returns count of written chars for buf != NULL and size | ||
2097 | // of the needed buffer for buf == NULL so in either case the length of | ||
2098 | // the string (which never includes the terminating NUL) is one less | ||
2099 | return len - 1; | ||
2100 | } | ||
2101 | |||
2102 | virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const | ||
2103 | { | ||
2104 | /* | ||
2105 | we have a problem here: by default, WideCharToMultiByte() may | ||
2106 | replace characters unrepresentable in the target code page with bad | ||
2107 | quality approximations such as turning "1/2" symbol (U+00BD) into | ||
2108 | "1" for the code pages which don't have it and we, obviously, want | ||
2109 | to avoid this at any price | ||
2110 | |||
2111 | the trouble is that this function does it _silently_, i.e. it won't | ||
2112 | even tell us whether it did or not... Win98/2000 and higher provide | ||
2113 | WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and | ||
2114 | we have to resort to a round trip, i.e. check that converting back | ||
2115 | results in the same string -- this is, of course, expensive but | ||
2116 | otherwise we simply can't be sure to not garble the data. | ||
2117 | */ | ||
2118 | |||
2119 | // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN | ||
2120 | // it doesn't work with CJK encodings (which we test for rather roughly | ||
2121 | // here...) nor with UTF-7/8 nor, of course, with Windows versions not | ||
2122 | // supporting it | ||
2123 | BOOL usedDef wxDUMMY_INITIALIZE(false); | ||
2124 | BOOL *pUsedDef; | ||
2125 | int flags; | ||
2126 | if ( CanUseNoBestFit() && m_CodePage < 50000 ) | ||
2127 | { | ||
2128 | // it's our lucky day | ||
2129 | flags = WC_NO_BEST_FIT_CHARS; | ||
2130 | pUsedDef = &usedDef; | ||
2131 | } | ||
2132 | else // old system or unsupported encoding | ||
2133 | { | ||
2134 | flags = 0; | ||
2135 | pUsedDef = NULL; | ||
2136 | } | ||
2137 | |||
2138 | const size_t len = ::WideCharToMultiByte | ||
2139 | ( | ||
2140 | m_CodePage, // code page | ||
2141 | flags, // either none or no best fit | ||
2142 | pwz, // input string | ||
2143 | -1, // it is (wide) NUL-terminated | ||
2144 | buf, // output buffer | ||
2145 | buf ? n : 0, // and its size | ||
2146 | NULL, // default "replacement" char | ||
2147 | pUsedDef // [out] was it used? | ||
2148 | ); | ||
2149 | |||
2150 | if ( !len ) | ||
2151 | { | ||
2152 | // function totally failed | ||
2153 | return wxCONV_FAILED; | ||
2154 | } | ||
2155 | |||
2156 | // if we were really converting, check if we succeeded | ||
2157 | if ( buf ) | ||
2158 | { | ||
2159 | if ( flags ) | ||
2160 | { | ||
2161 | // check if the conversion failed, i.e. if any replacements | ||
2162 | // were done | ||
2163 | if ( usedDef ) | ||
2164 | return wxCONV_FAILED; | ||
2165 | } | ||
2166 | else // we must resort to double tripping... | ||
2167 | { | ||
2168 | wxWCharBuffer wcBuf(n); | ||
2169 | if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED || | ||
2170 | wcscmp(wcBuf, pwz) != 0 ) | ||
2171 | { | ||
2172 | // we didn't obtain the same thing we started from, hence | ||
2173 | // the conversion was lossy and we consider that it failed | ||
2174 | return wxCONV_FAILED; | ||
2175 | } | ||
2176 | } | ||
2177 | } | ||
2178 | |||
2179 | // see the comment above for the reason of "len - 1" | ||
2180 | return len - 1; | ||
2181 | } | ||
2182 | |||
2183 | virtual size_t GetMBNulLen() const | ||
2184 | { | ||
2185 | if ( m_minMBCharWidth == 0 ) | ||
2186 | { | ||
2187 | int len = ::WideCharToMultiByte | ||
2188 | ( | ||
2189 | m_CodePage, // code page | ||
2190 | 0, // no flags | ||
2191 | L"", // input string | ||
2192 | 1, // translate just the NUL | ||
2193 | NULL, // output buffer | ||
2194 | 0, // and its size | ||
2195 | NULL, // no replacement char | ||
2196 | NULL // [out] don't care if it was used | ||
2197 | ); | ||
2198 | |||
2199 | wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32); | ||
2200 | switch ( len ) | ||
2201 | { | ||
2202 | default: | ||
2203 | wxLogDebug(_T("Unexpected NUL length %d"), len); | ||
2204 | self->m_minMBCharWidth = (size_t)-1; | ||
2205 | break; | ||
2206 | |||
2207 | case 0: | ||
2208 | self->m_minMBCharWidth = (size_t)-1; | ||
2209 | break; | ||
2210 | |||
2211 | case 1: | ||
2212 | case 2: | ||
2213 | case 4: | ||
2214 | self->m_minMBCharWidth = len; | ||
2215 | break; | ||
2216 | } | ||
2217 | } | ||
2218 | |||
2219 | return m_minMBCharWidth; | ||
2220 | } | ||
2221 | |||
2222 | virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); } | ||
2223 | |||
2224 | bool IsOk() const { return m_CodePage != -1; } | ||
2225 | |||
2226 | private: | ||
2227 | static bool CanUseNoBestFit() | ||
2228 | { | ||
2229 | static int s_isWin98Or2k = -1; | ||
2230 | |||
2231 | if ( s_isWin98Or2k == -1 ) | ||
2232 | { | ||
2233 | int verMaj, verMin; | ||
2234 | switch ( wxGetOsVersion(&verMaj, &verMin) ) | ||
2235 | { | ||
2236 | case wxOS_WINDOWS_9X: | ||
2237 | s_isWin98Or2k = verMaj >= 4 && verMin >= 10; | ||
2238 | break; | ||
2239 | |||
2240 | case wxOS_WINDOWS_NT: | ||
2241 | s_isWin98Or2k = verMaj >= 5; | ||
2242 | break; | ||
2243 | |||
2244 | default: | ||
2245 | // unknown: be conservative by default | ||
2246 | s_isWin98Or2k = 0; | ||
2247 | break; | ||
2248 | } | ||
2249 | |||
2250 | wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") ); | ||
2251 | } | ||
2252 | |||
2253 | return s_isWin98Or2k == 1; | ||
2254 | } | ||
2255 | |||
2256 | static bool IsAtLeastWin2kSP4() | ||
2257 | { | ||
2258 | #ifdef __WXWINCE__ | ||
2259 | return false; | ||
2260 | #else | ||
2261 | static int s_isAtLeastWin2kSP4 = -1; | ||
2262 | |||
2263 | if ( s_isAtLeastWin2kSP4 == -1 ) | ||
2264 | { | ||
2265 | OSVERSIONINFOEX ver; | ||
2266 | |||
2267 | memset(&ver, 0, sizeof(ver)); | ||
2268 | ver.dwOSVersionInfoSize = sizeof(ver); | ||
2269 | GetVersionEx((OSVERSIONINFO*)&ver); | ||
2270 | |||
2271 | s_isAtLeastWin2kSP4 = | ||
2272 | ((ver.dwMajorVersion > 5) || // Vista+ | ||
2273 | (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003 | ||
2274 | (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 && | ||
2275 | ver.wServicePackMajor >= 4)) // 2000 SP4+ | ||
2276 | ? 1 : 0; | ||
2277 | } | ||
2278 | |||
2279 | return s_isAtLeastWin2kSP4 == 1; | ||
2280 | #endif | ||
2281 | } | ||
2282 | |||
2283 | |||
2284 | // the code page we're working with | ||
2285 | long m_CodePage; | ||
2286 | |||
2287 | // cached result of GetMBNulLen(), set to 0 initially meaning | ||
2288 | // "unknown" | ||
2289 | size_t m_minMBCharWidth; | ||
2290 | }; | ||
2291 | |||
2292 | #endif // wxHAVE_WIN32_MB2WC | ||
2293 | |||
2294 | // ============================================================================ | ||
2295 | // Cocoa conversion classes | ||
2296 | // ============================================================================ | ||
2297 | |||
2298 | #if defined(__WXCOCOA__) | ||
2299 | |||
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a
// bit - it's just not public (yet).
2303 | |||
2304 | #include <CoreFoundation/CFString.h> | ||
2305 | #include <CoreFoundation/CFStringEncodingExt.h> | ||
2306 | |||
2307 | CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding) | ||
2308 | { | ||
2309 | CFStringEncoding enc = kCFStringEncodingInvalidId ; | ||
2310 | |||
2311 | switch (encoding) | ||
2312 | { | ||
2313 | case wxFONTENCODING_DEFAULT : | ||
2314 | enc = CFStringGetSystemEncoding(); | ||
2315 | break ; | ||
2316 | |||
2317 | case wxFONTENCODING_ISO8859_1 : | ||
2318 | enc = kCFStringEncodingISOLatin1 ; | ||
2319 | break ; | ||
2320 | case wxFONTENCODING_ISO8859_2 : | ||
2321 | enc = kCFStringEncodingISOLatin2; | ||
2322 | break ; | ||
2323 | case wxFONTENCODING_ISO8859_3 : | ||
2324 | enc = kCFStringEncodingISOLatin3 ; | ||
2325 | break ; | ||
2326 | case wxFONTENCODING_ISO8859_4 : | ||
2327 | enc = kCFStringEncodingISOLatin4; | ||
2328 | break ; | ||
2329 | case wxFONTENCODING_ISO8859_5 : | ||
2330 | enc = kCFStringEncodingISOLatinCyrillic; | ||
2331 | break ; | ||
2332 | case wxFONTENCODING_ISO8859_6 : | ||
2333 | enc = kCFStringEncodingISOLatinArabic; | ||
2334 | break ; | ||
2335 | case wxFONTENCODING_ISO8859_7 : | ||
2336 | enc = kCFStringEncodingISOLatinGreek; | ||
2337 | break ; | ||
2338 | case wxFONTENCODING_ISO8859_8 : | ||
2339 | enc = kCFStringEncodingISOLatinHebrew; | ||
2340 | break ; | ||
2341 | case wxFONTENCODING_ISO8859_9 : | ||
2342 | enc = kCFStringEncodingISOLatin5; | ||
2343 | break ; | ||
2344 | case wxFONTENCODING_ISO8859_10 : | ||
2345 | enc = kCFStringEncodingISOLatin6; | ||
2346 | break ; | ||
2347 | case wxFONTENCODING_ISO8859_11 : | ||
2348 | enc = kCFStringEncodingISOLatinThai; | ||
2349 | break ; | ||
2350 | case wxFONTENCODING_ISO8859_13 : | ||
2351 | enc = kCFStringEncodingISOLatin7; | ||
2352 | break ; | ||
2353 | case wxFONTENCODING_ISO8859_14 : | ||
2354 | enc = kCFStringEncodingISOLatin8; | ||
2355 | break ; | ||
2356 | case wxFONTENCODING_ISO8859_15 : | ||
2357 | enc = kCFStringEncodingISOLatin9; | ||
2358 | break ; | ||
2359 | |||
2360 | case wxFONTENCODING_KOI8 : | ||
2361 | enc = kCFStringEncodingKOI8_R; | ||
2362 | break ; | ||
2363 | case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866 | ||
2364 | enc = kCFStringEncodingDOSRussian; | ||
2365 | break ; | ||
2366 | |||
2367 | // case wxFONTENCODING_BULGARIAN : | ||
2368 | // enc = ; | ||
2369 | // break ; | ||
2370 | |||
2371 | case wxFONTENCODING_CP437 : | ||
2372 | enc = kCFStringEncodingDOSLatinUS ; | ||
2373 | break ; | ||
2374 | case wxFONTENCODING_CP850 : | ||
2375 | enc = kCFStringEncodingDOSLatin1; | ||
2376 | break ; | ||
2377 | case wxFONTENCODING_CP852 : | ||
2378 | enc = kCFStringEncodingDOSLatin2; | ||
2379 | break ; | ||
2380 | case wxFONTENCODING_CP855 : | ||
2381 | enc = kCFStringEncodingDOSCyrillic; | ||
2382 | break ; | ||
2383 | case wxFONTENCODING_CP866 : | ||
2384 | enc = kCFStringEncodingDOSRussian ; | ||
2385 | break ; | ||
2386 | case wxFONTENCODING_CP874 : | ||
2387 | enc = kCFStringEncodingDOSThai; | ||
2388 | break ; | ||
2389 | case wxFONTENCODING_CP932 : | ||
2390 | enc = kCFStringEncodingDOSJapanese; | ||
2391 | break ; | ||
2392 | case wxFONTENCODING_CP936 : | ||
2393 | enc = kCFStringEncodingDOSChineseSimplif ; | ||
2394 | break ; | ||
2395 | case wxFONTENCODING_CP949 : | ||
2396 | enc = kCFStringEncodingDOSKorean; | ||
2397 | break ; | ||
2398 | case wxFONTENCODING_CP950 : | ||
2399 | enc = kCFStringEncodingDOSChineseTrad; | ||
2400 | break ; | ||
2401 | case wxFONTENCODING_CP1250 : | ||
2402 | enc = kCFStringEncodingWindowsLatin2; | ||
2403 | break ; | ||
2404 | case wxFONTENCODING_CP1251 : | ||
2405 | enc = kCFStringEncodingWindowsCyrillic ; | ||
2406 | break ; | ||
2407 | case wxFONTENCODING_CP1252 : | ||
2408 | enc = kCFStringEncodingWindowsLatin1 ; | ||
2409 | break ; | ||
2410 | case wxFONTENCODING_CP1253 : | ||
2411 | enc = kCFStringEncodingWindowsGreek; | ||
2412 | break ; | ||
2413 | case wxFONTENCODING_CP1254 : | ||
2414 | enc = kCFStringEncodingWindowsLatin5; | ||
2415 | break ; | ||
2416 | case wxFONTENCODING_CP1255 : | ||
2417 | enc = kCFStringEncodingWindowsHebrew ; | ||
2418 | break ; | ||
2419 | case wxFONTENCODING_CP1256 : | ||
2420 | enc = kCFStringEncodingWindowsArabic ; | ||
2421 | break ; | ||
2422 | case wxFONTENCODING_CP1257 : | ||
2423 | enc = kCFStringEncodingWindowsBalticRim; | ||
2424 | break ; | ||
2425 | // This only really encodes to UTF7 (if that) evidently | ||
2426 | // case wxFONTENCODING_UTF7 : | ||
2427 | // enc = kCFStringEncodingNonLossyASCII ; | ||
2428 | // break ; | ||
2429 | case wxFONTENCODING_UTF8 : | ||
2430 | enc = kCFStringEncodingUTF8 ; | ||
2431 | break ; | ||
2432 | case wxFONTENCODING_EUC_JP : | ||
2433 | enc = kCFStringEncodingEUC_JP; | ||
2434 | break ; | ||
2435 | case wxFONTENCODING_UTF16 : | ||
2436 | enc = kCFStringEncodingUnicode ; | ||
2437 | break ; | ||
2438 | case wxFONTENCODING_MACROMAN : | ||
2439 | enc = kCFStringEncodingMacRoman ; | ||
2440 | break ; | ||
2441 | case wxFONTENCODING_MACJAPANESE : | ||
2442 | enc = kCFStringEncodingMacJapanese ; | ||
2443 | break ; | ||
2444 | case wxFONTENCODING_MACCHINESETRAD : | ||
2445 | enc = kCFStringEncodingMacChineseTrad ; | ||
2446 | break ; | ||
2447 | case wxFONTENCODING_MACKOREAN : | ||
2448 | enc = kCFStringEncodingMacKorean ; | ||
2449 | break ; | ||
2450 | case wxFONTENCODING_MACARABIC : | ||
2451 | enc = kCFStringEncodingMacArabic ; | ||
2452 | break ; | ||
2453 | case wxFONTENCODING_MACHEBREW : | ||
2454 | enc = kCFStringEncodingMacHebrew ; | ||
2455 | break ; | ||
2456 | case wxFONTENCODING_MACGREEK : | ||
2457 | enc = kCFStringEncodingMacGreek ; | ||
2458 | break ; | ||
2459 | case wxFONTENCODING_MACCYRILLIC : | ||
2460 | enc = kCFStringEncodingMacCyrillic ; | ||
2461 | break ; | ||
2462 | case wxFONTENCODING_MACDEVANAGARI : | ||
2463 | enc = kCFStringEncodingMacDevanagari ; | ||
2464 | break ; | ||
2465 | case wxFONTENCODING_MACGURMUKHI : | ||
2466 | enc = kCFStringEncodingMacGurmukhi ; | ||
2467 | break ; | ||
2468 | case wxFONTENCODING_MACGUJARATI : | ||
2469 | enc = kCFStringEncodingMacGujarati ; | ||
2470 | break ; | ||
2471 | case wxFONTENCODING_MACORIYA : | ||
2472 | enc = kCFStringEncodingMacOriya ; | ||
2473 | break ; | ||
2474 | case wxFONTENCODING_MACBENGALI : | ||
2475 | enc = kCFStringEncodingMacBengali ; | ||
2476 | break ; | ||
2477 | case wxFONTENCODING_MACTAMIL : | ||
2478 | enc = kCFStringEncodingMacTamil ; | ||
2479 | break ; | ||
2480 | case wxFONTENCODING_MACTELUGU : | ||
2481 | enc = kCFStringEncodingMacTelugu ; | ||
2482 | break ; | ||
2483 | case wxFONTENCODING_MACKANNADA : | ||
2484 | enc = kCFStringEncodingMacKannada ; | ||
2485 | break ; | ||
2486 | case wxFONTENCODING_MACMALAJALAM : | ||
2487 | enc = kCFStringEncodingMacMalayalam ; | ||
2488 | break ; | ||
2489 | case wxFONTENCODING_MACSINHALESE : | ||
2490 | enc = kCFStringEncodingMacSinhalese ; | ||
2491 | break ; | ||
2492 | case wxFONTENCODING_MACBURMESE : | ||
2493 | enc = kCFStringEncodingMacBurmese ; | ||
2494 | break ; | ||
2495 | case wxFONTENCODING_MACKHMER : | ||
2496 | enc = kCFStringEncodingMacKhmer ; | ||
2497 | break ; | ||
2498 | case wxFONTENCODING_MACTHAI : | ||
2499 | enc = kCFStringEncodingMacThai ; | ||
2500 | break ; | ||
2501 | case wxFONTENCODING_MACLAOTIAN : | ||
2502 | enc = kCFStringEncodingMacLaotian ; | ||
2503 | break ; | ||
2504 | case wxFONTENCODING_MACGEORGIAN : | ||
2505 | enc = kCFStringEncodingMacGeorgian ; | ||
2506 | break ; | ||
2507 | case wxFONTENCODING_MACARMENIAN : | ||
2508 | enc = kCFStringEncodingMacArmenian ; | ||
2509 | break ; | ||
2510 | case wxFONTENCODING_MACCHINESESIMP : | ||
2511 | enc = kCFStringEncodingMacChineseSimp ; | ||
2512 | break ; | ||
2513 | case wxFONTENCODING_MACTIBETAN : | ||
2514 | enc = kCFStringEncodingMacTibetan ; | ||
2515 | break ; | ||
2516 | case wxFONTENCODING_MACMONGOLIAN : | ||
2517 | enc = kCFStringEncodingMacMongolian ; | ||
2518 | break ; | ||
2519 | case wxFONTENCODING_MACETHIOPIC : | ||
2520 | enc = kCFStringEncodingMacEthiopic ; | ||
2521 | break ; | ||
2522 | case wxFONTENCODING_MACCENTRALEUR : | ||
2523 | enc = kCFStringEncodingMacCentralEurRoman ; | ||
2524 | break ; | ||
2525 | case wxFONTENCODING_MACVIATNAMESE : | ||
2526 | enc = kCFStringEncodingMacVietnamese ; | ||
2527 | break ; | ||
2528 | case wxFONTENCODING_MACARABICEXT : | ||
2529 | enc = kCFStringEncodingMacExtArabic ; | ||
2530 | break ; | ||
2531 | case wxFONTENCODING_MACSYMBOL : | ||
2532 | enc = kCFStringEncodingMacSymbol ; | ||
2533 | break ; | ||
2534 | case wxFONTENCODING_MACDINGBATS : | ||
2535 | enc = kCFStringEncodingMacDingbats ; | ||
2536 | break ; | ||
2537 | case wxFONTENCODING_MACTURKISH : | ||
2538 | enc = kCFStringEncodingMacTurkish ; | ||
2539 | break ; | ||
2540 | case wxFONTENCODING_MACCROATIAN : | ||
2541 | enc = kCFStringEncodingMacCroatian ; | ||
2542 | break ; | ||
2543 | case wxFONTENCODING_MACICELANDIC : | ||
2544 | enc = kCFStringEncodingMacIcelandic ; | ||
2545 | break ; | ||
2546 | case wxFONTENCODING_MACROMANIAN : | ||
2547 | enc = kCFStringEncodingMacRomanian ; | ||
2548 | break ; | ||
2549 | case wxFONTENCODING_MACCELTIC : | ||
2550 | enc = kCFStringEncodingMacCeltic ; | ||
2551 | break ; | ||
2552 | case wxFONTENCODING_MACGAELIC : | ||
2553 | enc = kCFStringEncodingMacGaelic ; | ||
2554 | break ; | ||
2555 | // case wxFONTENCODING_MACKEYBOARD : | ||
2556 | // enc = kCFStringEncodingMacKeyboardGlyphs ; | ||
2557 | // break ; | ||
2558 | |||
2559 | default : | ||
2560 | // because gcc is picky | ||
2561 | break ; | ||
2562 | } | ||
2563 | |||
2564 | return enc ; | ||
2565 | } | ||
2566 | |||
2567 | class wxMBConv_cocoa : public wxMBConv | ||
2568 | { | ||
2569 | public: | ||
2570 | wxMBConv_cocoa() | ||
2571 | { | ||
2572 | Init(CFStringGetSystemEncoding()) ; | ||
2573 | } | ||
2574 | |||
2575 | wxMBConv_cocoa(const wxMBConv_cocoa& conv) | ||
2576 | { | ||
2577 | m_encoding = conv.m_encoding; | ||
2578 | } | ||
2579 | |||
2580 | #if wxUSE_FONTMAP | ||
2581 | wxMBConv_cocoa(const wxChar* name) | ||
2582 | { | ||
2583 | Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ; | ||
2584 | } | ||
2585 | #endif | ||
2586 | |||
2587 | wxMBConv_cocoa(wxFontEncoding encoding) | ||
2588 | { | ||
2589 | Init( wxCFStringEncFromFontEnc(encoding) ); | ||
2590 | } | ||
2591 | |||
2592 | virtual ~wxMBConv_cocoa() | ||
2593 | { | ||
2594 | } | ||
2595 | |||
2596 | void Init( CFStringEncoding encoding) | ||
2597 | { | ||
2598 | m_encoding = encoding ; | ||
2599 | } | ||
2600 | |||
2601 | size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const | ||
2602 | { | ||
2603 | wxASSERT(szUnConv); | ||
2604 | |||
2605 | CFStringRef theString = CFStringCreateWithBytes ( | ||
2606 | NULL, //the allocator | ||
2607 | (const UInt8*)szUnConv, | ||
2608 | strlen(szUnConv), | ||
2609 | m_encoding, | ||
2610 | false //no BOM/external representation | ||
2611 | ); | ||
2612 | |||
2613 | wxASSERT(theString); | ||
2614 | |||
2615 | size_t nOutLength = CFStringGetLength(theString); | ||
2616 | |||
2617 | if (szOut == NULL) | ||
2618 | { | ||
2619 | CFRelease(theString); | ||
2620 | return nOutLength; | ||
2621 | } | ||
2622 | |||
2623 | CFRange theRange = { 0, nOutSize }; | ||
2624 | |||
2625 | #if SIZEOF_WCHAR_T == 4 | ||
2626 | UniChar* szUniCharBuffer = new UniChar[nOutSize]; | ||
2627 | #endif | ||
2628 | |||
2629 | CFStringGetCharacters(theString, theRange, szUniCharBuffer); | ||
2630 | |||
2631 | CFRelease(theString); | ||
2632 | |||
2633 | szUniCharBuffer[nOutLength] = '\0'; | ||
2634 | |||
2635 | #if SIZEOF_WCHAR_T == 4 | ||
2636 | wxMBConvUTF16 converter; | ||
2637 | converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize ); | ||
2638 | delete [] szUniCharBuffer; | ||
2639 | #endif | ||
2640 | |||
2641 | return nOutLength; | ||
2642 | } | ||
2643 | |||
2644 | size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const | ||
2645 | { | ||
2646 | wxASSERT(szUnConv); | ||
2647 | |||
2648 | size_t nRealOutSize; | ||
2649 | size_t nBufSize = wxWcslen(szUnConv); | ||
2650 | UniChar* szUniBuffer = (UniChar*) szUnConv; | ||
2651 | |||
2652 | #if SIZEOF_WCHAR_T == 4 | ||
2653 | wxMBConvUTF16 converter ; | ||
2654 | nBufSize = converter.WC2MB( NULL, szUnConv, 0 ); | ||
2655 | szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1]; | ||
2656 | converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar)); | ||
2657 | nBufSize /= sizeof(UniChar); | ||
2658 | #endif | ||
2659 | |||
2660 | CFStringRef theString = CFStringCreateWithCharactersNoCopy( | ||
2661 | NULL, //allocator | ||
2662 | szUniBuffer, | ||
2663 | nBufSize, | ||
2664 | kCFAllocatorNull //deallocator - we want to deallocate it ourselves | ||
2665 | ); | ||
2666 | |||
2667 | wxASSERT(theString); | ||
2668 | |||
2669 | //Note that CER puts a BOM when converting to unicode | ||
2670 | //so we check and use getchars instead in that case | ||
2671 | if (m_encoding == kCFStringEncodingUnicode) | ||
2672 | { | ||
2673 | if (szOut != NULL) | ||
2674 | CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut); | ||
2675 | |||
2676 | nRealOutSize = CFStringGetLength(theString) + 1; | ||
2677 | } | ||
2678 | else | ||
2679 | { | ||
2680 | CFStringGetBytes( | ||
2681 | theString, | ||
2682 | CFRangeMake(0, CFStringGetLength(theString)), | ||
2683 | m_encoding, | ||
2684 | 0, //what to put in characters that can't be converted - | ||
2685 | //0 tells CFString to return NULL if it meets such a character | ||
2686 | false, //not an external representation | ||
2687 | (UInt8*) szOut, | ||
2688 | nOutSize, | ||
2689 | (CFIndex*) &nRealOutSize | ||
2690 | ); | ||
2691 | } | ||
2692 | |||
2693 | CFRelease(theString); | ||
2694 | |||
2695 | #if SIZEOF_WCHAR_T == 4 | ||
2696 | delete[] szUniBuffer; | ||
2697 | #endif | ||
2698 | |||
2699 | return nRealOutSize - 1; | ||
2700 | } | ||
2701 | |||
2702 | virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); } | ||
2703 | |||
2704 | bool IsOk() const | ||
2705 | { | ||
2706 | return m_encoding != kCFStringEncodingInvalidId && | ||
2707 | CFStringIsEncodingAvailable(m_encoding); | ||
2708 | } | ||
2709 | |||
2710 | private: | ||
2711 | CFStringEncoding m_encoding ; | ||
2712 | }; | ||
2713 | |||
2714 | #endif // defined(__WXCOCOA__) | ||
2715 | |||
2716 | // ============================================================================ | ||
2717 | // Mac conversion classes | ||
2718 | // ============================================================================ | ||
2719 | |||
2720 | #if defined(__WXMAC__) && defined(TARGET_CARBON) | ||
2721 | |||
2722 | class wxMBConv_mac : public wxMBConv | ||
2723 | { | ||
2724 | public: | ||
2725 | wxMBConv_mac() | ||
2726 | { | ||
2727 | Init(CFStringGetSystemEncoding()) ; | ||
2728 | } | ||
2729 | |||
2730 | wxMBConv_mac(const wxMBConv_mac& conv) | ||
2731 | { | ||
2732 | Init(conv.m_char_encoding); | ||
2733 | } | ||
2734 | |||
2735 | #if wxUSE_FONTMAP | ||
2736 | wxMBConv_mac(const wxChar* name) | ||
2737 | { | ||
2738 | wxFontEncoding enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false); | ||
2739 | Init( (enc != wxFONTENCODING_SYSTEM) ? wxMacGetSystemEncFromFontEnc( enc ) : kTextEncodingUnknown); | ||
2740 | } | ||
2741 | #endif | ||
2742 | |||
2743 | wxMBConv_mac(wxFontEncoding encoding) | ||
2744 | { | ||
2745 | Init( wxMacGetSystemEncFromFontEnc(encoding) ); | ||
2746 | } | ||
2747 | |||
2748 | virtual ~wxMBConv_mac() | ||
2749 | { | ||
2750 | OSStatus status = noErr ; | ||
2751 | if (m_MB2WC_converter) | ||
2752 | status = TECDisposeConverter(m_MB2WC_converter); | ||
2753 | if (m_WC2MB_converter) | ||
2754 | status = TECDisposeConverter(m_WC2MB_converter); | ||
2755 | } | ||
2756 | |||
2757 | void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant , | ||
2758 | TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat) | ||
2759 | { | ||
2760 | m_MB2WC_converter = NULL ; | ||
2761 | m_WC2MB_converter = NULL ; | ||
2762 | if ( encoding != kTextEncodingUnknown ) | ||
2763 | { | ||
2764 |