/[pcsx2_0.9.7]/trunk/3rdparty/wxWidgets/src/common/strconv.cpp
ViewVC logotype

Contents of /trunk/3rdparty/wxWidgets/src/common/strconv.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (show annotations) (download)
Tue Sep 7 03:24:11 2010 UTC (10 years, 2 months ago) by william
File size: 109064 byte(s)
committing r3113 initial commit again...
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strconv.cpp
3 // Purpose: Unicode conversion classes
4 // Author: Ove Kaaven, Robert Roebling, Vadim Zeitlin, Vaclav Slavik,
5 // Ryan Norton, Fredrik Roubert (UTF7)
6 // Modified by:
7 // Created: 29/01/98
8 // RCS-ID: $Id: strconv.cpp 56394 2008-10-17 11:31:22Z VZ $
9 // Copyright: (c) 1999 Ove Kaaven, Robert Roebling, Vaclav Slavik
10 // (c) 2000-2003 Vadim Zeitlin
11 // (c) 2004 Ryan Norton, Fredrik Roubert
12 // Licence: wxWindows licence
13 /////////////////////////////////////////////////////////////////////////////
14
15 // For compilers that support precompilation, includes "wx.h".
16 #include "wx/wxprec.h"
17
18 #ifndef WX_PRECOMP
19 #ifdef __WXMSW__
20 #include "wx/msw/missing.h"
21 #endif
22 #include "wx/intl.h"
23 #include "wx/log.h"
24 #include "wx/utils.h"
25 #include "wx/hashmap.h"
26 #endif
27
28 #include "wx/strconv.h"
29
30 #if wxUSE_WCHAR_T
31
32 #ifdef __WINDOWS__
33 #include "wx/msw/private.h"
34 #endif
35
36 #ifndef __WXWINCE__
37 #include <errno.h>
38 #endif
39
40 #include <ctype.h>
41 #include <string.h>
42 #include <stdlib.h>
43
44 #if defined(__WIN32__) && !defined(__WXMICROWIN__)
45 #define wxHAVE_WIN32_MB2WC
46 #endif
47
48 #ifdef __SALFORDC__
49 #include <clib.h>
50 #endif
51
52 #ifdef HAVE_ICONV
53 #include <iconv.h>
54 #include "wx/thread.h"
55 #endif
56
57 #include "wx/encconv.h"
58 #include "wx/fontmap.h"
59
60 #ifdef __WXMAC__
61 #ifndef __DARWIN__
62 #include <ATSUnicode.h>
63 #include <TextCommon.h>
64 #include <TextEncodingConverter.h>
65 #endif
66
67 // includes Mac headers
68 #include "wx/mac/private.h"
69 #include "wx/thread.h"
70
71 #endif
72
73
74 #define TRACE_STRCONV _T("strconv")
75
76 // WC_UTF16 is defined only if sizeof(wchar_t) == 2, otherwise it's supposed to
77 // be 4 bytes
78 #if SIZEOF_WCHAR_T == 2
79 #define WC_UTF16
80 #endif
81
82
83 // ============================================================================
84 // implementation
85 // ============================================================================
86
// Helper for the NUL-termination checks below.
//
// Returns true if at least one of the n bytes starting at p is not NUL and
// false if all of them are NUL (this includes the case n == 0, in which p is
// not dereferenced at all).
static bool NotAllNULs(const char *p, size_t n)
{
    for ( size_t i = 0; i < n; ++i )
    {
        if ( p[i] != '\0' )
            return true;
    }

    return false;
}
95
96 // ----------------------------------------------------------------------------
97 // UTF-16 en/decoding to/from UCS-4 with surrogates handling
98 // ----------------------------------------------------------------------------
99
100 static size_t encode_utf16(wxUint32 input, wxUint16 *output)
101 {
102 if (input <= 0xffff)
103 {
104 if (output)
105 *output = (wxUint16) input;
106
107 return 1;
108 }
109 else if (input >= 0x110000)
110 {
111 return wxCONV_FAILED;
112 }
113 else
114 {
115 if (output)
116 {
117 *output++ = (wxUint16) ((input >> 10) + 0xd7c0);
118 *output = (wxUint16) ((input & 0x3ff) + 0xdc00);
119 }
120
121 return 2;
122 }
123 }
124
125 static size_t decode_utf16(const wxUint16* input, wxUint32& output)
126 {
127 if ((*input < 0xd800) || (*input > 0xdfff))
128 {
129 output = *input;
130 return 1;
131 }
132 else if ((input[1] < 0xdc00) || (input[1] > 0xdfff))
133 {
134 output = *input;
135 return wxCONV_FAILED;
136 }
137 else
138 {
139 output = ((input[0] - 0xd7c0) << 10) + (input[1] - 0xdc00);
140 return 2;
141 }
142 }
143
144 #ifdef WC_UTF16
145 typedef wchar_t wxDecodeSurrogate_t;
146 #else // !WC_UTF16
147 typedef wxUint16 wxDecodeSurrogate_t;
148 #endif // WC_UTF16/!WC_UTF16
149
150 // returns the next UTF-32 character from the wchar_t buffer and advances the
151 // pointer to the character after this one
152 //
153 // if an invalid character is found, *pSrc is set to NULL, the caller must
154 // check for this
155 static wxUint32 wxDecodeSurrogate(const wxDecodeSurrogate_t **pSrc)
156 {
157 wxUint32 out;
158 const size_t
159 n = decode_utf16(wx_reinterpret_cast(const wxUint16 *, *pSrc), out);
160 if ( n == wxCONV_FAILED )
161 *pSrc = NULL;
162 else
163 *pSrc += n;
164
165 return out;
166 }
167
168 // ----------------------------------------------------------------------------
169 // wxMBConv
170 // ----------------------------------------------------------------------------
171
172 size_t
173 wxMBConv::ToWChar(wchar_t *dst, size_t dstLen,
174 const char *src, size_t srcLen) const
175 {
176 // although new conversion classes are supposed to implement this function
177 // directly, the existins ones only implement the old MB2WC() and so, to
178 // avoid to have to rewrite all conversion classes at once, we provide a
179 // default (but not efficient) implementation of this one in terms of the
180 // old function by copying the input to ensure that it's NUL-terminated and
181 // then using MB2WC() to convert it
182
183 // the number of chars [which would be] written to dst [if it were not NULL]
184 size_t dstWritten = 0;
185
186 // the number of NULs terminating this string
187 size_t nulLen = 0; // not really needed, but just to avoid warnings
188
189 // if we were not given the input size we just have to assume that the
190 // string is properly terminated as we have no way of knowing how long it
191 // is anyhow, but if we do have the size check whether there are enough
192 // NULs at the end
193 wxCharBuffer bufTmp;
194 const char *srcEnd;
195 if ( srcLen != wxNO_LEN )
196 {
197 // we need to know how to find the end of this string
198 nulLen = GetMBNulLen();
199 if ( nulLen == wxCONV_FAILED )
200 return wxCONV_FAILED;
201
202 // if there are enough NULs we can avoid the copy
203 if ( srcLen < nulLen || NotAllNULs(src + srcLen - nulLen, nulLen) )
204 {
205 // make a copy in order to properly NUL-terminate the string
206 bufTmp = wxCharBuffer(srcLen + nulLen - 1 /* 1 will be added */);
207 char * const p = bufTmp.data();
208 memcpy(p, src, srcLen);
209 for ( char *s = p + srcLen; s < p + srcLen + nulLen; s++ )
210 *s = '\0';
211
212 src = bufTmp;
213 }
214
215 srcEnd = src + srcLen;
216 }
217 else // quit after the first loop iteration
218 {
219 srcEnd = NULL;
220 }
221
222 for ( ;; )
223 {
224 // try to convert the current chunk
225 size_t lenChunk = MB2WC(NULL, src, 0);
226 if ( lenChunk == wxCONV_FAILED )
227 return wxCONV_FAILED;
228
229 lenChunk++; // for the L'\0' at the end of this chunk
230
231 dstWritten += lenChunk;
232
233 if ( lenChunk == 1 )
234 {
235 // nothing left in the input string, conversion succeeded
236 break;
237 }
238
239 if ( dst )
240 {
241 if ( dstWritten > dstLen )
242 return wxCONV_FAILED;
243
244 if ( MB2WC(dst, src, lenChunk) == wxCONV_FAILED )
245 return wxCONV_FAILED;
246
247 dst += lenChunk;
248 }
249
250 if ( !srcEnd )
251 {
252 // we convert just one chunk in this case as this is the entire
253 // string anyhow
254 break;
255 }
256
257 // advance the input pointer past the end of this chunk
258 while ( NotAllNULs(src, nulLen) )
259 {
260 // notice that we must skip over multiple bytes here as we suppose
261 // that if NUL takes 2 or 4 bytes, then all the other characters do
262 // too and so if advanced by a single byte we might erroneously
263 // detect sequences of NUL bytes in the middle of the input
264 src += nulLen;
265 }
266
267 src += nulLen; // skipping over its terminator as well
268
269 // note that ">=" (and not just "==") is needed here as the terminator
270 // we skipped just above could be inside or just after the buffer
271 // delimited by inEnd
272 if ( src >= srcEnd )
273 break;
274 }
275
276 return dstWritten;
277 }
278
279 size_t
280 wxMBConv::FromWChar(char *dst, size_t dstLen,
281 const wchar_t *src, size_t srcLen) const
282 {
283 // the number of chars [which would be] written to dst [if it were not NULL]
284 size_t dstWritten = 0;
285
286 // make a copy of the input string unless it is already properly
287 // NUL-terminated
288 //
289 // if we don't know its length we have no choice but to assume that it is,
290 // indeed, properly terminated
291 wxWCharBuffer bufTmp;
292 if ( srcLen == wxNO_LEN )
293 {
294 srcLen = wxWcslen(src) + 1;
295 }
296 else if ( srcLen != 0 && src[srcLen - 1] != L'\0' )
297 {
298 // make a copy in order to properly NUL-terminate the string
299 bufTmp = wxWCharBuffer(srcLen);
300 memcpy(bufTmp.data(), src, srcLen * sizeof(wchar_t));
301 src = bufTmp;
302 }
303
304 const size_t lenNul = GetMBNulLen();
305 for ( const wchar_t * const srcEnd = src + srcLen;
306 src < srcEnd;
307 src += wxWcslen(src) + 1 /* skip L'\0' too */ )
308 {
309 // try to convert the current chunk
310 size_t lenChunk = WC2MB(NULL, src, 0);
311
312 if ( lenChunk == wxCONV_FAILED )
313 return wxCONV_FAILED;
314
315 lenChunk += lenNul;
316 dstWritten += lenChunk;
317
318 if ( dst )
319 {
320 if ( dstWritten > dstLen )
321 return wxCONV_FAILED;
322
323 if ( WC2MB(dst, src, lenChunk) == wxCONV_FAILED )
324 return wxCONV_FAILED;
325
326 dst += lenChunk;
327 }
328 }
329
330 return dstWritten;
331 }
332
333 size_t wxMBConv::MB2WC(wchar_t *outBuff, const char *inBuff, size_t outLen) const
334 {
335 size_t rc = ToWChar(outBuff, outLen, inBuff);
336 if ( rc != wxCONV_FAILED )
337 {
338 // ToWChar() returns the buffer length, i.e. including the trailing
339 // NUL, while this method doesn't take it into account
340 rc--;
341 }
342
343 return rc;
344 }
345
346 size_t wxMBConv::WC2MB(char *outBuff, const wchar_t *inBuff, size_t outLen) const
347 {
348 size_t rc = FromWChar(outBuff, outLen, inBuff);
349 if ( rc != wxCONV_FAILED )
350 {
351 rc -= GetMBNulLen();
352 }
353
354 return rc;
355 }
356
// Out-of-line destructor with an intentionally empty body.
wxMBConv::~wxMBConv()
{
    // nothing to do here (the original author notes that defining it here is
    // probably necessary for linking under Darwin)
}
361
362 const wxWCharBuffer wxMBConv::cMB2WC(const char *psz) const
363 {
364 if ( psz )
365 {
366 // calculate the length of the buffer needed first
367 const size_t nLen = MB2WC(NULL, psz, 0);
368 if ( nLen != wxCONV_FAILED )
369 {
370 // now do the actual conversion
371 wxWCharBuffer buf(nLen /* +1 added implicitly */);
372
373 // +1 for the trailing NULL
374 if ( MB2WC(buf.data(), psz, nLen + 1) != wxCONV_FAILED )
375 return buf;
376 }
377 }
378
379 return wxWCharBuffer();
380 }
381
382 const wxCharBuffer wxMBConv::cWC2MB(const wchar_t *pwz) const
383 {
384 if ( pwz )
385 {
386 const size_t nLen = WC2MB(NULL, pwz, 0);
387 if ( nLen != wxCONV_FAILED )
388 {
389 // extra space for trailing NUL(s)
390 static const size_t extraLen = GetMaxMBNulLen();
391
392 wxCharBuffer buf(nLen + extraLen - 1);
393 if ( WC2MB(buf.data(), pwz, nLen + extraLen) != wxCONV_FAILED )
394 return buf;
395 }
396 }
397
398 return wxCharBuffer();
399 }
400
401 const wxWCharBuffer
402 wxMBConv::cMB2WC(const char *inBuff, size_t inLen, size_t *outLen) const
403 {
404 const size_t dstLen = ToWChar(NULL, 0, inBuff, inLen);
405 if ( dstLen != wxCONV_FAILED )
406 {
407 wxWCharBuffer wbuf(dstLen - 1);
408 if ( ToWChar(wbuf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
409 {
410 if ( outLen )
411 {
412 *outLen = dstLen;
413 if ( wbuf[dstLen - 1] == L'\0' )
414 (*outLen)--;
415 }
416
417 return wbuf;
418 }
419 }
420
421 if ( outLen )
422 *outLen = 0;
423
424 return wxWCharBuffer();
425 }
426
427 const wxCharBuffer
428 wxMBConv::cWC2MB(const wchar_t *inBuff, size_t inLen, size_t *outLen) const
429 {
430 size_t dstLen = FromWChar(NULL, 0, inBuff, inLen);
431 if ( dstLen != wxCONV_FAILED )
432 {
433 // special case of empty input: can't allocate 0 size buffer below as
434 // wxCharBuffer insists on NUL-terminating it
435 wxCharBuffer buf(dstLen ? dstLen - 1 : 1);
436 if ( FromWChar(buf.data(), dstLen, inBuff, inLen) != wxCONV_FAILED )
437 {
438 if ( outLen )
439 {
440 *outLen = dstLen;
441
442 const size_t nulLen = GetMBNulLen();
443 if ( dstLen >= nulLen &&
444 !NotAllNULs(buf.data() + dstLen - nulLen, nulLen) )
445 {
446 // in this case the output is NUL-terminated and we're not
447 // supposed to count NUL
448 *outLen -= nulLen;
449 }
450 }
451
452 return buf;
453 }
454 }
455
456 if ( outLen )
457 *outLen = 0;
458
459 return wxCharBuffer();
460 }
461
462 // ----------------------------------------------------------------------------
463 // wxMBConvLibc
464 // ----------------------------------------------------------------------------
465
466 size_t wxMBConvLibc::MB2WC(wchar_t *buf, const char *psz, size_t n) const
467 {
468 return wxMB2WC(buf, psz, n);
469 }
470
471 size_t wxMBConvLibc::WC2MB(char *buf, const wchar_t *psz, size_t n) const
472 {
473 return wxWC2MB(buf, psz, n);
474 }
475
476 // ----------------------------------------------------------------------------
477 // wxConvBrokenFileNames
478 // ----------------------------------------------------------------------------
479
480 #ifdef __UNIX__
481
482 wxConvBrokenFileNames::wxConvBrokenFileNames(const wxChar *charset)
483 {
484 if ( !charset || wxStricmp(charset, _T("UTF-8")) == 0
485 || wxStricmp(charset, _T("UTF8")) == 0 )
486 m_conv = new wxMBConvUTF8(wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
487 else
488 m_conv = new wxCSConv(charset);
489 }
490
491 #endif // __UNIX__
492
493 // ----------------------------------------------------------------------------
494 // UTF-7
495 // ----------------------------------------------------------------------------
496
497 // Implementation (C) 2004 Fredrik Roubert
498
//
// BASE64 decoding table
//
// Indexed by the (unsigned) input byte, gives the 6 bit value of the BASE64
// digit or 0xff if the byte is not a BASE64 digit. One row per 16 byte values.
//
static const unsigned char utf7unb64[] =
{
    // 0x00 - 0x0f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x10 - 0x1f
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x20 - 0x2f: '+' and '/'
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
    // 0x30 - 0x3f: '0'..'9'
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x40 - 0x4f: 'A'..'O'
    0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
    // 0x50 - 0x5f: 'P'..'Z'
    0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x60 - 0x6f: 'a'..'o'
    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    // 0x70 - 0x7f: 'p'..'z'
    0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
    // 0x80 - 0xff: never valid
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
537
538 size_t wxMBConvUTF7::MB2WC(wchar_t *buf, const char *psz, size_t n) const
539 {
540 size_t len = 0;
541
542 while ( *psz && (!buf || (len < n)) )
543 {
544 unsigned char cc = *psz++;
545 if (cc != '+')
546 {
547 // plain ASCII char
548 if (buf)
549 *buf++ = cc;
550 len++;
551 }
552 else if (*psz == '-')
553 {
554 // encoded plus sign
555 if (buf)
556 *buf++ = cc;
557 len++;
558 psz++;
559 }
560 else // start of BASE64 encoded string
561 {
562 bool lsb, ok;
563 unsigned int d, l;
564 for ( ok = lsb = false, d = 0, l = 0;
565 (cc = utf7unb64[(unsigned char)*psz]) != 0xff;
566 psz++ )
567 {
568 d <<= 6;
569 d += cc;
570 for (l += 6; l >= 8; lsb = !lsb)
571 {
572 unsigned char c = (unsigned char)((d >> (l -= 8)) % 256);
573 if (lsb)
574 {
575 if (buf)
576 *buf++ |= c;
577 len ++;
578 }
579 else
580 {
581 if (buf)
582 *buf = (wchar_t)(c << 8);
583 }
584
585 ok = true;
586 }
587 }
588
589 if ( !ok )
590 {
591 // in valid UTF7 we should have valid characters after '+'
592 return wxCONV_FAILED;
593 }
594
595 if (*psz == '-')
596 psz++;
597 }
598 }
599
600 if ( buf && (len < n) )
601 *buf = '\0';
602
603 return len;
604 }
605
//
// BASE64 encoding table
//
// Maps a 6 bit value to the corresponding BASE64 digit.
//
static const unsigned char utf7enb64[] =
{
    // 0 - 15
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    // 16 - 31
    'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
    // 32 - 47
    'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
    // 48 - 63
    'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
};
620
//
// UTF-7 encoding table
//
// Gives the class of each of the 128 ASCII characters:
//
//  0 - Set D (directly encoded characters)
//  1 - Set O (optional direct characters)
//  2 - whitespace characters (optional)
//  3 - special characters
//
// One row per 8 characters.
//
static const unsigned char utf7encode[128] =
{
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x00 - 0x07
    3, 2, 2, 3, 3, 2, 3, 3,     // 0x08 - 0x0f: TAB, LF and CR are whitespace
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x10 - 0x17
    3, 3, 3, 3, 3, 3, 3, 3,     // 0x18 - 0x1f
    2, 1, 1, 1, 1, 1, 1, 0,     // 0x20 - 0x27: space ! " # $ % & '
    0, 0, 1, 3, 0, 0, 0, 3,     // 0x28 - 0x2f: ( ) * + , - . /
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x30 - 0x37: 0 .. 7
    0, 0, 0, 1, 1, 1, 1, 0,     // 0x38 - 0x3f: 8 9 : ; < = > ?
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x40 - 0x47: @ A .. G
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x48 - 0x4f: H .. O
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x50 - 0x57: P .. W
    0, 0, 0, 1, 3, 1, 1, 1,     // 0x58 - 0x5f: X Y Z [ \ ] ^ _
    1, 0, 0, 0, 0, 0, 0, 0,     // 0x60 - 0x67: ` a .. g
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x68 - 0x6f: h .. o
    0, 0, 0, 0, 0, 0, 0, 0,     // 0x70 - 0x77: p .. w
    0, 0, 0, 1, 1, 1, 3, 3      // 0x78 - 0x7f: x y z { | } ~ DEL
};
640
641 size_t wxMBConvUTF7::WC2MB(char *buf, const wchar_t *psz, size_t n) const
642 {
643 size_t len = 0;
644
645 while (*psz && ((!buf) || (len < n)))
646 {
647 wchar_t cc = *psz++;
648 if (cc < 0x80 && utf7encode[cc] < 1)
649 {
650 // plain ASCII char
651 if (buf)
652 *buf++ = (char)cc;
653
654 len++;
655 }
656 #ifndef WC_UTF16
657 else if (((wxUint32)cc) > 0xffff)
658 {
659 // no surrogate pair generation (yet?)
660 return wxCONV_FAILED;
661 }
662 #endif
663 else
664 {
665 if (buf)
666 *buf++ = '+';
667
668 len++;
669 if (cc != '+')
670 {
671 // BASE64 encode string
672 unsigned int lsb, d, l;
673 for (d = 0, l = 0; /*nothing*/; psz++)
674 {
675 for (lsb = 0; lsb < 2; lsb ++)
676 {
677 d <<= 8;
678 d += lsb ? cc & 0xff : (cc & 0xff00) >> 8;
679
680 for (l += 8; l >= 6; )
681 {
682 l -= 6;
683 if (buf)
684 *buf++ = utf7enb64[(d >> l) % 64];
685 len++;
686 }
687 }
688
689 cc = *psz;
690 if (!(cc) || (cc < 0x80 && utf7encode[cc] < 1))
691 break;
692 }
693
694 if (l != 0)
695 {
696 if (buf)
697 *buf++ = utf7enb64[((d % 16) << (6 - l)) % 64];
698
699 len++;
700 }
701 }
702
703 if (buf)
704 *buf++ = '-';
705 len++;
706 }
707 }
708
709 if (buf && (len < n))
710 *buf = 0;
711
712 return len;
713 }
714
715 // ----------------------------------------------------------------------------
716 // UTF-8
717 // ----------------------------------------------------------------------------
718
719 static wxUint32 utf8_max[]=
720 { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff, 0xffffffff };
721
722 // boundaries of the private use area we use to (temporarily) remap invalid
723 // characters invalid in a UTF-8 encoded string
724 const wxUint32 wxUnicodePUA = 0x100000;
725 const wxUint32 wxUnicodePUAEnd = wxUnicodePUA + 256;
726
// Decode the NUL-terminated UTF-8 string psz.
//
// If buf is not NULL, the loop stops once n wide characters have been
// produced and the result is NUL-terminated if there is still room left for
// it; if buf is NULL, only the length is computed.
//
// Invalid input is handled according to m_options: each byte of the invalid
// sequence is either mapped to wxUnicodePUA + byte (MAP_INVALID_UTF8_TO_PUA),
// or output as a backslash followed by 3 octal digits
// (MAP_INVALID_UTF8_TO_OCTAL), or makes the function fail otherwise.
//
// Returns the number of wide characters, not counting the trailing NUL, or
// wxCONV_FAILED.
size_t wxMBConvUTF8::MB2WC(wchar_t *buf, const char *psz, size_t n) const
{
    size_t len = 0;

    while (*psz && ((!buf) || (len < n)))
    {
        // remember the start of this sequence: if it turns out to be invalid,
        // the bytes in [opsz, psz) are remapped one by one below
        const char *opsz = psz;
        bool invalid = false;
        unsigned char cc = *psz++, fc = cc;

        // count the leading 1 bits of the first byte
        unsigned cnt;
        for (cnt = 0; fc & 0x80; cnt++)
            fc <<= 1;

        if (!cnt)
        {
            // plain ASCII char
            if (buf)
                *buf++ = cc;
            len++;

            // escape the escape character for octal escapes
            if ((m_options & MAP_INVALID_UTF8_TO_OCTAL)
                    && cc == '\\' && (!buf || len < n))
            {
                if (buf)
                    *buf++ = cc;
                len++;
            }
        }
        else
        {
            // from now on cnt is the number of continuation bytes expected
            cnt--;
            if (!cnt)
            {
                // invalid UTF-8 sequence
                invalid = true;
            }
            else
            {
                // ocnt indexes the greatest value which would have fit in a
                // sequence one byte shorter than this one
                unsigned ocnt = cnt - 1;
                wxUint32 res = cc & (0x3f >> cnt);
                while (cnt--)
                {
                    cc = *psz;
                    if ((cc & 0xC0) != 0x80)
                    {
                        // invalid UTF-8 sequence
                        invalid = true;
                        break;
                    }

                    psz++;
                    res = (res << 6) | (cc & 0x3f);
                }

                if (invalid || res <= utf8_max[ocnt])
                {
                    // illegal UTF-8 encoding
                    invalid = true;
                }
                else if ((m_options & MAP_INVALID_UTF8_TO_PUA) &&
                        res >= wxUnicodePUA && res < wxUnicodePUAEnd)
                {
                    // if one of our PUA characters turns up externally
                    // it must also be treated as an illegal sequence
                    // (a bit like you have to escape an escape character)
                    invalid = true;
                }
                else
                {
#ifdef WC_UTF16
                    // cast is ok because wchar_t == wxUint16 if WC_UTF16
                    size_t pa = encode_utf16(res, (wxUint16 *)buf);
                    if (pa == wxCONV_FAILED)
                    {
                        invalid = true;
                    }
                    else
                    {
                        if (buf)
                            buf += pa;
                        len += pa;
                    }
#else // !WC_UTF16
                    if (buf)
                        *buf++ = (wchar_t)res;
                    len++;
#endif // WC_UTF16/!WC_UTF16
                }
            }

            if (invalid)
            {
                if (m_options & MAP_INVALID_UTF8_TO_PUA)
                {
                    // map each byte of the invalid sequence to a PUA character
                    while (opsz < psz && (!buf || len < n))
                    {
#ifdef WC_UTF16
                        // cast is ok because wchar_t == wxUint16 if WC_UTF16
                        size_t pa = encode_utf16((unsigned char)*opsz + wxUnicodePUA, (wxUint16 *)buf);
                        wxASSERT(pa != wxCONV_FAILED);
                        if (buf)
                            buf += pa;
                        opsz++;
                        len += pa;
#else
                        if (buf)
                            *buf++ = (wchar_t)(wxUnicodePUA + (unsigned char)*opsz);
                        opsz++;
                        len++;
#endif
                    }
                }
                else if (m_options & MAP_INVALID_UTF8_TO_OCTAL)
                {
                    while (opsz < psz && (!buf || len < n))
                    {
                        // the escape is only written if there is room for all
                        // 4 of its characters but len is advanced in any case
                        if ( buf && len + 3 < n )
                        {
                            unsigned char on = *opsz;
                            *buf++ = L'\\';
                            *buf++ = (wchar_t)( L'0' + on / 0100 );
                            *buf++ = (wchar_t)( L'0' + (on % 0100) / 010 );
                            *buf++ = (wchar_t)( L'0' + on % 010 );
                        }

                        opsz++;
                        len += 4;
                    }
                }
                else // MAP_INVALID_UTF8_NOT
                {
                    return wxCONV_FAILED;
                }
            }
        }
    }

    if (buf && (len < n))
        *buf = 0;

    return len;
}
870
// Returns true if wch is one of the octal digits L'0' to L'7'.
static inline bool isoctal(wchar_t wch)
{
    return wch >= L'0' && wch <= L'7';
}
875
876 size_t wxMBConvUTF8::WC2MB(char *buf, const wchar_t *psz, size_t n) const
877 {
878 size_t len = 0;
879
880 while (*psz && ((!buf) || (len < n)))
881 {
882 wxUint32 cc;
883
884 #ifdef WC_UTF16
885 // cast is ok for WC_UTF16
886 size_t pa = decode_utf16((const wxUint16 *)psz, cc);
887 psz += (pa == wxCONV_FAILED) ? 1 : pa;
888 #else
889 cc = (*psz++) & 0x7fffffff;
890 #endif
891
892 if ( (m_options & MAP_INVALID_UTF8_TO_PUA)
893 && cc >= wxUnicodePUA && cc < wxUnicodePUAEnd )
894 {
895 if (buf)
896 *buf++ = (char)(cc - wxUnicodePUA);
897 len++;
898 }
899 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL)
900 && cc == L'\\' && psz[0] == L'\\' )
901 {
902 if (buf)
903 *buf++ = (char)cc;
904 psz++;
905 len++;
906 }
907 else if ( (m_options & MAP_INVALID_UTF8_TO_OCTAL) &&
908 cc == L'\\' &&
909 isoctal(psz[0]) && isoctal(psz[1]) && isoctal(psz[2]) )
910 {
911 if (buf)
912 {
913 *buf++ = (char) ((psz[0] - L'0') * 0100 +
914 (psz[1] - L'0') * 010 +
915 (psz[2] - L'0'));
916 }
917
918 psz += 3;
919 len++;
920 }
921 else
922 {
923 unsigned cnt;
924 for (cnt = 0; cc > utf8_max[cnt]; cnt++)
925 {
926 }
927
928 if (!cnt)
929 {
930 // plain ASCII char
931 if (buf)
932 *buf++ = (char) cc;
933 len++;
934 }
935 else
936 {
937 len += cnt + 1;
938 if (buf)
939 {
940 *buf++ = (char) ((-128 >> cnt) | ((cc >> (cnt * 6)) & (0x3f >> cnt)));
941 while (cnt--)
942 *buf++ = (char) (0x80 | ((cc >> (cnt * 6)) & 0x3f));
943 }
944 }
945 }
946 }
947
948 if (buf && (len < n))
949 *buf = 0;
950
951 return len;
952 }
953
954 // ============================================================================
955 // UTF-16
956 // ============================================================================
957
958 #ifdef WORDS_BIGENDIAN
959 #define wxMBConvUTF16straight wxMBConvUTF16BE
960 #define wxMBConvUTF16swap wxMBConvUTF16LE
961 #else
962 #define wxMBConvUTF16swap wxMBConvUTF16BE
963 #define wxMBConvUTF16straight wxMBConvUTF16LE
964 #endif
965
966 /* static */
967 size_t wxMBConvUTF16Base::GetLength(const char *src, size_t srcLen)
968 {
969 if ( srcLen == wxNO_LEN )
970 {
971 // count the number of bytes in input, including the trailing NULs
972 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
973 for ( srcLen = 1; *inBuff++; srcLen++ )
974 ;
975
976 srcLen *= BYTES_PER_CHAR;
977 }
978 else // we already have the length
979 {
980 // we can only convert an entire number of UTF-16 characters
981 if ( srcLen % BYTES_PER_CHAR )
982 return wxCONV_FAILED;
983 }
984
985 return srcLen;
986 }
987
988 // case when in-memory representation is UTF-16 too
989 #ifdef WC_UTF16
990
991 // ----------------------------------------------------------------------------
992 // conversions without endianness change
993 // ----------------------------------------------------------------------------
994
995 size_t
996 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
997 const char *src, size_t srcLen) const
998 {
999 // set up the scene for using memcpy() (which is presumably more efficient
1000 // than copying the bytes one by one)
1001 srcLen = GetLength(src, srcLen);
1002 if ( srcLen == wxNO_LEN )
1003 return wxCONV_FAILED;
1004
1005 const size_t inLen = srcLen / BYTES_PER_CHAR;
1006 if ( dst )
1007 {
1008 if ( dstLen < inLen )
1009 return wxCONV_FAILED;
1010
1011 memcpy(dst, src, srcLen);
1012 }
1013
1014 return inLen;
1015 }
1016
1017 size_t
1018 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1019 const wchar_t *src, size_t srcLen) const
1020 {
1021 if ( srcLen == wxNO_LEN )
1022 srcLen = wxWcslen(src) + 1;
1023
1024 srcLen *= BYTES_PER_CHAR;
1025
1026 if ( dst )
1027 {
1028 if ( dstLen < srcLen )
1029 return wxCONV_FAILED;
1030
1031 memcpy(dst, src, srcLen);
1032 }
1033
1034 return srcLen;
1035 }
1036
1037 // ----------------------------------------------------------------------------
1038 // endian-reversing conversions
1039 // ----------------------------------------------------------------------------
1040
1041 size_t
1042 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1043 const char *src, size_t srcLen) const
1044 {
1045 srcLen = GetLength(src, srcLen);
1046 if ( srcLen == wxNO_LEN )
1047 return wxCONV_FAILED;
1048
1049 srcLen /= BYTES_PER_CHAR;
1050
1051 if ( dst )
1052 {
1053 if ( dstLen < srcLen )
1054 return wxCONV_FAILED;
1055
1056 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1057 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1058 {
1059 *dst++ = wxUINT16_SWAP_ALWAYS(*inBuff);
1060 }
1061 }
1062
1063 return srcLen;
1064 }
1065
1066 size_t
1067 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1068 const wchar_t *src, size_t srcLen) const
1069 {
1070 if ( srcLen == wxNO_LEN )
1071 srcLen = wxWcslen(src) + 1;
1072
1073 srcLen *= BYTES_PER_CHAR;
1074
1075 if ( dst )
1076 {
1077 if ( dstLen < srcLen )
1078 return wxCONV_FAILED;
1079
1080 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1081 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1082 {
1083 *outBuff++ = wxUINT16_SWAP_ALWAYS(*src);
1084 }
1085 }
1086
1087 return srcLen;
1088 }
1089
1090 #else // !WC_UTF16: wchar_t is UTF-32
1091
1092 // ----------------------------------------------------------------------------
1093 // conversions without endianness change
1094 // ----------------------------------------------------------------------------
1095
1096 size_t
1097 wxMBConvUTF16straight::ToWChar(wchar_t *dst, size_t dstLen,
1098 const char *src, size_t srcLen) const
1099 {
1100 srcLen = GetLength(src, srcLen);
1101 if ( srcLen == wxNO_LEN )
1102 return wxCONV_FAILED;
1103
1104 const size_t inLen = srcLen / BYTES_PER_CHAR;
1105 if ( !dst )
1106 {
1107 // optimization: return maximal space which could be needed for this
1108 // string even if the real size could be smaller if the buffer contains
1109 // any surrogates
1110 return inLen;
1111 }
1112
1113 size_t outLen = 0;
1114 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1115 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1116 {
1117 const wxUint32 ch = wxDecodeSurrogate(&inBuff);
1118 if ( !inBuff )
1119 return wxCONV_FAILED;
1120
1121 if ( ++outLen > dstLen )
1122 return wxCONV_FAILED;
1123
1124 *dst++ = ch;
1125 }
1126
1127
1128 return outLen;
1129 }
1130
1131 size_t
1132 wxMBConvUTF16straight::FromWChar(char *dst, size_t dstLen,
1133 const wchar_t *src, size_t srcLen) const
1134 {
1135 if ( srcLen == wxNO_LEN )
1136 srcLen = wxWcslen(src) + 1;
1137
1138 size_t outLen = 0;
1139 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1140 for ( size_t n = 0; n < srcLen; n++ )
1141 {
1142 wxUint16 cc[2];
1143 const size_t numChars = encode_utf16(*src++, cc);
1144 if ( numChars == wxCONV_FAILED )
1145 return wxCONV_FAILED;
1146
1147 outLen += numChars * BYTES_PER_CHAR;
1148 if ( outBuff )
1149 {
1150 if ( outLen > dstLen )
1151 return wxCONV_FAILED;
1152
1153 *outBuff++ = cc[0];
1154 if ( numChars == 2 )
1155 {
1156 // second character of a surrogate
1157 *outBuff++ = cc[1];
1158 }
1159 }
1160 }
1161
1162 return outLen;
1163 }
1164
1165 // ----------------------------------------------------------------------------
1166 // endian-reversing conversions
1167 // ----------------------------------------------------------------------------
1168
1169 size_t
1170 wxMBConvUTF16swap::ToWChar(wchar_t *dst, size_t dstLen,
1171 const char *src, size_t srcLen) const
1172 {
1173 srcLen = GetLength(src, srcLen);
1174 if ( srcLen == wxNO_LEN )
1175 return wxCONV_FAILED;
1176
1177 const size_t inLen = srcLen / BYTES_PER_CHAR;
1178 if ( !dst )
1179 {
1180 // optimization: return maximal space which could be needed for this
1181 // string even if the real size could be smaller if the buffer contains
1182 // any surrogates
1183 return inLen;
1184 }
1185
1186 size_t outLen = 0;
1187 const wxUint16 *inBuff = wx_reinterpret_cast(const wxUint16 *, src);
1188 for ( const wxUint16 * const inEnd = inBuff + inLen; inBuff < inEnd; )
1189 {
1190 wxUint32 ch;
1191 wxUint16 tmp[2];
1192
1193 tmp[0] = wxUINT16_SWAP_ALWAYS(*inBuff);
1194 inBuff++;
1195 tmp[1] = wxUINT16_SWAP_ALWAYS(*inBuff);
1196
1197 const size_t numChars = decode_utf16(tmp, ch);
1198 if ( numChars == wxCONV_FAILED )
1199 return wxCONV_FAILED;
1200
1201 if ( numChars == 2 )
1202 inBuff++;
1203
1204 if ( ++outLen > dstLen )
1205 return wxCONV_FAILED;
1206
1207 *dst++ = ch;
1208 }
1209
1210
1211 return outLen;
1212 }
1213
1214 size_t
1215 wxMBConvUTF16swap::FromWChar(char *dst, size_t dstLen,
1216 const wchar_t *src, size_t srcLen) const
1217 {
1218 if ( srcLen == wxNO_LEN )
1219 srcLen = wxWcslen(src) + 1;
1220
1221 size_t outLen = 0;
1222 wxUint16 *outBuff = wx_reinterpret_cast(wxUint16 *, dst);
1223 for ( const wchar_t *srcEnd = src + srcLen; src < srcEnd; src++ )
1224 {
1225 wxUint16 cc[2];
1226 const size_t numChars = encode_utf16(*src, cc);
1227 if ( numChars == wxCONV_FAILED )
1228 return wxCONV_FAILED;
1229
1230 outLen += numChars * BYTES_PER_CHAR;
1231 if ( outBuff )
1232 {
1233 if ( outLen > dstLen )
1234 return wxCONV_FAILED;
1235
1236 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[0]);
1237 if ( numChars == 2 )
1238 {
1239 // second character of a surrogate
1240 *outBuff++ = wxUINT16_SWAP_ALWAYS(cc[1]);
1241 }
1242 }
1243 }
1244
1245 return outLen;
1246 }
1247
1248 #endif // WC_UTF16/!WC_UTF16
1249
1250
1251 // ============================================================================
1252 // UTF-32
1253 // ============================================================================
1254
1255 #ifdef WORDS_BIGENDIAN
1256 #define wxMBConvUTF32straight wxMBConvUTF32BE
1257 #define wxMBConvUTF32swap wxMBConvUTF32LE
1258 #else
1259 #define wxMBConvUTF32swap wxMBConvUTF32BE
1260 #define wxMBConvUTF32straight wxMBConvUTF32LE
1261 #endif
1262
1263
1264 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32LE) wxConvUTF32LE;
1265 WXDLLIMPEXP_DATA_BASE(wxMBConvUTF32BE) wxConvUTF32BE;
1266
1267 /* static */
1268 size_t wxMBConvUTF32Base::GetLength(const char *src, size_t srcLen)
1269 {
1270 if ( srcLen == wxNO_LEN )
1271 {
1272 // count the number of bytes in input, including the trailing NULs
1273 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1274 for ( srcLen = 1; *inBuff++; srcLen++ )
1275 ;
1276
1277 srcLen *= BYTES_PER_CHAR;
1278 }
1279 else // we already have the length
1280 {
1281 // we can only convert an entire number of UTF-32 characters
1282 if ( srcLen % BYTES_PER_CHAR )
1283 return wxCONV_FAILED;
1284 }
1285
1286 return srcLen;
1287 }
1288
1289 // case when in-memory representation is UTF-16
1290 #ifdef WC_UTF16
1291
1292 // ----------------------------------------------------------------------------
1293 // conversions without endianness change
1294 // ----------------------------------------------------------------------------
1295
1296 size_t
1297 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1298 const char *src, size_t srcLen) const
1299 {
1300 srcLen = GetLength(src, srcLen);
1301 if ( srcLen == wxNO_LEN )
1302 return wxCONV_FAILED;
1303
1304 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1305 const size_t inLen = srcLen / BYTES_PER_CHAR;
1306 size_t outLen = 0;
1307 for ( size_t n = 0; n < inLen; n++ )
1308 {
1309 wxUint16 cc[2];
1310 const size_t numChars = encode_utf16(*inBuff++, cc);
1311 if ( numChars == wxCONV_FAILED )
1312 return wxCONV_FAILED;
1313
1314 outLen += numChars;
1315 if ( dst )
1316 {
1317 if ( outLen > dstLen )
1318 return wxCONV_FAILED;
1319
1320 *dst++ = cc[0];
1321 if ( numChars == 2 )
1322 {
1323 // second character of a surrogate
1324 *dst++ = cc[1];
1325 }
1326 }
1327 }
1328
1329 return outLen;
1330 }
1331
1332 size_t
1333 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1334 const wchar_t *src, size_t srcLen) const
1335 {
1336 if ( srcLen == wxNO_LEN )
1337 srcLen = wxWcslen(src) + 1;
1338
1339 if ( !dst )
1340 {
1341 // optimization: return maximal space which could be needed for this
1342 // string instead of the exact amount which could be less if there are
1343 // any surrogates in the input
1344 //
1345 // we consider that surrogates are rare enough to make it worthwhile to
1346 // avoid running the loop below at the cost of slightly extra memory
1347 // consumption
1348 return srcLen * BYTES_PER_CHAR;
1349 }
1350
1351 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1352 size_t outLen = 0;
1353 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1354 {
1355 const wxUint32 ch = wxDecodeSurrogate(&src);
1356 if ( !src )
1357 return wxCONV_FAILED;
1358
1359 outLen += BYTES_PER_CHAR;
1360
1361 if ( outLen > dstLen )
1362 return wxCONV_FAILED;
1363
1364 *outBuff++ = ch;
1365 }
1366
1367 return outLen;
1368 }
1369
1370 // ----------------------------------------------------------------------------
1371 // endian-reversing conversions
1372 // ----------------------------------------------------------------------------
1373
1374 size_t
1375 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1376 const char *src, size_t srcLen) const
1377 {
1378 srcLen = GetLength(src, srcLen);
1379 if ( srcLen == wxNO_LEN )
1380 return wxCONV_FAILED;
1381
1382 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1383 const size_t inLen = srcLen / BYTES_PER_CHAR;
1384 size_t outLen = 0;
1385 for ( size_t n = 0; n < inLen; n++, inBuff++ )
1386 {
1387 wxUint16 cc[2];
1388 const size_t numChars = encode_utf16(wxUINT32_SWAP_ALWAYS(*inBuff), cc);
1389 if ( numChars == wxCONV_FAILED )
1390 return wxCONV_FAILED;
1391
1392 outLen += numChars;
1393 if ( dst )
1394 {
1395 if ( outLen > dstLen )
1396 return wxCONV_FAILED;
1397
1398 *dst++ = cc[0];
1399 if ( numChars == 2 )
1400 {
1401 // second character of a surrogate
1402 *dst++ = cc[1];
1403 }
1404 }
1405 }
1406
1407 return outLen;
1408 }
1409
1410 size_t
1411 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1412 const wchar_t *src, size_t srcLen) const
1413 {
1414 if ( srcLen == wxNO_LEN )
1415 srcLen = wxWcslen(src) + 1;
1416
1417 if ( !dst )
1418 {
1419 // optimization: return maximal space which could be needed for this
1420 // string instead of the exact amount which could be less if there are
1421 // any surrogates in the input
1422 //
1423 // we consider that surrogates are rare enough to make it worthwhile to
1424 // avoid running the loop below at the cost of slightly extra memory
1425 // consumption
1426 return srcLen*BYTES_PER_CHAR;
1427 }
1428
1429 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1430 size_t outLen = 0;
1431 for ( const wchar_t * const srcEnd = src + srcLen; src < srcEnd; )
1432 {
1433 const wxUint32 ch = wxDecodeSurrogate(&src);
1434 if ( !src )
1435 return wxCONV_FAILED;
1436
1437 outLen += BYTES_PER_CHAR;
1438
1439 if ( outLen > dstLen )
1440 return wxCONV_FAILED;
1441
1442 *outBuff++ = wxUINT32_SWAP_ALWAYS(ch);
1443 }
1444
1445 return outLen;
1446 }
1447
1448 #else // !WC_UTF16: wchar_t is UTF-32
1449
1450 // ----------------------------------------------------------------------------
1451 // conversions without endianness change
1452 // ----------------------------------------------------------------------------
1453
1454 size_t
1455 wxMBConvUTF32straight::ToWChar(wchar_t *dst, size_t dstLen,
1456 const char *src, size_t srcLen) const
1457 {
1458 // use memcpy() as it should be much faster than hand-written loop
1459 srcLen = GetLength(src, srcLen);
1460 if ( srcLen == wxNO_LEN )
1461 return wxCONV_FAILED;
1462
1463 const size_t inLen = srcLen/BYTES_PER_CHAR;
1464 if ( dst )
1465 {
1466 if ( dstLen < inLen )
1467 return wxCONV_FAILED;
1468
1469 memcpy(dst, src, srcLen);
1470 }
1471
1472 return inLen;
1473 }
1474
1475 size_t
1476 wxMBConvUTF32straight::FromWChar(char *dst, size_t dstLen,
1477 const wchar_t *src, size_t srcLen) const
1478 {
1479 if ( srcLen == wxNO_LEN )
1480 srcLen = wxWcslen(src) + 1;
1481
1482 srcLen *= BYTES_PER_CHAR;
1483
1484 if ( dst )
1485 {
1486 if ( dstLen < srcLen )
1487 return wxCONV_FAILED;
1488
1489 memcpy(dst, src, srcLen);
1490 }
1491
1492 return srcLen;
1493 }
1494
1495 // ----------------------------------------------------------------------------
1496 // endian-reversing conversions
1497 // ----------------------------------------------------------------------------
1498
1499 size_t
1500 wxMBConvUTF32swap::ToWChar(wchar_t *dst, size_t dstLen,
1501 const char *src, size_t srcLen) const
1502 {
1503 srcLen = GetLength(src, srcLen);
1504 if ( srcLen == wxNO_LEN )
1505 return wxCONV_FAILED;
1506
1507 srcLen /= BYTES_PER_CHAR;
1508
1509 if ( dst )
1510 {
1511 if ( dstLen < srcLen )
1512 return wxCONV_FAILED;
1513
1514 const wxUint32 *inBuff = wx_reinterpret_cast(const wxUint32 *, src);
1515 for ( size_t n = 0; n < srcLen; n++, inBuff++ )
1516 {
1517 *dst++ = wxUINT32_SWAP_ALWAYS(*inBuff);
1518 }
1519 }
1520
1521 return srcLen;
1522 }
1523
1524 size_t
1525 wxMBConvUTF32swap::FromWChar(char *dst, size_t dstLen,
1526 const wchar_t *src, size_t srcLen) const
1527 {
1528 if ( srcLen == wxNO_LEN )
1529 srcLen = wxWcslen(src) + 1;
1530
1531 srcLen *= BYTES_PER_CHAR;
1532
1533 if ( dst )
1534 {
1535 if ( dstLen < srcLen )
1536 return wxCONV_FAILED;
1537
1538 wxUint32 *outBuff = wx_reinterpret_cast(wxUint32 *, dst);
1539 for ( size_t n = 0; n < srcLen; n += BYTES_PER_CHAR, src++ )
1540 {
1541 *outBuff++ = wxUINT32_SWAP_ALWAYS(*src);
1542 }
1543 }
1544
1545 return srcLen;
1546 }
1547
1548 #endif // WC_UTF16/!WC_UTF16
1549
1550
1551 // ============================================================================
1552 // The classes doing conversion using the iconv_xxx() functions
1553 // ============================================================================
1554
1555 #ifdef HAVE_ICONV
1556
1557 // VS: glibc 2.1.3 is broken in that iconv() conversion to/from UCS4 fails with
1558 // E2BIG if output buffer is _exactly_ as big as needed. Such case is
1559 // (unless there's yet another bug in glibc) the only case when iconv()
1560 // returns with (size_t)-1 (which means error) and says there are 0 bytes
1561 // left in the input buffer -- when _real_ error occurs,
1562 // bytes-left-in-input buffer is non-zero. Hence, this alternative test for
1563 // iconv() failure.
1564 // [This bug does not appear in glibc 2.2.]
1565 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 1
1566 #define ICONV_FAILED(cres, bufLeft) ((cres == (size_t)-1) && \
1567 (errno != E2BIG || bufLeft != 0))
1568 #else
1569 #define ICONV_FAILED(cres, bufLeft) (cres == (size_t)-1)
1570 #endif
1571
1572 #define ICONV_CHAR_CAST(x) ((ICONV_CONST char **)(x))
1573
1574 #define ICONV_T_INVALID ((iconv_t)-1)
1575
1576 #if SIZEOF_WCHAR_T == 4
1577 #define WC_BSWAP wxUINT32_SWAP_ALWAYS
1578 #define WC_ENC wxFONTENCODING_UTF32
1579 #elif SIZEOF_WCHAR_T == 2
1580 #define WC_BSWAP wxUINT16_SWAP_ALWAYS
1581 #define WC_ENC wxFONTENCODING_UTF16
1582 #else // sizeof(wchar_t) != 2 nor 4
1583 // does this ever happen?
1584 #error "Unknown sizeof(wchar_t): please report this to wx-dev@lists.wxwindows.org"
1585 #endif
1586
1587 // ----------------------------------------------------------------------------
1588 // wxMBConv_iconv: encapsulates an iconv character set
1589 // ----------------------------------------------------------------------------
1590
1591 class wxMBConv_iconv : public wxMBConv
1592 {
1593 public:
1594 wxMBConv_iconv(const wxChar *name);
1595 virtual ~wxMBConv_iconv();
1596
1597 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const;
1598 virtual size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const;
1599
1600 // classify this encoding as explained in wxMBConv::GetMBNulLen() comment
1601 virtual size_t GetMBNulLen() const;
1602
1603 virtual wxMBConv *Clone() const
1604 {
1605 wxMBConv_iconv *p = new wxMBConv_iconv(m_name);
1606 p->m_minMBCharWidth = m_minMBCharWidth;
1607 return p;
1608 }
1609
1610 bool IsOk() const
1611 { return (m2w != ICONV_T_INVALID) && (w2m != ICONV_T_INVALID); }
1612
1613 protected:
1614 // the iconv handlers used to translate from multibyte
1615 // to wide char and in the other direction
1616 iconv_t m2w,
1617 w2m;
1618
1619 #if wxUSE_THREADS
1620 // guards access to m2w and w2m objects
1621 wxMutex m_iconvMutex;
1622 #endif
1623
1624 private:
1625 // the name (for iconv_open()) of a wide char charset -- if none is
1626 // available on this machine, it will remain NULL
1627 static wxString ms_wcCharsetName;
1628
1629 // true if the wide char encoding we use (i.e. ms_wcCharsetName) has
1630 // different endian-ness than the native one
1631 static bool ms_wcNeedsSwap;
1632
1633
1634 // name of the encoding handled by this conversion
1635 wxString m_name;
1636
1637 // cached result of GetMBNulLen(); set to 0 meaning "unknown"
1638 // initially
1639 size_t m_minMBCharWidth;
1640 };
1641
1642 // make the constructor available for unit testing
1643 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_iconv( const wxChar* name )
1644 {
1645 wxMBConv_iconv* result = new wxMBConv_iconv( name );
1646 if ( !result->IsOk() )
1647 {
1648 delete result;
1649 return 0;
1650 }
1651
1652 return result;
1653 }
1654
1655 wxString wxMBConv_iconv::ms_wcCharsetName;
1656 bool wxMBConv_iconv::ms_wcNeedsSwap = false;
1657
1658 wxMBConv_iconv::wxMBConv_iconv(const wxChar *name)
1659 : m_name(name)
1660 {
1661 m_minMBCharWidth = 0;
1662
1663 // iconv operates with chars, not wxChars, but luckily it uses only ASCII
1664 // names for the charsets
1665 const wxCharBuffer cname(wxString(name).ToAscii());
1666
1667 // check for charset that represents wchar_t:
1668 if ( ms_wcCharsetName.empty() )
1669 {
1670 wxLogTrace(TRACE_STRCONV, _T("Looking for wide char codeset:"));
1671
1672 #if wxUSE_FONTMAP
1673 const wxChar **names = wxFontMapperBase::GetAllEncodingNames(WC_ENC);
1674 #else // !wxUSE_FONTMAP
1675 static const wxChar *names_static[] =
1676 {
1677 #if SIZEOF_WCHAR_T == 4
1678 _T("UCS-4"),
1679 #elif SIZEOF_WCHAR_T = 2
1680 _T("UCS-2"),
1681 #endif
1682 NULL
1683 };
1684 const wxChar **names = names_static;
1685 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
1686
1687 for ( ; *names && ms_wcCharsetName.empty(); ++names )
1688 {
1689 const wxString nameCS(*names);
1690
1691 // first try charset with explicit bytesex info (e.g. "UCS-4LE"):
1692 wxString nameXE(nameCS);
1693
1694 #ifdef WORDS_BIGENDIAN
1695 nameXE += _T("BE");
1696 #else // little endian
1697 nameXE += _T("LE");
1698 #endif
1699
1700 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1701 nameXE.c_str());
1702
1703 m2w = iconv_open(nameXE.ToAscii(), cname);
1704 if ( m2w == ICONV_T_INVALID )
1705 {
1706 // try charset w/o bytesex info (e.g. "UCS4")
1707 wxLogTrace(TRACE_STRCONV, _T(" trying charset \"%s\""),
1708 nameCS.c_str());
1709 m2w = iconv_open(nameCS.ToAscii(), cname);
1710
1711 // and check for bytesex ourselves:
1712 if ( m2w != ICONV_T_INVALID )
1713 {
1714 char buf[2], *bufPtr;
1715 wchar_t wbuf[2], *wbufPtr;
1716 size_t insz, outsz;
1717 size_t res;
1718
1719 buf[0] = 'A';
1720 buf[1] = 0;
1721 wbuf[0] = 0;
1722 insz = 2;
1723 outsz = SIZEOF_WCHAR_T * 2;
1724 wbufPtr = wbuf;
1725 bufPtr = buf;
1726
1727 res = iconv(
1728 m2w, ICONV_CHAR_CAST(&bufPtr), &insz,
1729 (char**)&wbufPtr, &outsz);
1730
1731 if (ICONV_FAILED(res, insz))
1732 {
1733 wxLogLastError(wxT("iconv"));
1734 wxLogError(_("Conversion to charset '%s' doesn't work."),
1735 nameCS.c_str());
1736 }
1737 else // ok, can convert to this encoding, remember it
1738 {
1739 ms_wcCharsetName = nameCS;
1740 ms_wcNeedsSwap = wbuf[0] != (wchar_t)buf[0];
1741 }
1742 }
1743 }
1744 else // use charset not requiring byte swapping
1745 {
1746 ms_wcCharsetName = nameXE;
1747 }
1748 }
1749
1750 wxLogTrace(TRACE_STRCONV,
1751 wxT("iconv wchar_t charset is \"%s\"%s"),
1752 ms_wcCharsetName.empty() ? _T("<none>")
1753 : ms_wcCharsetName.c_str(),
1754 ms_wcNeedsSwap ? _T(" (needs swap)")
1755 : _T(""));
1756 }
1757 else // we already have ms_wcCharsetName
1758 {
1759 m2w = iconv_open(ms_wcCharsetName.ToAscii(), cname);
1760 }
1761
1762 if ( ms_wcCharsetName.empty() )
1763 {
1764 w2m = ICONV_T_INVALID;
1765 }
1766 else
1767 {
1768 w2m = iconv_open(cname, ms_wcCharsetName.ToAscii());
1769 if ( w2m == ICONV_T_INVALID )
1770 {
1771 wxLogTrace(TRACE_STRCONV,
1772 wxT("\"%s\" -> \"%s\" works but not the converse!?"),
1773 ms_wcCharsetName.c_str(), cname.data());
1774 }
1775 }
1776 }
1777
1778 wxMBConv_iconv::~wxMBConv_iconv()
1779 {
1780 if ( m2w != ICONV_T_INVALID )
1781 iconv_close(m2w);
1782 if ( w2m != ICONV_T_INVALID )
1783 iconv_close(w2m);
1784 }
1785
1786 size_t wxMBConv_iconv::MB2WC(wchar_t *buf, const char *psz, size_t n) const
1787 {
1788 // find the string length: notice that must be done differently for
1789 // NUL-terminated strings and UTF-16/32 which are terminated with 2/4 NULs
1790 size_t inbuf;
1791 const size_t nulLen = GetMBNulLen();
1792 switch ( nulLen )
1793 {
1794 default:
1795 return wxCONV_FAILED;
1796
1797 case 1:
1798 inbuf = strlen(psz); // arguably more optimized than our version
1799 break;
1800
1801 case 2:
1802 case 4:
1803 // for UTF-16/32 not only we need to have 2/4 consecutive NULs but
1804 // they also have to start at character boundary and not span two
1805 // adjacent characters
1806 const char *p;
1807 for ( p = psz; NotAllNULs(p, nulLen); p += nulLen )
1808 ;
1809 inbuf = p - psz;
1810 break;
1811 }
1812
1813 #if wxUSE_THREADS
1814 // NB: iconv() is MT-safe, but each thread must use its own iconv_t handle.
1815 // Unfortunately there are a couple of global wxCSConv objects such as
1816 // wxConvLocal that are used all over wx code, so we have to make sure
1817 // the handle is used by at most one thread at the time. Otherwise
1818 // only a few wx classes would be safe to use from non-main threads
1819 // as MB<->WC conversion would fail "randomly".
1820 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1821 #endif // wxUSE_THREADS
1822
1823 size_t outbuf = n * SIZEOF_WCHAR_T;
1824 size_t res, cres;
1825 // VS: Use these instead of psz, buf because iconv() modifies its arguments:
1826 wchar_t *bufPtr = buf;
1827 const char *pszPtr = psz;
1828
1829 if (buf)
1830 {
1831 // have destination buffer, convert there
1832 cres = iconv(m2w,
1833 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1834 (char**)&bufPtr, &outbuf);
1835 res = n - (outbuf / SIZEOF_WCHAR_T);
1836
1837 if (ms_wcNeedsSwap)
1838 {
1839 // convert to native endianness
1840 for ( unsigned i = 0; i < res; i++ )
1841 buf[n] = WC_BSWAP(buf[i]);
1842 }
1843
1844 // NUL-terminate the string if there is any space left
1845 if (res < n)
1846 buf[res] = 0;
1847 }
1848 else
1849 {
1850 // no destination buffer... convert using temp buffer
1851 // to calculate destination buffer requirement
1852 wchar_t tbuf[8];
1853 res = 0;
1854
1855 do
1856 {
1857 bufPtr = tbuf;
1858 outbuf = 8 * SIZEOF_WCHAR_T;
1859
1860 cres = iconv(m2w,
1861 ICONV_CHAR_CAST(&pszPtr), &inbuf,
1862 (char**)&bufPtr, &outbuf );
1863
1864 res += 8 - (outbuf / SIZEOF_WCHAR_T);
1865 }
1866 while ((cres == (size_t)-1) && (errno == E2BIG));
1867 }
1868
1869 if (ICONV_FAILED(cres, inbuf))
1870 {
1871 //VS: it is ok if iconv fails, hence trace only
1872 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1873 return wxCONV_FAILED;
1874 }
1875
1876 return res;
1877 }
1878
1879 size_t wxMBConv_iconv::WC2MB(char *buf, const wchar_t *psz, size_t n) const
1880 {
1881 #if wxUSE_THREADS
1882 // NB: explained in MB2WC
1883 wxMutexLocker lock(wxConstCast(this, wxMBConv_iconv)->m_iconvMutex);
1884 #endif
1885
1886 size_t inlen = wxWcslen(psz);
1887 size_t inbuf = inlen * SIZEOF_WCHAR_T;
1888 size_t outbuf = n;
1889 size_t res, cres;
1890
1891 wchar_t *tmpbuf = 0;
1892
1893 if (ms_wcNeedsSwap)
1894 {
1895 // need to copy to temp buffer to switch endianness
1896 // (doing WC_BSWAP twice on the original buffer won't help, as it
1897 // could be in read-only memory, or be accessed in some other thread)
1898 tmpbuf = (wchar_t *)malloc(inbuf + SIZEOF_WCHAR_T);
1899 for ( size_t i = 0; i < inlen; i++ )
1900 tmpbuf[n] = WC_BSWAP(psz[i]);
1901
1902 tmpbuf[inlen] = L'\0';
1903 psz = tmpbuf;
1904 }
1905
1906 if (buf)
1907 {
1908 // have destination buffer, convert there
1909 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1910
1911 res = n - outbuf;
1912
1913 // NB: iconv was given only wcslen(psz) characters on input, and so
1914 // it couldn't convert the trailing zero. Let's do it ourselves
1915 // if there's some room left for it in the output buffer.
1916 if (res < n)
1917 buf[0] = 0;
1918 }
1919 else
1920 {
1921 // no destination buffer: convert using temp buffer
1922 // to calculate destination buffer requirement
1923 char tbuf[16];
1924 res = 0;
1925 do
1926 {
1927 buf = tbuf;
1928 outbuf = 16;
1929
1930 cres = iconv( w2m, ICONV_CHAR_CAST(&psz), &inbuf, &buf, &outbuf );
1931
1932 res += 16 - outbuf;
1933 }
1934 while ((cres == (size_t)-1) && (errno == E2BIG));
1935 }
1936
1937 if (ms_wcNeedsSwap)
1938 {
1939 free(tmpbuf);
1940 }
1941
1942 if (ICONV_FAILED(cres, inbuf))
1943 {
1944 wxLogTrace(TRACE_STRCONV, wxT("iconv failed: %s"), wxSysErrorMsg(wxSysErrorCode()));
1945 return wxCONV_FAILED;
1946 }
1947
1948 return res;
1949 }
1950
1951 size_t wxMBConv_iconv::GetMBNulLen() const
1952 {
1953 if ( m_minMBCharWidth == 0 )
1954 {
1955 wxMBConv_iconv * const self = wxConstCast(this, wxMBConv_iconv);
1956
1957 #if wxUSE_THREADS
1958 // NB: explained in MB2WC
1959 wxMutexLocker lock(self->m_iconvMutex);
1960 #endif
1961
1962 const wchar_t *wnul = L"";
1963 char buf[8]; // should be enough for NUL in any encoding
1964 size_t inLen = sizeof(wchar_t),
1965 outLen = WXSIZEOF(buf);
1966 char *inBuff = (char *)wnul;
1967 char *outBuff = buf;
1968 if ( iconv(w2m, ICONV_CHAR_CAST(&inBuff), &inLen, &outBuff, &outLen) == (size_t)-1 )
1969 {
1970 self->m_minMBCharWidth = (size_t)-1;
1971 }
1972 else // ok
1973 {
1974 self->m_minMBCharWidth = outBuff - buf;
1975 }
1976 }
1977
1978 return m_minMBCharWidth;
1979 }
1980
1981 #endif // HAVE_ICONV
1982
1983
1984 // ============================================================================
1985 // Win32 conversion classes
1986 // ============================================================================
1987
1988 #ifdef wxHAVE_WIN32_MB2WC
1989
1990 // from utils.cpp
1991 #if wxUSE_FONTMAP
1992 extern WXDLLIMPEXP_BASE long wxCharsetToCodepage(const wxChar *charset);
1993 extern WXDLLIMPEXP_BASE long wxEncodingToCodepage(wxFontEncoding encoding);
1994 #endif
1995
1996 class wxMBConv_win32 : public wxMBConv
1997 {
1998 public:
1999 wxMBConv_win32()
2000 {
2001 m_CodePage = CP_ACP;
2002 m_minMBCharWidth = 0;
2003 }
2004
2005 wxMBConv_win32(const wxMBConv_win32& conv)
2006 : wxMBConv()
2007 {
2008 m_CodePage = conv.m_CodePage;
2009 m_minMBCharWidth = conv.m_minMBCharWidth;
2010 }
2011
2012 #if wxUSE_FONTMAP
2013 wxMBConv_win32(const wxChar* name)
2014 {
2015 m_CodePage = wxCharsetToCodepage(name);
2016 m_minMBCharWidth = 0;
2017 }
2018
2019 wxMBConv_win32(wxFontEncoding encoding)
2020 {
2021 m_CodePage = wxEncodingToCodepage(encoding);
2022 m_minMBCharWidth = 0;
2023 }
2024 #endif // wxUSE_FONTMAP
2025
2026 virtual size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2027 {
2028 // note that we have to use MB_ERR_INVALID_CHARS flag as it without it
2029 // the behaviour is not compatible with the Unix version (using iconv)
2030 // and break the library itself, e.g. wxTextInputStream::NextChar()
2031 // wouldn't work if reading an incomplete MB char didn't result in an
2032 // error
2033 //
2034 // Moreover, MB_ERR_INVALID_CHARS is only supported on Win 2K SP4 or
2035 // Win XP or newer and it is not supported for UTF-[78] so we always
2036 // use our own conversions in this case. See
2037 // http://blogs.msdn.com/michkap/archive/2005/04/19/409566.aspx
2038 // http://msdn.microsoft.com/library/en-us/intl/unicode_17si.asp
2039 if ( m_CodePage == CP_UTF8 )
2040 {
2041 return wxConvUTF8.MB2WC(buf, psz, n);
2042 }
2043
2044 if ( m_CodePage == CP_UTF7 )
2045 {
2046 return wxConvUTF7.MB2WC(buf, psz, n);
2047 }
2048
2049 int flags = 0;
2050 if ( (m_CodePage < 50000 && m_CodePage != CP_SYMBOL) &&
2051 IsAtLeastWin2kSP4() )
2052 {
2053 flags = MB_ERR_INVALID_CHARS;
2054 }
2055
2056 const size_t len = ::MultiByteToWideChar
2057 (
2058 m_CodePage, // code page
2059 flags, // flags: fall on error
2060 psz, // input string
2061 -1, // its length (NUL-terminated)
2062 buf, // output string
2063 buf ? n : 0 // size of output buffer
2064 );
2065 if ( !len )
2066 {
2067 // function totally failed
2068 return wxCONV_FAILED;
2069 }
2070
2071 // if we were really converting and didn't use MB_ERR_INVALID_CHARS,
2072 // check if we succeeded, by doing a double trip:
2073 if ( !flags && buf )
2074 {
2075 const size_t mbLen = strlen(psz);
2076 wxCharBuffer mbBuf(mbLen);
2077 if ( ::WideCharToMultiByte
2078 (
2079 m_CodePage,
2080 0,
2081 buf,
2082 -1,
2083 mbBuf.data(),
2084 mbLen + 1, // size in bytes, not length
2085 NULL,
2086 NULL
2087 ) == 0 ||
2088 strcmp(mbBuf, psz) != 0 )
2089 {
2090 // we didn't obtain the same thing we started from, hence
2091 // the conversion was lossy and we consider that it failed
2092 return wxCONV_FAILED;
2093 }
2094 }
2095
2096 // note that it returns count of written chars for buf != NULL and size
2097 // of the needed buffer for buf == NULL so in either case the length of
2098 // the string (which never includes the terminating NUL) is one less
2099 return len - 1;
2100 }
2101
2102 virtual size_t WC2MB(char *buf, const wchar_t *pwz, size_t n) const
2103 {
2104 /*
2105 we have a problem here: by default, WideCharToMultiByte() may
2106 replace characters unrepresentable in the target code page with bad
2107 quality approximations such as turning "1/2" symbol (U+00BD) into
2108 "1" for the code pages which don't have it and we, obviously, want
2109 to avoid this at any price
2110
2111 the trouble is that this function does it _silently_, i.e. it won't
2112 even tell us whether it did or not... Win98/2000 and higher provide
2113 WC_NO_BEST_FIT_CHARS but it doesn't work for the older systems and
2114 we have to resort to a round trip, i.e. check that converting back
2115 results in the same string -- this is, of course, expensive but
2116 otherwise we simply can't be sure to not garble the data.
2117 */
2118
2119 // determine if we can rely on WC_NO_BEST_FIT_CHARS: according to MSDN
2120 // it doesn't work with CJK encodings (which we test for rather roughly
2121 // here...) nor with UTF-7/8 nor, of course, with Windows versions not
2122 // supporting it
2123 BOOL usedDef wxDUMMY_INITIALIZE(false);
2124 BOOL *pUsedDef;
2125 int flags;
2126 if ( CanUseNoBestFit() && m_CodePage < 50000 )
2127 {
2128 // it's our lucky day
2129 flags = WC_NO_BEST_FIT_CHARS;
2130 pUsedDef = &usedDef;
2131 }
2132 else // old system or unsupported encoding
2133 {
2134 flags = 0;
2135 pUsedDef = NULL;
2136 }
2137
2138 const size_t len = ::WideCharToMultiByte
2139 (
2140 m_CodePage, // code page
2141 flags, // either none or no best fit
2142 pwz, // input string
2143 -1, // it is (wide) NUL-terminated
2144 buf, // output buffer
2145 buf ? n : 0, // and its size
2146 NULL, // default "replacement" char
2147 pUsedDef // [out] was it used?
2148 );
2149
2150 if ( !len )
2151 {
2152 // function totally failed
2153 return wxCONV_FAILED;
2154 }
2155
2156 // if we were really converting, check if we succeeded
2157 if ( buf )
2158 {
2159 if ( flags )
2160 {
2161 // check if the conversion failed, i.e. if any replacements
2162 // were done
2163 if ( usedDef )
2164 return wxCONV_FAILED;
2165 }
2166 else // we must resort to double tripping...
2167 {
2168 wxWCharBuffer wcBuf(n);
2169 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2170 wcscmp(wcBuf, pwz) != 0 )
2171 {
2172 // we didn't obtain the same thing we started from, hence
2173 // the conversion was lossy and we consider that it failed
2174 return wxCONV_FAILED;
2175 }
2176 }
2177 }
2178
2179 // see the comment above for the reason of "len - 1"
2180 return len - 1;
2181 }
2182
2183 virtual size_t GetMBNulLen() const
2184 {
2185 if ( m_minMBCharWidth == 0 )
2186 {
2187 int len = ::WideCharToMultiByte
2188 (
2189 m_CodePage, // code page
2190 0, // no flags
2191 L"", // input string
2192 1, // translate just the NUL
2193 NULL, // output buffer
2194 0, // and its size
2195 NULL, // no replacement char
2196 NULL // [out] don't care if it was used
2197 );
2198
2199 wxMBConv_win32 * const self = wxConstCast(this, wxMBConv_win32);
2200 switch ( len )
2201 {
2202 default:
2203 wxLogDebug(_T("Unexpected NUL length %d"), len);
2204 self->m_minMBCharWidth = (size_t)-1;
2205 break;
2206
2207 case 0:
2208 self->m_minMBCharWidth = (size_t)-1;
2209 break;
2210
2211 case 1:
2212 case 2:
2213 case 4:
2214 self->m_minMBCharWidth = len;
2215 break;
2216 }
2217 }
2218
2219 return m_minMBCharWidth;
2220 }
2221
2222 virtual wxMBConv *Clone() const { return new wxMBConv_win32(*this); }
2223
2224 bool IsOk() const { return m_CodePage != -1; }
2225
2226 private:
2227 static bool CanUseNoBestFit()
2228 {
2229 static int s_isWin98Or2k = -1;
2230
2231 if ( s_isWin98Or2k == -1 )
2232 {
2233 int verMaj, verMin;
2234 switch ( wxGetOsVersion(&verMaj, &verMin) )
2235 {
2236 case wxOS_WINDOWS_9X:
2237 s_isWin98Or2k = verMaj >= 4 && verMin >= 10;
2238 break;
2239
2240 case wxOS_WINDOWS_NT:
2241 s_isWin98Or2k = verMaj >= 5;
2242 break;
2243
2244 default:
2245 // unknown: be conservative by default
2246 s_isWin98Or2k = 0;
2247 break;
2248 }
2249
2250 wxASSERT_MSG( s_isWin98Or2k != -1, _T("should be set above") );
2251 }
2252
2253 return s_isWin98Or2k == 1;
2254 }
2255
2256 static bool IsAtLeastWin2kSP4()
2257 {
2258 #ifdef __WXWINCE__
2259 return false;
2260 #else
2261 static int s_isAtLeastWin2kSP4 = -1;
2262
2263 if ( s_isAtLeastWin2kSP4 == -1 )
2264 {
2265 OSVERSIONINFOEX ver;
2266
2267 memset(&ver, 0, sizeof(ver));
2268 ver.dwOSVersionInfoSize = sizeof(ver);
2269 GetVersionEx((OSVERSIONINFO*)&ver);
2270
2271 s_isAtLeastWin2kSP4 =
2272 ((ver.dwMajorVersion > 5) || // Vista+
2273 (ver.dwMajorVersion == 5 && ver.dwMinorVersion > 0) || // XP/2003
2274 (ver.dwMajorVersion == 5 && ver.dwMinorVersion == 0 &&
2275 ver.wServicePackMajor >= 4)) // 2000 SP4+
2276 ? 1 : 0;
2277 }
2278
2279 return s_isAtLeastWin2kSP4 == 1;
2280 #endif
2281 }
2282
2283
2284 // the code page we're working with
2285 long m_CodePage;
2286
2287 // cached result of GetMBNulLen(), set to 0 initially meaning
2288 // "unknown"
2289 size_t m_minMBCharWidth;
2290 };
2291
2292 #endif // wxHAVE_WIN32_MB2WC
2293
2294 // ============================================================================
2295 // Cocoa conversion classes
2296 // ============================================================================
2297
2298 #if defined(__WXCOCOA__)
2299
// RN: There is no UTF-32 support in either Core Foundation or Cocoa.
// Strangely enough, Core Foundation uses UTF-32 internally quite a bit -
// it's just not public (yet).
2303
2304 #include <CoreFoundation/CFString.h>
2305 #include <CoreFoundation/CFStringEncodingExt.h>
2306
2307 CFStringEncoding wxCFStringEncFromFontEnc(wxFontEncoding encoding)
2308 {
2309 CFStringEncoding enc = kCFStringEncodingInvalidId ;
2310
2311 switch (encoding)
2312 {
2313 case wxFONTENCODING_DEFAULT :
2314 enc = CFStringGetSystemEncoding();
2315 break ;
2316
2317 case wxFONTENCODING_ISO8859_1 :
2318 enc = kCFStringEncodingISOLatin1 ;
2319 break ;
2320 case wxFONTENCODING_ISO8859_2 :
2321 enc = kCFStringEncodingISOLatin2;
2322 break ;
2323 case wxFONTENCODING_ISO8859_3 :
2324 enc = kCFStringEncodingISOLatin3 ;
2325 break ;
2326 case wxFONTENCODING_ISO8859_4 :
2327 enc = kCFStringEncodingISOLatin4;
2328 break ;
2329 case wxFONTENCODING_ISO8859_5 :
2330 enc = kCFStringEncodingISOLatinCyrillic;
2331 break ;
2332 case wxFONTENCODING_ISO8859_6 :
2333 enc = kCFStringEncodingISOLatinArabic;
2334 break ;
2335 case wxFONTENCODING_ISO8859_7 :
2336 enc = kCFStringEncodingISOLatinGreek;
2337 break ;
2338 case wxFONTENCODING_ISO8859_8 :
2339 enc = kCFStringEncodingISOLatinHebrew;
2340 break ;
2341 case wxFONTENCODING_ISO8859_9 :
2342 enc = kCFStringEncodingISOLatin5;
2343 break ;
2344 case wxFONTENCODING_ISO8859_10 :
2345 enc = kCFStringEncodingISOLatin6;
2346 break ;
2347 case wxFONTENCODING_ISO8859_11 :
2348 enc = kCFStringEncodingISOLatinThai;
2349 break ;
2350 case wxFONTENCODING_ISO8859_13 :
2351 enc = kCFStringEncodingISOLatin7;
2352 break ;
2353 case wxFONTENCODING_ISO8859_14 :
2354 enc = kCFStringEncodingISOLatin8;
2355 break ;
2356 case wxFONTENCODING_ISO8859_15 :
2357 enc = kCFStringEncodingISOLatin9;
2358 break ;
2359
2360 case wxFONTENCODING_KOI8 :
2361 enc = kCFStringEncodingKOI8_R;
2362 break ;
2363 case wxFONTENCODING_ALTERNATIVE : // MS-DOS CP866
2364 enc = kCFStringEncodingDOSRussian;
2365 break ;
2366
2367 // case wxFONTENCODING_BULGARIAN :
2368 // enc = ;
2369 // break ;
2370
2371 case wxFONTENCODING_CP437 :
2372 enc = kCFStringEncodingDOSLatinUS ;
2373 break ;
2374 case wxFONTENCODING_CP850 :
2375 enc = kCFStringEncodingDOSLatin1;
2376 break ;
2377 case wxFONTENCODING_CP852 :
2378 enc = kCFStringEncodingDOSLatin2;
2379 break ;
2380 case wxFONTENCODING_CP855 :
2381 enc = kCFStringEncodingDOSCyrillic;
2382 break ;
2383 case wxFONTENCODING_CP866 :
2384 enc = kCFStringEncodingDOSRussian ;
2385 break ;
2386 case wxFONTENCODING_CP874 :
2387 enc = kCFStringEncodingDOSThai;
2388 break ;
2389 case wxFONTENCODING_CP932 :
2390 enc = kCFStringEncodingDOSJapanese;
2391 break ;
2392 case wxFONTENCODING_CP936 :
2393 enc = kCFStringEncodingDOSChineseSimplif ;
2394 break ;
2395 case wxFONTENCODING_CP949 :
2396 enc = kCFStringEncodingDOSKorean;
2397 break ;
2398 case wxFONTENCODING_CP950 :
2399 enc = kCFStringEncodingDOSChineseTrad;
2400 break ;
2401 case wxFONTENCODING_CP1250 :
2402 enc = kCFStringEncodingWindowsLatin2;
2403 break ;
2404 case wxFONTENCODING_CP1251 :
2405 enc = kCFStringEncodingWindowsCyrillic ;
2406 break ;
2407 case wxFONTENCODING_CP1252 :
2408 enc = kCFStringEncodingWindowsLatin1 ;
2409 break ;
2410 case wxFONTENCODING_CP1253 :
2411 enc = kCFStringEncodingWindowsGreek;
2412 break ;
2413 case wxFONTENCODING_CP1254 :
2414 enc = kCFStringEncodingWindowsLatin5;
2415 break ;
2416 case wxFONTENCODING_CP1255 :
2417 enc = kCFStringEncodingWindowsHebrew ;
2418 break ;
2419 case wxFONTENCODING_CP1256 :
2420 enc = kCFStringEncodingWindowsArabic ;
2421 break ;
2422 case wxFONTENCODING_CP1257 :
2423 enc = kCFStringEncodingWindowsBalticRim;
2424 break ;
2425 // This only really encodes to UTF7 (if that) evidently
2426 // case wxFONTENCODING_UTF7 :
2427 // enc = kCFStringEncodingNonLossyASCII ;
2428 // break ;
2429 case wxFONTENCODING_UTF8 :
2430 enc = kCFStringEncodingUTF8 ;
2431 break ;
2432 case wxFONTENCODING_EUC_JP :
2433 enc = kCFStringEncodingEUC_JP;
2434 break ;
2435 case wxFONTENCODING_UTF16 :
2436 enc = kCFStringEncodingUnicode ;
2437 break ;
2438 case wxFONTENCODING_MACROMAN :
2439 enc = kCFStringEncodingMacRoman ;
2440 break ;
2441 case wxFONTENCODING_MACJAPANESE :
2442 enc = kCFStringEncodingMacJapanese ;
2443 break ;
2444 case wxFONTENCODING_MACCHINESETRAD :
2445 enc = kCFStringEncodingMacChineseTrad ;
2446 break ;
2447 case wxFONTENCODING_MACKOREAN :
2448 enc = kCFStringEncodingMacKorean ;
2449 break ;
2450 case wxFONTENCODING_MACARABIC :
2451 enc = kCFStringEncodingMacArabic ;
2452 break ;
2453 case wxFONTENCODING_MACHEBREW :
2454 enc = kCFStringEncodingMacHebrew ;
2455 break ;
2456 case wxFONTENCODING_MACGREEK :
2457 enc = kCFStringEncodingMacGreek ;
2458 break ;
2459 case wxFONTENCODING_MACCYRILLIC :
2460 enc = kCFStringEncodingMacCyrillic ;
2461 break ;
2462 case wxFONTENCODING_MACDEVANAGARI :
2463 enc = kCFStringEncodingMacDevanagari ;
2464 break ;
2465 case wxFONTENCODING_MACGURMUKHI :
2466 enc = kCFStringEncodingMacGurmukhi ;
2467 break ;
2468 case wxFONTENCODING_MACGUJARATI :
2469 enc = kCFStringEncodingMacGujarati ;
2470 break ;
2471 case wxFONTENCODING_MACORIYA :
2472 enc = kCFStringEncodingMacOriya ;
2473 break ;
2474 case wxFONTENCODING_MACBENGALI :
2475 enc = kCFStringEncodingMacBengali ;
2476 break ;
2477 case wxFONTENCODING_MACTAMIL :
2478 enc = kCFStringEncodingMacTamil ;
2479 break ;
2480 case wxFONTENCODING_MACTELUGU :
2481 enc = kCFStringEncodingMacTelugu ;
2482 break ;
2483 case wxFONTENCODING_MACKANNADA :
2484 enc = kCFStringEncodingMacKannada ;
2485 break ;
2486 case wxFONTENCODING_MACMALAJALAM :
2487 enc = kCFStringEncodingMacMalayalam ;
2488 break ;
2489 case wxFONTENCODING_MACSINHALESE :
2490 enc = kCFStringEncodingMacSinhalese ;
2491 break ;
2492 case wxFONTENCODING_MACBURMESE :
2493 enc = kCFStringEncodingMacBurmese ;
2494 break ;
2495 case wxFONTENCODING_MACKHMER :
2496 enc = kCFStringEncodingMacKhmer ;
2497 break ;
2498 case wxFONTENCODING_MACTHAI :
2499 enc = kCFStringEncodingMacThai ;
2500 break ;
2501 case wxFONTENCODING_MACLAOTIAN :
2502 enc = kCFStringEncodingMacLaotian ;
2503 break ;
2504 case wxFONTENCODING_MACGEORGIAN :
2505 enc = kCFStringEncodingMacGeorgian ;
2506 break ;
2507 case wxFONTENCODING_MACARMENIAN :
2508 enc = kCFStringEncodingMacArmenian ;
2509 break ;
2510 case wxFONTENCODING_MACCHINESESIMP :
2511 enc = kCFStringEncodingMacChineseSimp ;
2512 break ;
2513 case wxFONTENCODING_MACTIBETAN :
2514 enc = kCFStringEncodingMacTibetan ;
2515 break ;
2516 case wxFONTENCODING_MACMONGOLIAN :
2517 enc = kCFStringEncodingMacMongolian ;
2518 break ;
2519 case wxFONTENCODING_MACETHIOPIC :
2520 enc = kCFStringEncodingMacEthiopic ;
2521 break ;
2522 case wxFONTENCODING_MACCENTRALEUR :
2523 enc = kCFStringEncodingMacCentralEurRoman ;
2524 break ;
2525 case wxFONTENCODING_MACVIATNAMESE :
2526 enc = kCFStringEncodingMacVietnamese ;
2527 break ;
2528 case wxFONTENCODING_MACARABICEXT :
2529 enc = kCFStringEncodingMacExtArabic ;
2530 break ;
2531 case wxFONTENCODING_MACSYMBOL :
2532 enc = kCFStringEncodingMacSymbol ;
2533 break ;
2534 case wxFONTENCODING_MACDINGBATS :
2535 enc = kCFStringEncodingMacDingbats ;
2536 break ;
2537 case wxFONTENCODING_MACTURKISH :
2538 enc = kCFStringEncodingMacTurkish ;
2539 break ;
2540 case wxFONTENCODING_MACCROATIAN :
2541 enc = kCFStringEncodingMacCroatian ;
2542 break ;
2543 case wxFONTENCODING_MACICELANDIC :
2544 enc = kCFStringEncodingMacIcelandic ;
2545 break ;
2546 case wxFONTENCODING_MACROMANIAN :
2547 enc = kCFStringEncodingMacRomanian ;
2548 break ;
2549 case wxFONTENCODING_MACCELTIC :
2550 enc = kCFStringEncodingMacCeltic ;
2551 break ;
2552 case wxFONTENCODING_MACGAELIC :
2553 enc = kCFStringEncodingMacGaelic ;
2554 break ;
2555 // case wxFONTENCODING_MACKEYBOARD :
2556 // enc = kCFStringEncodingMacKeyboardGlyphs ;
2557 // break ;
2558
2559 default :
2560 // because gcc is picky
2561 break ;
2562 }
2563
2564 return enc ;
2565 }
2566
2567 class wxMBConv_cocoa : public wxMBConv
2568 {
2569 public:
2570 wxMBConv_cocoa()
2571 {
2572 Init(CFStringGetSystemEncoding()) ;
2573 }
2574
2575 wxMBConv_cocoa(const wxMBConv_cocoa& conv)
2576 {
2577 m_encoding = conv.m_encoding;
2578 }
2579
2580 #if wxUSE_FONTMAP
2581 wxMBConv_cocoa(const wxChar* name)
2582 {
2583 Init( wxCFStringEncFromFontEnc(wxFontMapperBase::Get()->CharsetToEncoding(name, false) ) ) ;
2584 }
2585 #endif
2586
2587 wxMBConv_cocoa(wxFontEncoding encoding)
2588 {
2589 Init( wxCFStringEncFromFontEnc(encoding) );
2590 }
2591
2592 virtual ~wxMBConv_cocoa()
2593 {
2594 }
2595
2596 void Init( CFStringEncoding encoding)
2597 {
2598 m_encoding = encoding ;
2599 }
2600
2601 size_t MB2WC(wchar_t * szOut, const char * szUnConv, size_t nOutSize) const
2602 {
2603 wxASSERT(szUnConv);
2604
2605 CFStringRef theString = CFStringCreateWithBytes (
2606 NULL, //the allocator
2607 (const UInt8*)szUnConv,
2608 strlen(szUnConv),
2609 m_encoding,
2610 false //no BOM/external representation
2611 );
2612
2613 wxASSERT(theString);
2614
2615 size_t nOutLength = CFStringGetLength(theString);
2616
2617 if (szOut == NULL)
2618 {
2619 CFRelease(theString);
2620 return nOutLength;
2621 }
2622
2623 CFRange theRange = { 0, nOutSize };
2624
2625 #if SIZEOF_WCHAR_T == 4
2626 UniChar* szUniCharBuffer = new UniChar[nOutSize];
2627 #endif
2628
2629 CFStringGetCharacters(theString, theRange, szUniCharBuffer);
2630
2631 CFRelease(theString);
2632
2633 szUniCharBuffer[nOutLength] = '\0';
2634
2635 #if SIZEOF_WCHAR_T == 4
2636 wxMBConvUTF16 converter;
2637 converter.MB2WC( szOut, (const char*)szUniCharBuffer, nOutSize );
2638 delete [] szUniCharBuffer;
2639 #endif
2640
2641 return nOutLength;
2642 }
2643
2644 size_t WC2MB(char *szOut, const wchar_t *szUnConv, size_t nOutSize) const
2645 {
2646 wxASSERT(szUnConv);
2647
2648 size_t nRealOutSize;
2649 size_t nBufSize = wxWcslen(szUnConv);
2650 UniChar* szUniBuffer = (UniChar*) szUnConv;
2651
2652 #if SIZEOF_WCHAR_T == 4
2653 wxMBConvUTF16 converter ;
2654 nBufSize = converter.WC2MB( NULL, szUnConv, 0 );
2655 szUniBuffer = new UniChar[ (nBufSize / sizeof(UniChar)) + 1];
2656 converter.WC2MB( (char*) szUniBuffer, szUnConv, nBufSize + sizeof(UniChar));
2657 nBufSize /= sizeof(UniChar);
2658 #endif
2659
2660 CFStringRef theString = CFStringCreateWithCharactersNoCopy(
2661 NULL, //allocator
2662 szUniBuffer,
2663 nBufSize,
2664 kCFAllocatorNull //deallocator - we want to deallocate it ourselves
2665 );
2666
2667 wxASSERT(theString);
2668
2669 //Note that CER puts a BOM when converting to unicode
2670 //so we check and use getchars instead in that case
2671 if (m_encoding == kCFStringEncodingUnicode)
2672 {
2673 if (szOut != NULL)
2674 CFStringGetCharacters(theString, CFRangeMake(0, nOutSize - 1), (UniChar*) szOut);
2675
2676 nRealOutSize = CFStringGetLength(theString) + 1;
2677 }
2678 else
2679 {
2680 CFStringGetBytes(
2681 theString,
2682 CFRangeMake(0, CFStringGetLength(theString)),
2683 m_encoding,
2684 0, //what to put in characters that can't be converted -
2685 //0 tells CFString to return NULL if it meets such a character
2686 false, //not an external representation
2687 (UInt8*) szOut,
2688 nOutSize,
2689 (CFIndex*) &nRealOutSize
2690 );
2691 }
2692
2693 CFRelease(theString);
2694
2695 #if SIZEOF_WCHAR_T == 4
2696 delete[] szUniBuffer;
2697 #endif
2698
2699 return nRealOutSize - 1;
2700 }
2701
2702 virtual wxMBConv *Clone() const { return new wxMBConv_cocoa(*this); }
2703
2704 bool IsOk() const
2705 {
2706 return m_encoding != kCFStringEncodingInvalidId &&
2707 CFStringIsEncodingAvailable(m_encoding);
2708 }
2709
2710 private:
2711 CFStringEncoding m_encoding ;
2712 };
2713
2714 #endif // defined(__WXCOCOA__)
2715
2716 // ============================================================================
2717 // Mac conversion classes
2718 // ============================================================================
2719
2720 #if defined(__WXMAC__) && defined(TARGET_CARBON)
2721
2722 class wxMBConv_mac : public wxMBConv
2723 {
2724 public:
2725 wxMBConv_mac()
2726 {
2727 Init(CFStringGetSystemEncoding()) ;
2728 }
2729
2730 wxMBConv_mac(const wxMBConv_mac& conv)
2731 {
2732 Init(conv.m_char_encoding);
2733 }
2734
2735 #if wxUSE_FONTMAP
2736 wxMBConv_mac(const wxChar* name)
2737 {
2738 wxFontEncoding enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
2739 Init( (enc != wxFONTENCODING_SYSTEM) ? wxMacGetSystemEncFromFontEnc( enc ) : kTextEncodingUnknown);
2740 }
2741 #endif
2742
2743 wxMBConv_mac(wxFontEncoding encoding)
2744 {
2745 Init( wxMacGetSystemEncFromFontEnc(encoding) );
2746 }
2747
2748 virtual ~wxMBConv_mac()
2749 {
2750 OSStatus status = noErr ;
2751 if (m_MB2WC_converter)
2752 status = TECDisposeConverter(m_MB2WC_converter);
2753 if (m_WC2MB_converter)
2754 status = TECDisposeConverter(m_WC2MB_converter);
2755 }
2756
2757 void Init( TextEncodingBase encoding,TextEncodingVariant encodingVariant = kTextEncodingDefaultVariant ,
2758 TextEncodingFormat encodingFormat = kTextEncodingDefaultFormat)
2759 {
2760 m_MB2WC_converter = NULL ;
2761 m_WC2MB_converter = NULL ;
2762 if ( encoding != kTextEncodingUnknown )
2763 {
2764 m_char_encoding = CreateTextEncoding(encoding, encodingVariant, encodingFormat) ;
2765 m_unicode_encoding = CreateTextEncoding(kTextEncodingUnicodeDefault, 0, kUnicode16BitFormat) ;
2766 }
2767 else
2768 {
2769 m_char_encoding = kTextEncodingUnknown;
2770 m_unicode_encoding = kTextEncodingUnknown;
2771 }
2772 }
2773
2774 virtual void CreateIfNeeded() const
2775 {
2776 if ( m_MB2WC_converter == NULL && m_WC2MB_converter == NULL &&
2777 m_char_encoding != kTextEncodingUnknown && m_unicode_encoding != kTextEncodingUnknown )
2778 {
2779 OSStatus status = noErr ;
2780 status = TECCreateConverter(&m_MB2WC_converter,
2781 m_char_encoding,
2782 m_unicode_encoding);
2783 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2784 status = TECCreateConverter(&m_WC2MB_converter,
2785 m_unicode_encoding,
2786 m_char_encoding);
2787 wxASSERT_MSG( status == noErr , _("Unable to create TextEncodingConverter")) ;
2788 }
2789 }
2790
2791 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
2792 {
2793 CreateIfNeeded() ;
2794 OSStatus status = noErr ;
2795 ByteCount byteOutLen ;
2796 ByteCount byteInLen = strlen(psz) + 1;
2797 wchar_t *tbuf = NULL ;
2798 UniChar* ubuf = NULL ;
2799 size_t res = 0 ;
2800
2801 if (buf == NULL)
2802 {
2803 // Apple specs say at least 32
2804 n = wxMax( 32, byteInLen ) ;
2805 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
2806 }
2807
2808 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
2809
2810 #if SIZEOF_WCHAR_T == 4
2811 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
2812 #else
2813 ubuf = (UniChar*) (buf ? buf : tbuf) ;
2814 #endif
2815 {
2816 #if wxUSE_THREADS
2817 wxMutexLocker lock( m_MB2WC_guard );
2818 #endif
2819 status = TECConvertText(
2820 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
2821 (TextPtr) ubuf, byteBufferLen, &byteOutLen);
2822 }
2823
2824 #if SIZEOF_WCHAR_T == 4
2825 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
2826 // is not properly terminated we get random characters at the end
2827 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
2828 wxMBConvUTF16 converter ;
2829 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
2830 free( ubuf ) ;
2831 #else
2832 res = byteOutLen / sizeof( UniChar ) ;
2833 #endif
2834
2835 if ( buf == NULL )
2836 free(tbuf) ;
2837
2838 if ( buf && res < n)
2839 buf[res] = 0;
2840
2841 return res ;
2842 }
2843
2844 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2845 {
2846 CreateIfNeeded() ;
2847 OSStatus status = noErr ;
2848 ByteCount byteOutLen ;
2849 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2850
2851 char *tbuf = NULL ;
2852
2853 if (buf == NULL)
2854 {
2855 // Apple specs say at least 32
2856 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2857 tbuf = (char*) malloc( n ) ;
2858 }
2859
2860 ByteCount byteBufferLen = n ;
2861 UniChar* ubuf = NULL ;
2862
2863 #if SIZEOF_WCHAR_T == 4
2864 wxMBConvUTF16 converter ;
2865 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2866 byteInLen = unicharlen ;
2867 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2868 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2869 #else
2870 ubuf = (UniChar*) psz ;
2871 #endif
2872
2873 {
2874 #if wxUSE_THREADS
2875 wxMutexLocker lock( m_WC2MB_guard );
2876 #endif
2877 status = TECConvertText(
2878 m_WC2MB_converter, (ConstTextPtr) ubuf, byteInLen, &byteInLen,
2879 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2880 }
2881
2882 #if SIZEOF_WCHAR_T == 4
2883 free( ubuf ) ;
2884 #endif
2885
2886 if ( buf == NULL )
2887 free(tbuf) ;
2888
2889 size_t res = byteOutLen ;
2890 if ( buf && res < n)
2891 {
2892 buf[res] = 0;
2893
2894 //we need to double-trip to verify it didn't insert any ? in place
2895 //of bogus characters
2896 wxWCharBuffer wcBuf(n);
2897 size_t pszlen = wxWcslen(psz);
2898 if ( MB2WC(wcBuf.data(), buf, n) == wxCONV_FAILED ||
2899 wxWcslen(wcBuf) != pszlen ||
2900 memcmp(wcBuf, psz, pszlen * sizeof(wchar_t)) != 0 )
2901 {
2902 // we didn't obtain the same thing we started from, hence
2903 // the conversion was lossy and we consider that it failed
2904 return wxCONV_FAILED;
2905 }
2906 }
2907
2908 return res ;
2909 }
2910
2911 virtual wxMBConv *Clone() const { return new wxMBConv_mac(*this); }
2912
2913 bool IsOk() const
2914 {
2915 CreateIfNeeded() ;
2916 return m_MB2WC_converter != NULL && m_WC2MB_converter != NULL;
2917 }
2918
2919 protected :
2920 mutable TECObjectRef m_MB2WC_converter;
2921 mutable TECObjectRef m_WC2MB_converter;
2922 #if wxUSE_THREADS
2923 mutable wxMutex m_MB2WC_guard;
2924 mutable wxMutex m_WC2MB_guard;
2925 #endif
2926
2927 TextEncodingBase m_char_encoding;
2928 TextEncodingBase m_unicode_encoding;
2929 };
2930
2931 // MB is decomposed (D) normalized UTF8
2932
2933 class wxMBConv_macUTF8D : public wxMBConv_mac
2934 {
2935 public :
2936 wxMBConv_macUTF8D()
2937 {
2938 Init( kTextEncodingUnicodeDefault , kUnicodeNoSubset , kUnicodeUTF8Format ) ;
2939 m_uni = NULL;
2940 m_uniBack = NULL ;
2941 }
2942
2943 virtual ~wxMBConv_macUTF8D()
2944 {
2945 if (m_uni!=NULL)
2946 DisposeUnicodeToTextInfo(&m_uni);
2947 if (m_uniBack!=NULL)
2948 DisposeUnicodeToTextInfo(&m_uniBack);
2949 }
2950
2951 size_t WC2MB(char *buf, const wchar_t *psz, size_t n) const
2952 {
2953 CreateIfNeeded() ;
2954 OSStatus status = noErr ;
2955 ByteCount byteOutLen ;
2956 ByteCount byteInLen = wxWcslen(psz) * SIZEOF_WCHAR_T ;
2957
2958 char *tbuf = NULL ;
2959
2960 if (buf == NULL)
2961 {
2962 // Apple specs say at least 32
2963 n = wxMax( 32, ((byteInLen / SIZEOF_WCHAR_T) * 8) + SIZEOF_WCHAR_T );
2964 tbuf = (char*) malloc( n ) ;
2965 }
2966
2967 ByteCount byteBufferLen = n ;
2968 UniChar* ubuf = NULL ;
2969
2970 #if SIZEOF_WCHAR_T == 4
2971 wxMBConvUTF16 converter ;
2972 size_t unicharlen = converter.WC2MB( NULL, psz, 0 ) ;
2973 byteInLen = unicharlen ;
2974 ubuf = (UniChar*) malloc( byteInLen + 2 ) ;
2975 converter.WC2MB( (char*) ubuf, psz, unicharlen + 2 ) ;
2976 #else
2977 ubuf = (UniChar*) psz ;
2978 #endif
2979
2980 // ubuf is a non-decomposed UniChar buffer
2981
2982 ByteCount dcubuflen = byteInLen * 2 + 2 ;
2983 ByteCount dcubufread , dcubufwritten ;
2984 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
2985
2986 {
2987 #if wxUSE_THREADS
2988 wxMutexLocker lock( m_WC2MB_guard );
2989 #endif
2990 ConvertFromUnicodeToText( m_uni , byteInLen , ubuf ,
2991 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , dcubuf ) ;
2992
2993 // we now convert that decomposed buffer into UTF8
2994
2995 status = TECConvertText(
2996 m_WC2MB_converter, (ConstTextPtr) dcubuf, dcubufwritten, &dcubufread,
2997 (TextPtr) (buf ? buf : tbuf), byteBufferLen, &byteOutLen);
2998 }
2999
3000 free( dcubuf );
3001
3002 #if SIZEOF_WCHAR_T == 4
3003 free( ubuf ) ;
3004 #endif
3005
3006 if ( buf == NULL )
3007 free(tbuf) ;
3008
3009 size_t res = byteOutLen ;
3010 if ( buf && res < n)
3011 {
3012 buf[res] = 0;
3013 // don't test for round-trip fidelity yet, we cannot guarantee it yet
3014 }
3015
3016 return res ;
3017 }
3018
3019 size_t MB2WC(wchar_t *buf, const char *psz, size_t n) const
3020 {
3021 CreateIfNeeded() ;
3022 OSStatus status = noErr ;
3023 ByteCount byteOutLen ;
3024 ByteCount byteInLen = strlen(psz) + 1;
3025 wchar_t *tbuf = NULL ;
3026 UniChar* ubuf = NULL ;
3027 size_t res = 0 ;
3028
3029 if (buf == NULL)
3030 {
3031 // Apple specs say at least 32
3032 n = wxMax( 32, byteInLen ) ;
3033 tbuf = (wchar_t*) malloc( n * SIZEOF_WCHAR_T ) ;
3034 }
3035
3036 ByteCount byteBufferLen = n * sizeof( UniChar ) ;
3037
3038 #if SIZEOF_WCHAR_T == 4
3039 ubuf = (UniChar*) malloc( byteBufferLen + 2 ) ;
3040 #else
3041 ubuf = (UniChar*) (buf ? buf : tbuf) ;
3042 #endif
3043
3044 ByteCount dcubuflen = byteBufferLen * 2 + 2 ;
3045 ByteCount dcubufread , dcubufwritten ;
3046 UniChar *dcubuf = (UniChar*) malloc( dcubuflen ) ;
3047
3048 {
3049 #if wxUSE_THREADS
3050 wxMutexLocker lock( m_MB2WC_guard );
3051 #endif
3052 status = TECConvertText(
3053 m_MB2WC_converter, (ConstTextPtr) psz, byteInLen, &byteInLen,
3054 (TextPtr) dcubuf, dcubuflen, &byteOutLen);
3055 // we have to terminate here, because n might be larger for the trailing zero, and if UniChar
3056 // is not properly terminated we get random characters at the end
3057 dcubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3058
3059 // now from the decomposed UniChar to properly composed uniChar
3060 ConvertFromUnicodeToText( m_uniBack , byteOutLen , dcubuf ,
3061 kUnicodeDefaultDirectionMask, 0, NULL, NULL, NULL, dcubuflen , &dcubufread , &dcubufwritten , ubuf ) ;
3062 }
3063
3064 free( dcubuf );
3065 byteOutLen = dcubufwritten ;
3066 ubuf[byteOutLen / sizeof( UniChar ) ] = 0 ;
3067
3068
3069 #if SIZEOF_WCHAR_T == 4
3070 wxMBConvUTF16 converter ;
3071 res = converter.MB2WC( (buf ? buf : tbuf), (const char*)ubuf, n ) ;
3072 free( ubuf ) ;
3073 #else
3074 res = byteOutLen / sizeof( UniChar ) ;
3075 #endif
3076
3077 if ( buf == NULL )
3078 free(tbuf) ;
3079
3080 if ( buf && res < n)
3081 buf[res] = 0;
3082
3083 return res ;
3084 }
3085
3086 virtual void CreateIfNeeded() const
3087 {
3088 wxMBConv_mac::CreateIfNeeded() ;
3089 if ( m_uni == NULL )
3090 {
3091 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3092 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3093 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3094 kUnicodeCanonicalDecompVariant, kTextEncodingDefaultFormat);
3095 m_map.mappingVersion = kUnicodeUseLatestMapping;
3096
3097 OSStatus err = CreateUnicodeToTextInfo(&m_map, &m_uni);
3098 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3099
3100 m_map.unicodeEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3101 kUnicodeNoSubset, kTextEncodingDefaultFormat);
3102 m_map.otherEncoding = CreateTextEncoding(kTextEncodingUnicodeDefault,
3103 kUnicodeCanonicalCompVariant, kTextEncodingDefaultFormat);
3104 m_map.mappingVersion = kUnicodeUseLatestMapping;
3105 err = CreateUnicodeToTextInfo(&m_map, &m_uniBack);
3106 wxASSERT_MSG( err == noErr , _(" Couldn't create the UnicodeConverter")) ;
3107 }
3108 }
3109 protected :
3110 mutable UnicodeToTextInfo m_uni;
3111 mutable UnicodeToTextInfo m_uniBack;
3112 mutable UnicodeMapping m_map;
3113 };
3114 #endif // defined(__WXMAC__) && defined(TARGET_CARBON)
3115
3116 // ============================================================================
3117 // wxEncodingConverter based conversion classes
3118 // ============================================================================
3119
3120 #if wxUSE_FONTMAP
3121
3122 class wxMBConv_wxwin : public wxMBConv
3123 {
3124 private:
3125 void Init()
3126 {
3127 m_ok = m2w.Init(m_enc, wxFONTENCODING_UNICODE) &&
3128 w2m.Init(wxFONTENCODING_UNICODE, m_enc);
3129 }
3130
3131 public:
3132 // temporarily just use wxEncodingConverter stuff,
3133 // so that it works while a better implementation is built
3134 wxMBConv_wxwin(const wxChar* name)
3135 {
3136 if (name)
3137 m_enc = wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3138 else
3139 m_enc = wxFONTENCODING_SYSTEM;
3140
3141 Init();
3142 }
3143
3144 wxMBConv_wxwin(wxFontEncoding enc)
3145 {
3146 m_enc = enc;
3147
3148 Init();
3149 }
3150
3151 size_t MB2WC(wchar_t *buf, const char *psz, size_t WXUNUSED(n)) const
3152 {
3153 size_t inbuf = strlen(psz);
3154 if (buf)
3155 {
3156 if (!m2w.Convert(psz, buf))
3157 return wxCONV_FAILED;
3158 }
3159 return inbuf;
3160 }
3161
3162 size_t WC2MB(char *buf, const wchar_t *psz, size_t WXUNUSED(n)) const
3163 {
3164 const size_t inbuf = wxWcslen(psz);
3165 if (buf)
3166 {
3167 if (!w2m.Convert(psz, buf))
3168 return wxCONV_FAILED;
3169 }
3170
3171 return inbuf;
3172 }
3173
3174 virtual size_t GetMBNulLen() const
3175 {
3176 switch ( m_enc )
3177 {
3178 case wxFONTENCODING_UTF16BE:
3179 case wxFONTENCODING_UTF16LE:
3180 return 2;
3181
3182 case wxFONTENCODING_UTF32BE:
3183 case wxFONTENCODING_UTF32LE:
3184 return 4;
3185
3186 default:
3187 return 1;
3188 }
3189 }
3190
3191 virtual wxMBConv *Clone() const { return new wxMBConv_wxwin(m_enc); }
3192
3193 bool IsOk() const { return m_ok; }
3194
3195 public:
3196 wxFontEncoding m_enc;
3197 wxEncodingConverter m2w, w2m;
3198
3199 private:
3200 // were we initialized successfully?
3201 bool m_ok;
3202
3203 DECLARE_NO_COPY_CLASS(wxMBConv_wxwin)
3204 };
3205
3206 // make the constructors available for unit testing
3207 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_wxwin( const wxChar* name )
3208 {
3209 wxMBConv_wxwin* result = new wxMBConv_wxwin( name );
3210 if ( !result->IsOk() )
3211 {
3212 delete result;
3213 return 0;
3214 }
3215
3216 return result;
3217 }
3218
3219 #endif // wxUSE_FONTMAP
3220
3221 // ============================================================================
3222 // wxCSConv implementation
3223 // ============================================================================
3224
3225 void wxCSConv::Init()
3226 {
3227 m_name = NULL;
3228 m_convReal = NULL;
3229 m_deferred = true;
3230 }
3231
3232 wxCSConv::wxCSConv(const wxChar *charset)
3233 {
3234 Init();
3235
3236 if ( charset )
3237 {
3238 SetName(charset);
3239 }
3240
3241 #if wxUSE_FONTMAP
3242 m_encoding = wxFontMapperBase::GetEncodingFromName(charset);
3243 if ( m_encoding == wxFONTENCODING_MAX )
3244 {
3245 // set to unknown/invalid value
3246 m_encoding = wxFONTENCODING_SYSTEM;
3247 }
3248 else if ( m_encoding == wxFONTENCODING_DEFAULT )
3249 {
3250 // wxFONTENCODING_DEFAULT is same as US-ASCII in this context
3251 m_encoding = wxFONTENCODING_ISO8859_1;
3252 }
3253 #else
3254 m_encoding = wxFONTENCODING_SYSTEM;
3255 #endif
3256 }
3257
3258 wxCSConv::wxCSConv(wxFontEncoding encoding)
3259 {
3260 if ( encoding == wxFONTENCODING_MAX || encoding == wxFONTENCODING_DEFAULT )
3261 {
3262 wxFAIL_MSG( _T("invalid encoding value in wxCSConv ctor") );
3263
3264 encoding = wxFONTENCODING_SYSTEM;
3265 }
3266
3267 Init();
3268
3269 m_encoding = encoding;
3270 }
3271
3272 wxCSConv::~wxCSConv()
3273 {
3274 Clear();
3275 }
3276
3277 wxCSConv::wxCSConv(const wxCSConv& conv)
3278 : wxMBConv()
3279 {
3280 Init();
3281
3282 SetName(conv.m_name);
3283 m_encoding = conv.m_encoding;
3284 }
3285
3286 wxCSConv& wxCSConv::operator=(const wxCSConv& conv)
3287 {
3288 Clear();
3289
3290 SetName(conv.m_name);
3291 m_encoding = conv.m_encoding;
3292
3293 return *this;
3294 }
3295
3296 void wxCSConv::Clear()
3297 {
3298 free(m_name);
3299 delete m_convReal;
3300
3301 m_name = NULL;
3302 m_convReal = NULL;
3303 }
3304
3305 void wxCSConv::SetName(const wxChar *charset)
3306 {
3307 if (charset)
3308 {
3309 m_name = wxStrdup(charset);
3310 m_deferred = true;
3311 }
3312 }
3313
3314 #if wxUSE_FONTMAP
3315
3316 WX_DECLARE_HASH_MAP( wxFontEncoding, wxString, wxIntegerHash, wxIntegerEqual,
3317 wxEncodingNameCache );
3318
3319 static wxEncodingNameCache gs_nameCache;
3320 #endif
3321
3322 wxMBConv *wxCSConv::DoCreate() const
3323 {
3324 #if wxUSE_FONTMAP
3325 wxLogTrace(TRACE_STRCONV,
3326 wxT("creating conversion for %s"),
3327 (m_name ? m_name
3328 : wxFontMapperBase::GetEncodingName(m_encoding).c_str()));
3329 #endif // wxUSE_FONTMAP
3330
3331 // check for the special case of ASCII or ISO8859-1 charset: as we have
3332 // special knowledge of it anyhow, we don't need to create a special
3333 // conversion object
3334 if ( m_encoding == wxFONTENCODING_ISO8859_1 ||
3335 m_encoding == wxFONTENCODING_DEFAULT )
3336 {
3337 // don't convert at all
3338 return NULL;
3339 }
3340
3341 // we trust OS to do conversion better than we can so try external
3342 // conversion methods first
3343 //
3344 // the full order is:
3345 // 1. OS conversion (iconv() under Unix or Win32 API)
3346 // 2. hard coded conversions for UTF
3347 // 3. wxEncodingConverter as fall back
3348
3349 // step (1)
3350 #ifdef HAVE_ICONV
3351 #if !wxUSE_FONTMAP
3352 if ( m_name )
3353 #endif // !wxUSE_FONTMAP
3354 {
3355 wxString name(m_name);
3356 #if wxUSE_FONTMAP
3357 wxFontEncoding encoding(m_encoding);
3358 #endif
3359
3360 if ( !name.empty() )
3361 {
3362 wxMBConv_iconv *conv = new wxMBConv_iconv(name);
3363 if ( conv->IsOk() )
3364 return conv;
3365
3366 delete conv;
3367
3368 #if wxUSE_FONTMAP
3369 encoding =
3370 wxFontMapperBase::Get()->CharsetToEncoding(name, false);
3371 #endif // wxUSE_FONTMAP
3372 }
3373 #if wxUSE_FONTMAP
3374 {
3375 const wxEncodingNameCache::iterator it = gs_nameCache.find(encoding);
3376 if ( it != gs_nameCache.end() )
3377 {
3378 if ( it->second.empty() )
3379 return NULL;
3380
3381 wxMBConv_iconv *conv = new wxMBConv_iconv(it->second);
3382 if ( conv->IsOk() )
3383 return conv;
3384
3385 delete conv;
3386 }
3387
3388 const wxChar** names = wxFontMapperBase::GetAllEncodingNames(encoding);
3389 // CS : in case this does not return valid names (eg for MacRoman) encoding
3390 // got a 'failure' entry in the cache all the same, although it just has to
3391 // be created using a different method, so only store failed iconv creation
3392 // attempts (or perhaps we shoulnd't do this at all ?)
3393 if ( names[0] != NULL )
3394 {
3395 for ( ; *names; ++names )
3396 {
3397 wxMBConv_iconv *conv = new wxMBConv_iconv(*names);
3398 if ( conv->IsOk() )
3399 {
3400 gs_nameCache[encoding] = *names;
3401 return conv;
3402 }
3403
3404 delete conv;
3405 }
3406
3407 gs_nameCache[encoding] = _T(""); // cache the failure
3408 }
3409 }
3410 #endif // wxUSE_FONTMAP
3411 }
3412 #endif // HAVE_ICONV
3413
3414 #ifdef wxHAVE_WIN32_MB2WC
3415 {
3416 #if wxUSE_FONTMAP
3417 wxMBConv_win32 *conv = m_name ? new wxMBConv_win32(m_name)
3418 : new wxMBConv_win32(m_encoding);
3419 if ( conv->IsOk() )
3420 return conv;
3421
3422 delete conv;
3423 #else
3424 return NULL;
3425 #endif
3426 }
3427 #endif // wxHAVE_WIN32_MB2WC
3428
3429 #if defined(__WXMAC__)
3430 {
3431 // leave UTF16 and UTF32 to the built-ins of wx
3432 if ( m_name || ( m_encoding < wxFONTENCODING_UTF16BE ||
3433 ( m_encoding >= wxFONTENCODING_MACMIN && m_encoding <= wxFONTENCODING_MACMAX ) ) )
3434 {
3435 #if wxUSE_FONTMAP
3436 wxMBConv_mac *conv = m_name ? new wxMBConv_mac(m_name)
3437 : new wxMBConv_mac(m_encoding);
3438 #else
3439 wxMBConv_mac *conv = new wxMBConv_mac(m_encoding);
3440 #endif
3441 if ( conv->IsOk() )
3442 return conv;
3443
3444 delete conv;
3445 }
3446 }
3447 #endif
3448
3449 #if defined(__WXCOCOA__)
3450 {
3451 if ( m_name || ( m_encoding <= wxFONTENCODING_UTF16 ) )
3452 {
3453 #if wxUSE_FONTMAP
3454 wxMBConv_cocoa *conv = m_name ? new wxMBConv_cocoa(m_name)
3455 : new wxMBConv_cocoa(m_encoding);
3456 #else
3457 wxMBConv_cocoa *conv = new wxMBConv_cocoa(m_encoding);
3458 #endif
3459
3460 if ( conv->IsOk() )
3461 return conv;
3462
3463 delete conv;
3464 }
3465 }
3466 #endif
3467 // step (2)
3468 wxFontEncoding enc = m_encoding;
3469 #if wxUSE_FONTMAP
3470 if ( enc == wxFONTENCODING_SYSTEM && m_name )
3471 {
3472 // use "false" to suppress interactive dialogs -- we can be called from
3473 // anywhere and popping up a dialog from here is the last thing we want to
3474 // do
3475 enc = wxFontMapperBase::Get()->CharsetToEncoding(m_name, false);
3476 }
3477 #endif // wxUSE_FONTMAP
3478
3479 switch ( enc )
3480 {
3481 case wxFONTENCODING_UTF7:
3482 return new wxMBConvUTF7;
3483
3484 case wxFONTENCODING_UTF8:
3485 return new wxMBConvUTF8;
3486
3487 case wxFONTENCODING_UTF16BE:
3488 return new wxMBConvUTF16BE;
3489
3490 case wxFONTENCODING_UTF16LE:
3491 return new wxMBConvUTF16LE;
3492
3493 case wxFONTENCODING_UTF32BE:
3494 return new wxMBConvUTF32BE;
3495
3496 case wxFONTENCODING_UTF32LE:
3497 return new wxMBConvUTF32LE;
3498
3499 default:
3500 // nothing to do but put here to suppress gcc warnings
3501 break;
3502 }
3503
3504 // step (3)
3505 #if wxUSE_FONTMAP
3506 {
3507 wxMBConv_wxwin *conv = m_name ? new wxMBConv_wxwin(m_name)
3508 : new wxMBConv_wxwin(m_encoding);
3509 if ( conv->IsOk() )
3510 return conv;
3511
3512 delete conv;
3513 }
3514 #endif // wxUSE_FONTMAP
3515
3516 // NB: This is a hack to prevent deadlock. What could otherwise happen
3517 // in Unicode build: wxConvLocal creation ends up being here
3518 // because of some failure and logs the error. But wxLog will try to
3519 // attach a timestamp, for which it will need wxConvLocal (to convert
3520 // time to char* and then wchar_t*), but that fails, tries to log the
3521 // error, but wxLog has an (already locked) critical section that
3522 // guards the static buffer.
3523 static bool alreadyLoggingError = false;
3524 if (!alreadyLoggingError)
3525 {
3526 alreadyLoggingError = true;
3527 wxLogError(_("Cannot convert from the charset '%s'!"),
3528 m_name ? m_name
3529 :
3530 #if wxUSE_FONTMAP
3531 wxFontMapperBase::GetEncodingDescription(m_encoding).c_str()
3532 #else // !wxUSE_FONTMAP
3533 wxString::Format(_("encoding %i"), m_encoding).c_str()
3534 #endif // wxUSE_FONTMAP/!wxUSE_FONTMAP
3535 );
3536
3537 alreadyLoggingError = false;
3538 }
3539
3540 return NULL;
3541 }
3542
3543 void wxCSConv::CreateConvIfNeeded() const
3544 {
3545 if ( m_deferred )
3546 {
3547 wxCSConv *self = (wxCSConv *)this; // const_cast
3548
3549 // if we don't have neither the name nor the encoding, use the default
3550 // encoding for this system
3551 if ( !m_name && m_encoding == wxFONTENCODING_SYSTEM )
3552 {
3553 #if wxUSE_INTL
3554 self->m_encoding = wxLocale::GetSystemEncoding();
3555 #else
3556 // fallback to some reasonable default:
3557 self->m_encoding = wxFONTENCODING_ISO8859_1;
3558 #endif // wxUSE_INTL
3559 }
3560
3561 self->m_convReal = DoCreate();
3562 self->m_deferred = false;
3563 }
3564 }
3565
3566 bool wxCSConv::IsOk() const
3567 {
3568 CreateConvIfNeeded();
3569
3570 // special case: no convReal created for wxFONTENCODING_ISO8859_1
3571 if ( m_encoding == wxFONTENCODING_ISO8859_1 )
3572 return true; // always ok as we do it ourselves
3573
3574 // m_convReal->IsOk() is called at its own creation, so we know it must
3575 // be ok if m_convReal is non-NULL
3576 return m_convReal != NULL;
3577 }
3578
3579 size_t wxCSConv::ToWChar(wchar_t *dst, size_t dstLen,
3580 const char *src, size_t srcLen) const
3581 {
3582 CreateConvIfNeeded();
3583
3584 if (m_convReal)
3585 return m_convReal->ToWChar(dst, dstLen, src, srcLen);
3586
3587 // latin-1 (direct)
3588 if ( srcLen == wxNO_LEN )
3589 srcLen = strlen(src) + 1; // take trailing NUL too
3590
3591 if ( dst )
3592 {
3593 if ( dstLen < srcLen )
3594 return wxCONV_FAILED;
3595
3596 for ( size_t n = 0; n < srcLen; n++ )
3597 dst[n] = (unsigned char)(src[n]);
3598 }
3599
3600 return srcLen;
3601 }
3602
3603 size_t wxCSConv::FromWChar(char *dst, size_t dstLen,
3604 const wchar_t *src, size_t srcLen) const
3605 {
3606 CreateConvIfNeeded();
3607
3608 if (m_convReal)
3609 return m_convReal->FromWChar(dst, dstLen, src, srcLen);
3610
3611 // latin-1 (direct)
3612 if ( srcLen == wxNO_LEN )
3613 srcLen = wxWcslen(src) + 1;
3614
3615 if ( dst )
3616 {
3617 if ( dstLen < srcLen )
3618 return wxCONV_FAILED;
3619
3620 for ( size_t n = 0; n < srcLen; n++ )
3621 {
3622 if ( src[n] > 0xFF )
3623 return wxCONV_FAILED;
3624
3625 dst[n] = (char)src[n];
3626 }
3627
3628 }
3629 else // still need to check the input validity
3630 {
3631 for ( size_t n = 0; n < srcLen; n++ )
3632 {
3633