1 |
/* |
2 |
www.sourceforge.net/projects/tinyxml |
3 |
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) |
4 |
|
5 |
This software is provided 'as-is', without any express or implied |
6 |
warranty. In no event will the authors be held liable for any |
7 |
damages arising from the use of this software. |
8 |
|
9 |
Permission is granted to anyone to use this software for any |
10 |
purpose, including commercial applications, and to alter it and |
11 |
redistribute it freely, subject to the following restrictions: |
12 |
|
13 |
1. The origin of this software must not be misrepresented; you must |
14 |
not claim that you wrote the original software. If you use this |
15 |
software in a product, an acknowledgment in the product documentation |
16 |
would be appreciated but is not required. |
17 |
|
18 |
2. Altered source versions must be plainly marked as such, and |
19 |
must not be misrepresented as being the original software. |
20 |
|
21 |
3. This notice may not be removed or altered from any source |
22 |
distribution. |
23 |
*/ |
24 |
|
25 |
#include <ctype.h> |
26 |
#include <stddef.h> |
27 |
|
28 |
#include "tinyxml.h" |
29 |
|
30 |
//#define DEBUG_PARSER |
31 |
#if defined( DEBUG_PARSER ) |
32 |
# if defined( DEBUG ) && defined( _MSC_VER ) |
33 |
# include <windows.h> |
34 |
# define TIXML_LOG OutputDebugString |
35 |
# else |
36 |
# define TIXML_LOG printf |
37 |
# endif |
38 |
#endif |
39 |
|
40 |
// Note tha "PutString" hardcodes the same list. This |
41 |
// is less flexible than it appears. Changing the entries |
42 |
// or order will break putstring. |
43 |
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = |
44 |
{ |
45 |
{ "&", 5, '&' }, |
46 |
{ "<", 4, '<' }, |
47 |
{ ">", 4, '>' }, |
48 |
{ """, 6, '\"' }, |
49 |
{ "'", 6, '\'' } |
50 |
}; |
51 |
|
52 |
// A good collection of Unicode information is at:
//     http://www.unicode.org/faq/utf_bom.html
// It includes the basis of the table below, which gives the number of bytes
// in a UTF-8 sequence from its lead byte. Invalid lead bytes are given a
// length of 1 -- the result will be junk, but the data is passed through
// as far as possible.
// Beware of these special three-byte sequences in UTF-8:
//     ef bb bf (Microsoft "lead bytes")
//     ef bf be
//     ef bf bf
61 |
|
62 |
// The three bytes of the Microsoft UTF-8 lead sequence (EF BB BF), in order.
const unsigned char TIXML_UTF_LEAD_0 = 0xEFu;
const unsigned char TIXML_UTF_LEAD_1 = 0xBBu;
const unsigned char TIXML_UTF_LEAD_2 = 0xBFu;
65 |
|
66 |
// Length in bytes of the UTF-8 sequence introduced by each possible lead
// byte. Bytes that cannot start a sequence are given a length of 1.
const int TiXmlBase::utf8ByteTable[256] =
{
    /* 0x00-0x0f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10-0x1f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20-0x2f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x30-0x3f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x40-0x4f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50-0x5f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60-0x6f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70-0x7f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // last ASCII row
    /* 0x80-0x8f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // 0x80..0xc1: not valid lead bytes
    /* 0x90-0x9f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xa0-0xaf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xb0-0xbf */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xc0-0xcf */ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,   // 0xc2..0xdf: 2-byte sequences
    /* 0xd0-0xdf */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xe0-0xef */ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   // 0xe0..0xef: 3-byte sequences
    /* 0xf0-0xff */ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1    // 0xf0..0xf4: 4-byte; 0xf5 and up invalid
};
86 |
|
87 |
|
88 |
// Encodes the code point `input` as UTF-8 into `output`.
//
//   input  - value to encode.
//   output - destination buffer; up to 4 bytes are written, no terminator.
//   length - receives the number of bytes written (1 to 4), or 0 when
//            input >= 0x200000, in which case nothing is written.
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long CONTINUATION_KEEP = 0xBF;   // clears bit 6
    const unsigned long CONTINUATION_FLAG = 0x80;   // sets bit 7
    const unsigned long LEAD_FLAGS[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

    int count = 0;
    if ( input < 0x80 )
        count = 1;
    else if ( input < 0x800 )
        count = 2;
    else if ( input < 0x10000 )
        count = 3;
    else if ( input < 0x200000 )
        count = 4;

    *length = count;
    if ( count == 0 )
        return;     // Value is out of the range this routine can encode.

    // Fill the continuation bytes from the last one backwards, peeling six
    // bits off the value each time.
    for ( int i = count - 1; i > 0; --i )
    {
        output[i] = (char)((input | CONTINUATION_FLAG) & CONTINUATION_KEEP);
        input >>= 6;
    }

    // Whatever is left goes into the lead byte together with the length marker.
    output[0] = (char)(input | LEAD_FLAGS[count]);
}
127 |
|
128 |
|
129 |
// Returns non-zero when anyByte may be treated as a letter.
// Bytes below 127 are classified with isalpha(); every other byte is
// accepted as a letter, because telling letters apart across encodings is
// not attempted here. The encoding argument is unused.
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    const bool lowAscii = ( anyByte < 127 );
    return lowAscii ? isalpha( anyByte ) : 1;
}
148 |
|
149 |
|
150 |
// Returns non-zero when anyByte may be treated as a letter or digit.
// Bytes below 127 are classified with isalnum(); every other byte is
// accepted, because telling letters apart across encodings is not
// attempted here. The encoding argument is unused.
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
    const bool lowAscii = ( anyByte < 127 );
    return lowAscii ? isalnum( anyByte ) : 1;
}
169 |
|
170 |
|
171 |
class TiXmlParsingData |
172 |
{ |
173 |
friend class TiXmlDocument; |
174 |
public: |
175 |
void Stamp( const char* now, TiXmlEncoding encoding ); |
176 |
|
177 |
const TiXmlCursor& Cursor() { return cursor; } |
178 |
|
179 |
private: |
180 |
// Only used by the document! |
181 |
TiXmlParsingData( const char* start, int _tabsize, int row, int col ) |
182 |
{ |
183 |
assert( start ); |
184 |
stamp = start; |
185 |
tabsize = _tabsize; |
186 |
cursor.row = row; |
187 |
cursor.col = col; |
188 |
} |
189 |
|
190 |
TiXmlCursor cursor; |
191 |
const char* stamp; |
192 |
int tabsize; |
193 |
}; |
194 |
|
195 |
|
196 |
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding ) |
197 |
{ |
198 |
assert( now ); |
199 |
|
200 |
// Do nothing if the tabsize is 0. |
201 |
if ( tabsize < 1 ) |
202 |
{ |
203 |
return; |
204 |
} |
205 |
|
206 |
// Get the current row, column. |
207 |
int row = cursor.row; |
208 |
int col = cursor.col; |
209 |
const char* p = stamp; |
210 |
assert( p ); |
211 |
|
212 |
while ( p < now ) |
213 |
{ |
214 |
// Treat p as unsigned, so we have a happy compiler. |
215 |
const unsigned char* pU = (const unsigned char*)p; |
216 |
|
217 |
// Code contributed by Fletcher Dunn: (modified by lee) |
218 |
switch (*pU) { |
219 |
case 0: |
220 |
// We *should* never get here, but in case we do, don't |
221 |
// advance past the terminating null character, ever |
222 |
return; |
223 |
|
224 |
case '\r': |
225 |
// bump down to the next line |
226 |
++row; |
227 |
col = 0; |
228 |
// Eat the character |
229 |
++p; |
230 |
|
231 |
// Check for \r\n sequence, and treat this as a single character |
232 |
if (*p == '\n') { |
233 |
++p; |
234 |
} |
235 |
break; |
236 |
|
237 |
case '\n': |
238 |
// bump down to the next line |
239 |
++row; |
240 |
col = 0; |
241 |
|
242 |
// Eat the character |
243 |
++p; |
244 |
|
245 |
// Check for \n\r sequence, and treat this as a single |
246 |
// character. (Yes, this bizarre thing does occur still |
247 |
// on some arcane platforms...) |
248 |
if (*p == '\r') { |
249 |
++p; |
250 |
} |
251 |
break; |
252 |
|
253 |
case '\t': |
254 |
// Eat the character |
255 |
++p; |
256 |
|
257 |
// Skip to next tab stop |
258 |
col = (col / tabsize + 1) * tabsize; |
259 |
break; |
260 |
|
261 |
case TIXML_UTF_LEAD_0: |
262 |
if ( encoding == TIXML_ENCODING_UTF8 ) |
263 |
{ |
264 |
if ( *(p+1) && *(p+2) ) |
265 |
{ |
266 |
// In these cases, don't advance the column. These are |
267 |
// 0-width spaces. |
268 |
if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 ) |
269 |
p += 3; |
270 |
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU ) |
271 |
p += 3; |
272 |
else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU ) |
273 |
p += 3; |
274 |
else |
275 |
{ p +=3; ++col; } // A normal character. |
276 |
} |
277 |
} |
278 |
else |
279 |
{ |
280 |
++p; |
281 |
++col; |
282 |
} |
283 |
break; |
284 |
|
285 |
default: |
286 |
if ( encoding == TIXML_ENCODING_UTF8 ) |
287 |
{ |
288 |
// Eat the 1 to 4 byte utf8 character. |
289 |
int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)]; |
290 |
if ( step == 0 ) |
291 |
step = 1; // Error case from bad encoding, but handle gracefully. |
292 |
p += step; |
293 |
|
294 |
// Just advance one column, of course. |
295 |
++col; |
296 |
} |
297 |
else |
298 |
{ |
299 |
++p; |
300 |
++col; |
301 |
} |
302 |
break; |
303 |
} |
304 |
} |
305 |
cursor.row = row; |
306 |
cursor.col = col; |
307 |
assert( cursor.row >= -1 ); |
308 |
assert( cursor.col >= -1 ); |
309 |
stamp = p; |
310 |
assert( stamp ); |
311 |
} |
312 |
|
313 |
|
314 |
const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding ) |
315 |
{ |
316 |
if ( !p || !*p ) |
317 |
{ |
318 |
return 0; |
319 |
} |
320 |
if ( encoding == TIXML_ENCODING_UTF8 ) |
321 |
{ |
322 |
while ( *p ) |
323 |
{ |
324 |
const unsigned char* pU = (const unsigned char*)p; |
325 |
|
326 |
// Skip the stupid Microsoft UTF-8 Byte order marks |
327 |
if ( *(pU+0)==TIXML_UTF_LEAD_0 |
328 |
&& *(pU+1)==TIXML_UTF_LEAD_1 |
329 |
&& *(pU+2)==TIXML_UTF_LEAD_2 ) |
330 |
{ |
331 |
p += 3; |
332 |
continue; |
333 |
} |
334 |
else if(*(pU+0)==TIXML_UTF_LEAD_0 |
335 |
&& *(pU+1)==0xbfU |
336 |
&& *(pU+2)==0xbeU ) |
337 |
{ |
338 |
p += 3; |
339 |
continue; |
340 |
} |
341 |
else if(*(pU+0)==TIXML_UTF_LEAD_0 |
342 |
&& *(pU+1)==0xbfU |
343 |
&& *(pU+2)==0xbfU ) |
344 |
{ |
345 |
p += 3; |
346 |
continue; |
347 |
} |
348 |
|
349 |
if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) // Still using old rules for white space. |
350 |
++p; |
351 |
else |
352 |
break; |
353 |
} |
354 |
} |
355 |
else |
356 |
{ |
357 |
while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' ) |
358 |
++p; |
359 |
} |
360 |
|
361 |
return p; |
362 |
} |
363 |
|
364 |
#ifdef TIXML_USE_STL |
365 |
/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag ) |
366 |
{ |
367 |
for( ;; ) |
368 |
{ |
369 |
if ( !in->good() ) return false; |
370 |
|
371 |
int c = in->peek(); |
372 |
// At this scope, we can't get to a document. So fail silently. |
373 |
if ( !IsWhiteSpace( c ) || c <= 0 ) |
374 |
return true; |
375 |
|
376 |
*tag += (char) in->get(); |
377 |
} |
378 |
} |
379 |
|
380 |
// Copies characters from the stream onto the end of *tag until `character`
// is the next one to be read (it is left in the stream).
// Returns true when `character` was found; false when a value <= 0 is
// peeked or the stream is no longer good().
/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
{
    //assert( character > 0 && character < 128 );   // else it won't work in utf-8
    for ( ;; )
    {
        if ( !in->good() )
            return false;

        const int next = in->peek();
        if ( next == character )
            return true;
        if ( next <= 0 )        // Silent failure: can't get document at this scope
            return false;

        in->get();
        *tag += (char) next;
    }
}
396 |
#endif |
397 |
|
398 |
// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The |
399 |
// "assign" optimization removes over 10% of the execution time. |
400 |
// |
401 |
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding ) |
402 |
{ |
403 |
// Oddly, not supported on some comilers, |
404 |
//name->clear(); |
405 |
// So use this: |
406 |
*name = ""; |
407 |
assert( p ); |
408 |
|
409 |
// Names start with letters or underscores. |
410 |
// Of course, in unicode, tinyxml has no idea what a letter *is*. The |
411 |
// algorithm is generous. |
412 |
// |
413 |
// After that, they can be letters, underscores, numbers, |
414 |
// hyphens, or colons. (Colons are valid ony for namespaces, |
415 |
// but tinyxml can't tell namespaces from names.) |
416 |
if ( p && *p |
417 |
&& ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) ) |
418 |
{ |
419 |
const char* start = p; |
420 |
while( p && *p |
421 |
&& ( IsAlphaNum( (unsigned char ) *p, encoding ) |
422 |
|| *p == '_' |
423 |
|| *p == '-' |
424 |
|| *p == '.' |
425 |
|| *p == ':' ) ) |
426 |
{ |
427 |
//(*name) += *p; // expensive |
428 |
++p; |
429 |
} |
430 |
if ( p-start > 0 ) { |
431 |
name->assign( start, p-start ); |
432 |
} |
433 |
return p; |
434 |
} |
435 |
return 0; |
436 |
} |
437 |
|
438 |
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding ) |
439 |
{ |
440 |
// Presume an entity, and pull it out. |
441 |
TIXML_STRING ent; |
442 |
int i; |
443 |
*length = 0; |
444 |
|
445 |
if ( *(p+1) && *(p+1) == '#' && *(p+2) ) |
446 |
{ |
447 |
unsigned long ucs = 0; |
448 |
ptrdiff_t delta = 0; |
449 |
unsigned mult = 1; |
450 |
|
451 |
if ( *(p+2) == 'x' ) |
452 |
{ |
453 |
// Hexadecimal. |
454 |
if ( !*(p+3) ) return 0; |
455 |
|
456 |
const char* q = p+3; |
457 |
q = strchr( q, ';' ); |
458 |
|
459 |
if ( !q || !*q ) return 0; |
460 |
|
461 |
delta = q-p; |
462 |
--q; |
463 |
|
464 |
while ( *q != 'x' ) |
465 |
{ |
466 |
if ( *q >= '0' && *q <= '9' ) |
467 |
ucs += mult * (*q - '0'); |
468 |
else if ( *q >= 'a' && *q <= 'f' ) |
469 |
ucs += mult * (*q - 'a' + 10); |
470 |
else if ( *q >= 'A' && *q <= 'F' ) |
471 |
ucs += mult * (*q - 'A' + 10 ); |
472 |
else |
473 |
return 0; |
474 |
mult *= 16; |
475 |
--q; |
476 |
} |
477 |
} |
478 |
else |
479 |
{ |
480 |
// Decimal. |
481 |
if ( !*(p+2) ) return 0; |
482 |
|
483 |
const char* q = p+2; |
484 |
q = strchr( q, ';' ); |
485 |
|
486 |
if ( !q || !*q ) return 0; |
487 |
|
488 |
delta = q-p; |
489 |
--q; |
490 |
|
491 |
while ( *q != '#' ) |
492 |
{ |
493 |
if ( *q >= '0' && *q <= '9' ) |
494 |
ucs += mult * (*q - '0'); |
495 |
else |
496 |
return 0; |
497 |
mult *= 10; |
498 |
--q; |
499 |
} |
500 |
} |
501 |
if ( encoding == TIXML_ENCODING_UTF8 ) |
502 |
{ |
503 |
// convert the UCS to UTF-8 |
504 |
ConvertUTF32ToUTF8( ucs, value, length ); |
505 |
} |
506 |
else |
507 |
{ |
508 |
*value = (char)ucs; |
509 |
*length = 1; |
510 |
} |
511 |
return p + delta + 1; |
512 |
} |
513 |
|
514 |
// Now try to match it. |
515 |
for( i=0; i<NUM_ENTITY; ++i ) |
516 |
{ |
517 |
if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 ) |
518 |
{ |
519 |
assert( strlen( entity[i].str ) == entity[i].strLength ); |
520 |
*value = entity[i].chr; |
521 |
*length = 1; |
522 |
return ( p + entity[i].strLength ); |
523 |
} |
524 |
} |
525 |
|
526 |
// So it wasn't an entity, its unrecognized, or something like that. |
527 |
*value = *p; // Don't put back the last one, since we return it! |
528 |
//*length = 1; // Leave unrecognized entities - this doesn't really work. |
529 |
// Just writes strange XML. |
530 |
return p+1; |
531 |
} |
532 |
|
533 |
|
534 |
bool TiXmlBase::StringEqual( const char* p, |
535 |
const char* tag, |
536 |
bool ignoreCase, |
537 |
TiXmlEncoding encoding ) |
538 |
{ |
539 |
assert( p ); |
540 |
assert( tag ); |
541 |
if ( !p || !*p ) |
542 |
{ |
543 |
assert( 0 ); |
544 |
return false; |
545 |
} |
546 |
|
547 |
const char* q = p; |
548 |
|
549 |
if ( ignoreCase ) |
550 |
{ |
551 |
while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) ) |
552 |
{ |
553 |
++q; |
554 |
++tag; |
555 |
} |
556 |
|
557 |
if ( *tag == 0 ) |
558 |
return true; |
559 |
} |
560 |
else |
561 |
{ |
562 |
while ( *q && *tag && *q == *tag ) |
563 |
{ |
564 |
++q; |
565 |
++tag; |
566 |
} |
567 |
|
568 |
if ( *tag == 0 ) // Have we found the end of the tag, and everything equal? |
569 |
return true; |
570 |
} |
571 |
return false; |
572 |
} |
573 |
|
574 |
const char* TiXmlBase::ReadText( const char* p, |
575 |
TIXML_STRING * text, |
576 |
bool trimWhiteSpace, |
577 |
const char* endTag, |
578 |
bool caseInsensitive, |
579 |
TiXmlEncoding encoding ) |
580 |
{ |
581 |
*text = ""; |
582 |
if ( !trimWhiteSpace // certain tags always keep whitespace |
583 |
|| !condenseWhiteSpace ) // if true, whitespace is always kept |
584 |
{ |
585 |
// Keep all the white space. |
586 |
while ( p && *p |
587 |
&& !StringEqual( p, endTag, caseInsensitive, encoding ) |
588 |
) |
589 |
{ |
590 |
int len; |
591 |
char cArr[4] = { 0, 0, 0, 0 }; |
592 |
p = GetChar( p, cArr, &len, encoding ); |
593 |
text->append( cArr, len ); |
594 |
} |
595 |
} |
596 |
else |
597 |
{ |
598 |
bool whitespace = false; |
599 |
|
600 |
// Remove leading white space: |
601 |
p = SkipWhiteSpace( p, encoding ); |
602 |
while ( p && *p |
603 |
&& !StringEqual( p, endTag, caseInsensitive, encoding ) ) |
604 |
{ |
605 |
if ( *p == '\r' || *p == '\n' ) |
606 |
{ |
607 |
whitespace = true; |
608 |
++p; |
609 |
} |
610 |
else if ( IsWhiteSpace( *p ) ) |
611 |
{ |
612 |
whitespace = true; |
613 |
++p; |
614 |
} |
615 |
else |
616 |
{ |
617 |
// If we've found whitespace, add it before the |
618 |
// new character. Any whitespace just becomes a space. |
619 |
if ( whitespace ) |
620 |
{ |
621 |
(*text) += ' '; |
622 |
whitespace = false; |
623 |
} |
624 |
int len; |
625 |
char cArr[4] = { 0, 0, 0, 0 }; |
626 |
p = GetChar( p, cArr, &len, encoding ); |
627 |
if ( len == 1 ) |
628 |
(*text) += cArr[0]; // more efficient |
629 |
else |
630 |
text->append( cArr, len ); |
631 |
} |
632 |
} |
633 |
} |
634 |
if ( p ) |
635 |
p += strlen( endTag ); |
636 |
return p; |
637 |
} |
638 |
|
639 |
#ifdef TIXML_USE_STL |
640 |
|
641 |
void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag ) |
642 |
{ |
643 |
// The basic issue with a document is that we don't know what we're |
644 |
// streaming. Read something presumed to be a tag (and hope), then |
645 |
// identify it, and call the appropriate stream method on the tag. |
646 |
// |
647 |
// This "pre-streaming" will never read the closing ">" so the |
648 |
// sub-tag can orient itself. |
649 |
|
650 |
if ( !StreamTo( in, '<', tag ) ) |
651 |
{ |
652 |
SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); |
653 |
return; |
654 |
} |
655 |
|
656 |
while ( in->good() ) |
657 |
{ |
658 |
int tagIndex = (int) tag->length(); |
659 |
while ( in->good() && in->peek() != '>' ) |
660 |
{ |
661 |
int c = in->get(); |
662 |
if ( c <= 0 ) |
663 |
{ |
664 |
SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
665 |
break; |
666 |
} |
667 |
(*tag) += (char) c; |
668 |
} |
669 |
|
670 |
if ( in->good() ) |
671 |
{ |
672 |
// We now have something we presume to be a node of |
673 |
// some sort. Identify it, and call the node to |
674 |
// continue streaming. |
675 |
TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING ); |
676 |
|
677 |
if ( node ) |
678 |
{ |
679 |
node->StreamIn( in, tag ); |
680 |
bool isElement = node->ToElement() != 0; |
681 |
delete node; |
682 |
node = 0; |
683 |
|
684 |
// If this is the root element, we're done. Parsing will be |
685 |
// done by the >> operator. |
686 |
if ( isElement ) |
687 |
{ |
688 |
return; |
689 |
} |
690 |
} |
691 |
else |
692 |
{ |
693 |
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); |
694 |
return; |
695 |
} |
696 |
} |
697 |
} |
698 |
// We should have returned sooner. |
699 |
SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN ); |
700 |
} |
701 |
|
702 |
#endif |
703 |
|
704 |
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding ) |
705 |
{ |
706 |
ClearError(); |
707 |
|
708 |
// Parse away, at the document level. Since a document |
709 |
// contains nothing but other tags, most of what happens |
710 |
// here is skipping white space. |
711 |
if ( !p || !*p ) |
712 |
{ |
713 |
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); |
714 |
return 0; |
715 |
} |
716 |
|
717 |
// Note that, for a document, this needs to come |
718 |
// before the while space skip, so that parsing |
719 |
// starts from the pointer we are given. |
720 |
location.Clear(); |
721 |
if ( prevData ) |
722 |
{ |
723 |
location.row = prevData->cursor.row; |
724 |
location.col = prevData->cursor.col; |
725 |
} |
726 |
else |
727 |
{ |
728 |
location.row = 0; |
729 |
location.col = 0; |
730 |
} |
731 |
TiXmlParsingData data( p, TabSize(), location.row, location.col ); |
732 |
location = data.Cursor(); |
733 |
|
734 |
if ( encoding == TIXML_ENCODING_UNKNOWN ) |
735 |
{ |
736 |
// Check for the Microsoft UTF-8 lead bytes. |
737 |
const unsigned char* pU = (const unsigned char*)p; |
738 |
if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 |
739 |
&& *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 |
740 |
&& *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 ) |
741 |
{ |
742 |
encoding = TIXML_ENCODING_UTF8; |
743 |
useMicrosoftBOM = true; |
744 |
} |
745 |
} |
746 |
|
747 |
p = SkipWhiteSpace( p, encoding ); |
748 |
if ( !p ) |
749 |
{ |
750 |
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN ); |
751 |
return 0; |
752 |
} |
753 |
|
754 |
while ( p && *p ) |
755 |
{ |
756 |
TiXmlNode* node = Identify( p, encoding ); |
757 |
if ( node ) |
758 |
{ |
759 |
p = node->Parse( p, &data, encoding ); |
760 |
LinkEndChild( node ); |
761 |
} |
762 |
else |
763 |
{ |
764 |
break; |
765 |
} |
766 |
|
767 |
// Did we get encoding info? |
768 |
if ( encoding == TIXML_ENCODING_UNKNOWN |
769 |
&& node->ToDeclaration() ) |
770 |
{ |
771 |
TiXmlDeclaration* dec = node->ToDeclaration(); |
772 |
const char* enc = dec->Encoding(); |
773 |
assert( enc ); |
774 |
|
775 |
if ( *enc == 0 ) |
776 |
encoding = TIXML_ENCODING_UTF8; |
777 |
else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) ) |
778 |
encoding = TIXML_ENCODING_UTF8; |
779 |
else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) ) |
780 |
encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice |
781 |
else |
782 |
encoding = TIXML_ENCODING_LEGACY; |
783 |
} |
784 |
|
785 |
p = SkipWhiteSpace( p, encoding ); |
786 |
} |
787 |
|
788 |
// Was this empty? |
789 |
if ( !firstChild ) { |
790 |
SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding ); |
791 |
return 0; |
792 |
} |
793 |
|
794 |
// All is well. |
795 |
return p; |
796 |
} |
797 |
|
798 |
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding ) |
799 |
{ |
800 |
// The first error in a chain is more accurate - don't set again! |
801 |
if ( error ) |
802 |
return; |
803 |
|
804 |
assert( err > 0 && err < TIXML_ERROR_STRING_COUNT ); |
805 |
error = true; |
806 |
errorId = err; |
807 |
errorDesc = errorString[ errorId ]; |
808 |
|
809 |
errorLocation.Clear(); |
810 |
if ( pError && data ) |
811 |
{ |
812 |
data->Stamp( pError, encoding ); |
813 |
errorLocation = data->Cursor(); |
814 |
} |
815 |
} |
816 |
|
817 |
|
818 |
// Looks at the markup starting at p and allocates an empty node of the
// matching kind, with its parent set to this node:
//   "<?xml"      (any case) -> TiXmlDeclaration
//   "<!--"                  -> TiXmlComment
//   "<![CDATA["             -> TiXmlText flagged as CDATA
//   "<!"                    -> TiXmlUnknown
//   '<' + letter or '_'     -> TiXmlElement
//   anything else           -> TiXmlUnknown
// Returns 0 when p (after skipping white space) does not start with '<'.
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
{
    p = SkipWhiteSpace( p, encoding );
    if ( !p || !*p || *p != '<' )
        return 0;

    TiXmlDocument* doc = GetDocument();
    p = SkipWhiteSpace( p, encoding );
    if ( !p || !*p )
        return 0;

    const char* const declarationMark = "<?xml";
    const char* const commentMark     = "<!--";
    const char* const cdataMark       = "<![CDATA[";
    const char* const dtdMark         = "<!";

    TiXmlNode* created = 0;

    // Order matters: "<!--" and "<![CDATA[" have to be tried before the
    // more general "<!".
    if ( StringEqual( p, declarationMark, true, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Declaration\n" );
        #endif
        created = new TiXmlDeclaration();
    }
    else if ( StringEqual( p, commentMark, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Comment\n" );
        #endif
        created = new TiXmlComment();
    }
    else if ( StringEqual( p, cdataMark, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing CDATA\n" );
        #endif
        TiXmlText* cdata = new TiXmlText( "" );
        cdata->SetCDATA( true );
        created = cdata;
    }
    else if ( StringEqual( p, dtdMark, false, encoding ) )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Unknown(1)\n" );
        #endif
        created = new TiXmlUnknown();
    }
    else if (    IsAlpha( *(p+1), encoding )
              || *(p+1) == '_' )
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Element\n" );
        #endif
        created = new TiXmlElement( "" );
    }
    else
    {
        #ifdef DEBUG_PARSER
            TIXML_LOG( "XML parsing Unknown(2)\n" );
        #endif
        created = new TiXmlUnknown();
    }

    if ( !created )
    {
        if ( doc )
            doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
        return created;
    }

    // Set the parent, so it can report errors
    created->parent = this;
    return created;
}
906 |
|
907 |
#ifdef TIXML_USE_STL |
908 |
|
909 |
void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag) |
910 |
{ |
911 |
// We're called with some amount of pre-parsing. That is, some of "this" |
912 |
// element is in "tag". Go ahead and stream to the closing ">" |
913 |
while( in->good() ) |
914 |
{ |
915 |
int c = in->get(); |
916 |
if ( c <= 0 ) |
917 |
{ |
918 |
TiXmlDocument* document = GetDocument(); |
919 |
if ( document ) |
920 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
921 |
return; |
922 |
} |
923 |
(*tag) += (char) c ; |
924 |
|
925 |
if ( c == '>' ) |
926 |
break; |
927 |
} |
928 |
|
929 |
if ( tag->length() < 3 ) return; |
930 |
|
931 |
// Okay...if we are a "/>" tag, then we're done. We've read a complete tag. |
932 |
// If not, identify and stream. |
933 |
|
934 |
if ( tag->at( tag->length() - 1 ) == '>' |
935 |
&& tag->at( tag->length() - 2 ) == '/' ) |
936 |
{ |
937 |
// All good! |
938 |
return; |
939 |
} |
940 |
else if ( tag->at( tag->length() - 1 ) == '>' ) |
941 |
{ |
942 |
// There is more. Could be: |
943 |
// text |
944 |
// cdata text (which looks like another node) |
945 |
// closing tag |
946 |
// another node. |
947 |
for ( ;; ) |
948 |
{ |
949 |
StreamWhiteSpace( in, tag ); |
950 |
|
951 |
// Do we have text? |
952 |
if ( in->good() && in->peek() != '<' ) |
953 |
{ |
954 |
// Yep, text. |
955 |
TiXmlText text( "" ); |
956 |
text.StreamIn( in, tag ); |
957 |
|
958 |
// What follows text is a closing tag or another node. |
959 |
// Go around again and figure it out. |
960 |
continue; |
961 |
} |
962 |
|
963 |
// We now have either a closing tag...or another node. |
964 |
// We should be at a "<", regardless. |
965 |
if ( !in->good() ) return; |
966 |
assert( in->peek() == '<' ); |
967 |
int tagIndex = (int) tag->length(); |
968 |
|
969 |
bool closingTag = false; |
970 |
bool firstCharFound = false; |
971 |
|
972 |
for( ;; ) |
973 |
{ |
974 |
if ( !in->good() ) |
975 |
return; |
976 |
|
977 |
int c = in->peek(); |
978 |
if ( c <= 0 ) |
979 |
{ |
980 |
TiXmlDocument* document = GetDocument(); |
981 |
if ( document ) |
982 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
983 |
return; |
984 |
} |
985 |
|
986 |
if ( c == '>' ) |
987 |
break; |
988 |
|
989 |
*tag += (char) c; |
990 |
in->get(); |
991 |
|
992 |
// Early out if we find the CDATA id. |
993 |
if ( c == '[' && tag->size() >= 9 ) |
994 |
{ |
995 |
size_t len = tag->size(); |
996 |
const char* start = tag->c_str() + len - 9; |
997 |
if ( strcmp( start, "<![CDATA[" ) == 0 ) { |
998 |
assert( !closingTag ); |
999 |
break; |
1000 |
} |
1001 |
} |
1002 |
|
1003 |
if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) ) |
1004 |
{ |
1005 |
firstCharFound = true; |
1006 |
if ( c == '/' ) |
1007 |
closingTag = true; |
1008 |
} |
1009 |
} |
1010 |
// If it was a closing tag, then read in the closing '>' to clean up the input stream. |
1011 |
// If it was not, the streaming will be done by the tag. |
1012 |
if ( closingTag ) |
1013 |
{ |
1014 |
if ( !in->good() ) |
1015 |
return; |
1016 |
|
1017 |
int c = in->get(); |
1018 |
if ( c <= 0 ) |
1019 |
{ |
1020 |
TiXmlDocument* document = GetDocument(); |
1021 |
if ( document ) |
1022 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
1023 |
return; |
1024 |
} |
1025 |
assert( c == '>' ); |
1026 |
*tag += (char) c; |
1027 |
|
1028 |
// We are done, once we've found our closing tag. |
1029 |
return; |
1030 |
} |
1031 |
else |
1032 |
{ |
1033 |
// If not a closing tag, id it, and stream. |
1034 |
const char* tagloc = tag->c_str() + tagIndex; |
1035 |
TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING ); |
1036 |
if ( !node ) |
1037 |
return; |
1038 |
node->StreamIn( in, tag ); |
1039 |
delete node; |
1040 |
node = 0; |
1041 |
|
1042 |
// No return: go around from the beginning: text, closing tag, or node. |
1043 |
} |
1044 |
} |
1045 |
} |
1046 |
} |
1047 |
#endif |
1048 |
|
1049 |
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1050 |
{ |
1051 |
p = SkipWhiteSpace( p, encoding ); |
1052 |
TiXmlDocument* document = GetDocument(); |
1053 |
|
1054 |
if ( !p || !*p ) |
1055 |
{ |
1056 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding ); |
1057 |
return 0; |
1058 |
} |
1059 |
|
1060 |
if ( data ) |
1061 |
{ |
1062 |
data->Stamp( p, encoding ); |
1063 |
location = data->Cursor(); |
1064 |
} |
1065 |
|
1066 |
if ( *p != '<' ) |
1067 |
{ |
1068 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding ); |
1069 |
return 0; |
1070 |
} |
1071 |
|
1072 |
p = SkipWhiteSpace( p+1, encoding ); |
1073 |
|
1074 |
// Read the name. |
1075 |
const char* pErr = p; |
1076 |
|
1077 |
p = ReadName( p, &value, encoding ); |
1078 |
if ( !p || !*p ) |
1079 |
{ |
1080 |
if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding ); |
1081 |
return 0; |
1082 |
} |
1083 |
|
1084 |
TIXML_STRING endTag ("</"); |
1085 |
endTag += value; |
1086 |
endTag += ">"; |
1087 |
|
1088 |
// Check for and read attributes. Also look for an empty |
1089 |
// tag or an end tag. |
1090 |
while ( p && *p ) |
1091 |
{ |
1092 |
pErr = p; |
1093 |
p = SkipWhiteSpace( p, encoding ); |
1094 |
if ( !p || !*p ) |
1095 |
{ |
1096 |
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); |
1097 |
return 0; |
1098 |
} |
1099 |
if ( *p == '/' ) |
1100 |
{ |
1101 |
++p; |
1102 |
// Empty tag. |
1103 |
if ( *p != '>' ) |
1104 |
{ |
1105 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding ); |
1106 |
return 0; |
1107 |
} |
1108 |
return (p+1); |
1109 |
} |
1110 |
else if ( *p == '>' ) |
1111 |
{ |
1112 |
// Done with attributes (if there were any.) |
1113 |
// Read the value -- which can include other |
1114 |
// elements -- read the end tag, and return. |
1115 |
++p; |
1116 |
p = ReadValue( p, data, encoding ); // Note this is an Element method, and will set the error if one happens. |
1117 |
if ( !p || !*p ) |
1118 |
return 0; |
1119 |
|
1120 |
// We should find the end tag now |
1121 |
if ( StringEqual( p, endTag.c_str(), false, encoding ) ) |
1122 |
{ |
1123 |
p += endTag.length(); |
1124 |
return p; |
1125 |
} |
1126 |
else |
1127 |
{ |
1128 |
if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding ); |
1129 |
return 0; |
1130 |
} |
1131 |
} |
1132 |
else |
1133 |
{ |
1134 |
// Try to read an attribute: |
1135 |
TiXmlAttribute* attrib = new TiXmlAttribute(); |
1136 |
if ( !attrib ) |
1137 |
{ |
1138 |
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding ); |
1139 |
return 0; |
1140 |
} |
1141 |
|
1142 |
attrib->SetDocument( document ); |
1143 |
pErr = p; |
1144 |
p = attrib->Parse( p, data, encoding ); |
1145 |
|
1146 |
if ( !p || !*p ) |
1147 |
{ |
1148 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding ); |
1149 |
delete attrib; |
1150 |
return 0; |
1151 |
} |
1152 |
|
1153 |
// Handle the strange case of double attributes: |
1154 |
#ifdef TIXML_USE_STL |
1155 |
TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() ); |
1156 |
#else |
1157 |
TiXmlAttribute* node = attributeSet.Find( attrib->Name() ); |
1158 |
#endif |
1159 |
if ( node ) |
1160 |
{ |
1161 |
node->SetValue( attrib->Value() ); |
1162 |
delete attrib; |
1163 |
return 0; |
1164 |
} |
1165 |
|
1166 |
attributeSet.Add( attrib ); |
1167 |
} |
1168 |
} |
1169 |
return p; |
1170 |
} |
1171 |
|
1172 |
|
1173 |
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1174 |
{ |
1175 |
TiXmlDocument* document = GetDocument(); |
1176 |
|
1177 |
// Read in text and elements in any order. |
1178 |
const char* pWithWhiteSpace = p; |
1179 |
p = SkipWhiteSpace( p, encoding ); |
1180 |
|
1181 |
while ( p && *p ) |
1182 |
{ |
1183 |
if ( *p != '<' ) |
1184 |
{ |
1185 |
// Take what we have, make a text element. |
1186 |
TiXmlText* textNode = new TiXmlText( "" ); |
1187 |
|
1188 |
if ( !textNode ) |
1189 |
{ |
1190 |
if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding ); |
1191 |
return 0; |
1192 |
} |
1193 |
|
1194 |
if ( TiXmlBase::IsWhiteSpaceCondensed() ) |
1195 |
{ |
1196 |
p = textNode->Parse( p, data, encoding ); |
1197 |
} |
1198 |
else |
1199 |
{ |
1200 |
// Special case: we want to keep the white space |
1201 |
// so that leading spaces aren't removed. |
1202 |
p = textNode->Parse( pWithWhiteSpace, data, encoding ); |
1203 |
} |
1204 |
|
1205 |
if ( !textNode->Blank() ) |
1206 |
LinkEndChild( textNode ); |
1207 |
else |
1208 |
delete textNode; |
1209 |
} |
1210 |
else |
1211 |
{ |
1212 |
// We hit a '<' |
1213 |
// Have we hit a new element or an end tag? This could also be |
1214 |
// a TiXmlText in the "CDATA" style. |
1215 |
if ( StringEqual( p, "</", false, encoding ) ) |
1216 |
{ |
1217 |
return p; |
1218 |
} |
1219 |
else |
1220 |
{ |
1221 |
TiXmlNode* node = Identify( p, encoding ); |
1222 |
if ( node ) |
1223 |
{ |
1224 |
p = node->Parse( p, data, encoding ); |
1225 |
LinkEndChild( node ); |
1226 |
} |
1227 |
else |
1228 |
{ |
1229 |
return 0; |
1230 |
} |
1231 |
} |
1232 |
} |
1233 |
pWithWhiteSpace = p; |
1234 |
p = SkipWhiteSpace( p, encoding ); |
1235 |
} |
1236 |
|
1237 |
if ( !p ) |
1238 |
{ |
1239 |
if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding ); |
1240 |
} |
1241 |
return p; |
1242 |
} |
1243 |
|
1244 |
|
1245 |
#ifdef TIXML_USE_STL |
1246 |
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag ) |
1247 |
{ |
1248 |
while ( in->good() ) |
1249 |
{ |
1250 |
int c = in->get(); |
1251 |
if ( c <= 0 ) |
1252 |
{ |
1253 |
TiXmlDocument* document = GetDocument(); |
1254 |
if ( document ) |
1255 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
1256 |
return; |
1257 |
} |
1258 |
(*tag) += (char) c; |
1259 |
|
1260 |
if ( c == '>' ) |
1261 |
{ |
1262 |
// All is well. |
1263 |
return; |
1264 |
} |
1265 |
} |
1266 |
} |
1267 |
#endif |
1268 |
|
1269 |
|
1270 |
const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1271 |
{ |
1272 |
TiXmlDocument* document = GetDocument(); |
1273 |
p = SkipWhiteSpace( p, encoding ); |
1274 |
|
1275 |
if ( data ) |
1276 |
{ |
1277 |
data->Stamp( p, encoding ); |
1278 |
location = data->Cursor(); |
1279 |
} |
1280 |
if ( !p || !*p || *p != '<' ) |
1281 |
{ |
1282 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding ); |
1283 |
return 0; |
1284 |
} |
1285 |
++p; |
1286 |
value = ""; |
1287 |
|
1288 |
while ( p && *p && *p != '>' ) |
1289 |
{ |
1290 |
value += *p; |
1291 |
++p; |
1292 |
} |
1293 |
|
1294 |
if ( !p ) |
1295 |
{ |
1296 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding ); |
1297 |
} |
1298 |
if ( *p == '>' ) |
1299 |
return p+1; |
1300 |
return p; |
1301 |
} |
1302 |
|
1303 |
#ifdef TIXML_USE_STL |
1304 |
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag ) |
1305 |
{ |
1306 |
while ( in->good() ) |
1307 |
{ |
1308 |
int c = in->get(); |
1309 |
if ( c <= 0 ) |
1310 |
{ |
1311 |
TiXmlDocument* document = GetDocument(); |
1312 |
if ( document ) |
1313 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
1314 |
return; |
1315 |
} |
1316 |
|
1317 |
(*tag) += (char) c; |
1318 |
|
1319 |
if ( c == '>' |
1320 |
&& tag->at( tag->length() - 2 ) == '-' |
1321 |
&& tag->at( tag->length() - 3 ) == '-' ) |
1322 |
{ |
1323 |
// All is well. |
1324 |
return; |
1325 |
} |
1326 |
} |
1327 |
} |
1328 |
#endif |
1329 |
|
1330 |
|
1331 |
const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1332 |
{ |
1333 |
TiXmlDocument* document = GetDocument(); |
1334 |
value = ""; |
1335 |
|
1336 |
p = SkipWhiteSpace( p, encoding ); |
1337 |
|
1338 |
if ( data ) |
1339 |
{ |
1340 |
data->Stamp( p, encoding ); |
1341 |
location = data->Cursor(); |
1342 |
} |
1343 |
const char* startTag = "<!--"; |
1344 |
const char* endTag = "-->"; |
1345 |
|
1346 |
if ( !StringEqual( p, startTag, false, encoding ) ) |
1347 |
{ |
1348 |
document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding ); |
1349 |
return 0; |
1350 |
} |
1351 |
p += strlen( startTag ); |
1352 |
p = ReadText( p, &value, false, endTag, false, encoding ); |
1353 |
return p; |
1354 |
} |
1355 |
|
1356 |
|
1357 |
const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1358 |
{ |
1359 |
p = SkipWhiteSpace( p, encoding ); |
1360 |
if ( !p || !*p ) return 0; |
1361 |
|
1362 |
// int tabsize = 4; |
1363 |
// if ( document ) |
1364 |
// tabsize = document->TabSize(); |
1365 |
|
1366 |
if ( data ) |
1367 |
{ |
1368 |
data->Stamp( p, encoding ); |
1369 |
location = data->Cursor(); |
1370 |
} |
1371 |
// Read the name, the '=' and the value. |
1372 |
const char* pErr = p; |
1373 |
p = ReadName( p, &name, encoding ); |
1374 |
if ( !p || !*p ) |
1375 |
{ |
1376 |
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding ); |
1377 |
return 0; |
1378 |
} |
1379 |
p = SkipWhiteSpace( p, encoding ); |
1380 |
if ( !p || !*p || *p != '=' ) |
1381 |
{ |
1382 |
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); |
1383 |
return 0; |
1384 |
} |
1385 |
|
1386 |
++p; // skip '=' |
1387 |
p = SkipWhiteSpace( p, encoding ); |
1388 |
if ( !p || !*p ) |
1389 |
{ |
1390 |
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); |
1391 |
return 0; |
1392 |
} |
1393 |
|
1394 |
const char* end; |
1395 |
const char SINGLE_QUOTE = '\''; |
1396 |
const char DOUBLE_QUOTE = '\"'; |
1397 |
|
1398 |
if ( *p == SINGLE_QUOTE ) |
1399 |
{ |
1400 |
++p; |
1401 |
end = "\'"; // single quote in string |
1402 |
p = ReadText( p, &value, false, end, false, encoding ); |
1403 |
} |
1404 |
else if ( *p == DOUBLE_QUOTE ) |
1405 |
{ |
1406 |
++p; |
1407 |
end = "\""; // double quote in string |
1408 |
p = ReadText( p, &value, false, end, false, encoding ); |
1409 |
} |
1410 |
else |
1411 |
{ |
1412 |
// All attribute values should be in single or double quotes. |
1413 |
// But this is such a common error that the parser will try |
1414 |
// its best, even without them. |
1415 |
value = ""; |
1416 |
while ( p && *p // existence |
1417 |
&& !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r' // whitespace |
1418 |
&& *p != '/' && *p != '>' ) // tag end |
1419 |
{ |
1420 |
if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) { |
1421 |
// [ 1451649 ] Attribute values with trailing quotes not handled correctly |
1422 |
// We did not have an opening quote but seem to have a |
1423 |
// closing one. Give up and throw an error. |
1424 |
if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding ); |
1425 |
return 0; |
1426 |
} |
1427 |
value += *p; |
1428 |
++p; |
1429 |
} |
1430 |
} |
1431 |
return p; |
1432 |
} |
1433 |
|
1434 |
#ifdef TIXML_USE_STL |
1435 |
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag ) |
1436 |
{ |
1437 |
while ( in->good() ) |
1438 |
{ |
1439 |
int c = in->peek(); |
1440 |
if ( !cdata && (c == '<' ) ) |
1441 |
{ |
1442 |
return; |
1443 |
} |
1444 |
if ( c <= 0 ) |
1445 |
{ |
1446 |
TiXmlDocument* document = GetDocument(); |
1447 |
if ( document ) |
1448 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
1449 |
return; |
1450 |
} |
1451 |
|
1452 |
(*tag) += (char) c; |
1453 |
in->get(); // "commits" the peek made above |
1454 |
|
1455 |
if ( cdata && c == '>' && tag->size() >= 3 ) { |
1456 |
size_t len = tag->size(); |
1457 |
if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) { |
1458 |
// terminator of cdata. |
1459 |
return; |
1460 |
} |
1461 |
} |
1462 |
} |
1463 |
} |
1464 |
#endif |
1465 |
|
1466 |
const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding ) |
1467 |
{ |
1468 |
value = ""; |
1469 |
TiXmlDocument* document = GetDocument(); |
1470 |
|
1471 |
if ( data ) |
1472 |
{ |
1473 |
data->Stamp( p, encoding ); |
1474 |
location = data->Cursor(); |
1475 |
} |
1476 |
|
1477 |
const char* const startTag = "<![CDATA["; |
1478 |
const char* const endTag = "]]>"; |
1479 |
|
1480 |
if ( cdata || StringEqual( p, startTag, false, encoding ) ) |
1481 |
{ |
1482 |
cdata = true; |
1483 |
|
1484 |
if ( !StringEqual( p, startTag, false, encoding ) ) |
1485 |
{ |
1486 |
document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding ); |
1487 |
return 0; |
1488 |
} |
1489 |
p += strlen( startTag ); |
1490 |
|
1491 |
// Keep all the white space, ignore the encoding, etc. |
1492 |
while ( p && *p |
1493 |
&& !StringEqual( p, endTag, false, encoding ) |
1494 |
) |
1495 |
{ |
1496 |
value += *p; |
1497 |
++p; |
1498 |
} |
1499 |
|
1500 |
TIXML_STRING dummy; |
1501 |
p = ReadText( p, &dummy, false, endTag, false, encoding ); |
1502 |
return p; |
1503 |
} |
1504 |
else |
1505 |
{ |
1506 |
bool ignoreWhite = true; |
1507 |
|
1508 |
const char* end = "<"; |
1509 |
p = ReadText( p, &value, ignoreWhite, end, false, encoding ); |
1510 |
if ( p ) |
1511 |
return p-1; // don't truncate the '<' |
1512 |
return 0; |
1513 |
} |
1514 |
} |
1515 |
|
1516 |
#ifdef TIXML_USE_STL |
1517 |
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag ) |
1518 |
{ |
1519 |
while ( in->good() ) |
1520 |
{ |
1521 |
int c = in->get(); |
1522 |
if ( c <= 0 ) |
1523 |
{ |
1524 |
TiXmlDocument* document = GetDocument(); |
1525 |
if ( document ) |
1526 |
document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN ); |
1527 |
return; |
1528 |
} |
1529 |
(*tag) += (char) c; |
1530 |
|
1531 |
if ( c == '>' ) |
1532 |
{ |
1533 |
// All is well. |
1534 |
return; |
1535 |
} |
1536 |
} |
1537 |
} |
1538 |
#endif |
1539 |
|
1540 |
const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding ) |
1541 |
{ |
1542 |
p = SkipWhiteSpace( p, _encoding ); |
1543 |
// Find the beginning, find the end, and look for |
1544 |
// the stuff in-between. |
1545 |
TiXmlDocument* document = GetDocument(); |
1546 |
if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) ) |
1547 |
{ |
1548 |
if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding ); |
1549 |
return 0; |
1550 |
} |
1551 |
if ( data ) |
1552 |
{ |
1553 |
data->Stamp( p, _encoding ); |
1554 |
location = data->Cursor(); |
1555 |
} |
1556 |
p += 5; |
1557 |
|
1558 |
version = ""; |
1559 |
encoding = ""; |
1560 |
standalone = ""; |
1561 |
|
1562 |
while ( p && *p ) |
1563 |
{ |
1564 |
if ( *p == '>' ) |
1565 |
{ |
1566 |
++p; |
1567 |
return p; |
1568 |
} |
1569 |
|
1570 |
p = SkipWhiteSpace( p, _encoding ); |
1571 |
if ( StringEqual( p, "version", true, _encoding ) ) |
1572 |
{ |
1573 |
TiXmlAttribute attrib; |
1574 |
p = attrib.Parse( p, data, _encoding ); |
1575 |
version = attrib.Value(); |
1576 |
} |
1577 |
else if ( StringEqual( p, "encoding", true, _encoding ) ) |
1578 |
{ |
1579 |
TiXmlAttribute attrib; |
1580 |
p = attrib.Parse( p, data, _encoding ); |
1581 |
encoding = attrib.Value(); |
1582 |
} |
1583 |
else if ( StringEqual( p, "standalone", true, _encoding ) ) |
1584 |
{ |
1585 |
TiXmlAttribute attrib; |
1586 |
p = attrib.Parse( p, data, _encoding ); |
1587 |
standalone = attrib.Value(); |
1588 |
} |
1589 |
else |
1590 |
{ |
1591 |
// Read over whatever it is. |
1592 |
while( p && *p && *p != '>' && !IsWhiteSpace( *p ) ) |
1593 |
++p; |
1594 |
} |
1595 |
} |
1596 |
return 0; |
1597 |
} |
1598 |
|
1599 |
bool TiXmlText::Blank() const |
1600 |
{ |
1601 |
for ( unsigned i=0; i<value.length(); i++ ) |
1602 |
if ( !IsWhiteSpace( value[i] ) ) |
1603 |
return false; |
1604 |
return true; |
1605 |
} |
1606 |
|