1 |
///////////////////////////////////////////////////////////////////////////// |
2 |
// Name: src/common/tokenzr.cpp |
3 |
// Purpose: String tokenizer |
4 |
// Author: Guilhem Lavaux |
5 |
// Modified by: Vadim Zeitlin (almost full rewrite) |
6 |
// Created: 04/22/98 |
7 |
// RCS-ID: $Id: tokenzr.cpp 39694 2006-06-13 11:30:40Z ABX $ |
8 |
// Copyright: (c) Guilhem Lavaux |
9 |
// Licence: wxWindows licence |
10 |
///////////////////////////////////////////////////////////////////////////// |
11 |
|
12 |
// ============================================================================ |
13 |
// declarations |
14 |
// ============================================================================ |
15 |
|
16 |
// ---------------------------------------------------------------------------- |
17 |
// headers |
18 |
// ---------------------------------------------------------------------------- |
19 |
|
20 |
// For compilers that support precompilation, includes "wx.h". |
21 |
#include "wx/wxprec.h" |
22 |
|
23 |
#ifdef __BORLANDC__ |
24 |
#pragma hdrstop |
25 |
#endif |
26 |
|
27 |
#include "wx/tokenzr.h" |
28 |
|
29 |
#ifndef WX_PRECOMP |
30 |
#include "wx/arrstr.h" |
31 |
#endif |
32 |
|
33 |
// Required for wxIs... functions |
34 |
#include <ctype.h> |
35 |
|
36 |
// ============================================================================ |
37 |
// implementation |
38 |
// ============================================================================ |
39 |
|
40 |
// ---------------------------------------------------------------------------- |
41 |
// wxStringTokenizer construction |
42 |
// ---------------------------------------------------------------------------- |
43 |
|
44 |
// Construct a tokenizer for the given string: all the real work (choosing the
// effective mode, storing the delimiters and resetting the position) is
// delegated to SetString().
wxStringTokenizer::wxStringTokenizer(const wxString& str,
                                     const wxString& delims,
                                     wxStringTokenizerMode mode)
{
    this->SetString(str, delims, mode);
}
50 |
|
51 |
void wxStringTokenizer::SetString(const wxString& str, |
52 |
const wxString& delims, |
53 |
wxStringTokenizerMode mode) |
54 |
{ |
55 |
if ( mode == wxTOKEN_DEFAULT ) |
56 |
{ |
57 |
// by default, we behave like strtok() if the delimiters are only |
58 |
// whitespace characters and as wxTOKEN_RET_EMPTY otherwise (for |
59 |
// whitespace delimiters, strtok() behaviour is better because we want |
60 |
// to count consecutive spaces as one delimiter) |
61 |
const wxChar *p; |
62 |
for ( p = delims.c_str(); *p; p++ ) |
63 |
{ |
64 |
if ( !wxIsspace(*p) ) |
65 |
break; |
66 |
} |
67 |
|
68 |
if ( *p ) |
69 |
{ |
70 |
// not whitespace char in delims |
71 |
mode = wxTOKEN_RET_EMPTY; |
72 |
} |
73 |
else |
74 |
{ |
75 |
// only whitespaces |
76 |
mode = wxTOKEN_STRTOK; |
77 |
} |
78 |
} |
79 |
|
80 |
m_delims = delims; |
81 |
m_mode = mode; |
82 |
|
83 |
Reinit(str); |
84 |
} |
85 |
|
86 |
void wxStringTokenizer::Reinit(const wxString& str) |
87 |
{ |
88 |
wxASSERT_MSG( IsOk(), _T("you should call SetString() first") ); |
89 |
|
90 |
m_string = str; |
91 |
m_pos = 0; |
92 |
m_lastDelim = _T('\0'); |
93 |
} |
94 |
|
95 |
// ---------------------------------------------------------------------------- |
96 |
// access to the tokens |
97 |
// ---------------------------------------------------------------------------- |
98 |
|
99 |
// do we have more of them? |
100 |
bool wxStringTokenizer::HasMoreTokens() const |
101 |
{ |
102 |
wxCHECK_MSG( IsOk(), false, _T("you should call SetString() first") ); |
103 |
|
104 |
if ( m_string.find_first_not_of(m_delims, m_pos) != wxString::npos ) |
105 |
{ |
106 |
// there are non delimiter characters left, so we do have more tokens |
107 |
return true; |
108 |
} |
109 |
|
110 |
switch ( m_mode ) |
111 |
{ |
112 |
case wxTOKEN_RET_EMPTY: |
113 |
case wxTOKEN_RET_DELIMS: |
114 |
// special hack for wxTOKEN_RET_EMPTY: we should return the initial |
115 |
// empty token even if there are only delimiters after it |
116 |
return m_pos == 0 && !m_string.empty(); |
117 |
|
118 |
case wxTOKEN_RET_EMPTY_ALL: |
119 |
// special hack for wxTOKEN_RET_EMPTY_ALL: we can know if we had |
120 |
// already returned the trailing empty token after the last |
121 |
// delimiter by examining m_lastDelim: it is set to NUL if we run |
122 |
// up to the end of the string in GetNextToken(), but if it is not |
123 |
// NUL yet we still have this last token to return even if m_pos is |
124 |
// already at m_string.length() |
125 |
return m_pos < m_string.length() || m_lastDelim != _T('\0'); |
126 |
|
127 |
case wxTOKEN_INVALID: |
128 |
case wxTOKEN_DEFAULT: |
129 |
wxFAIL_MSG( _T("unexpected tokenizer mode") ); |
130 |
// fall through |
131 |
|
132 |
case wxTOKEN_STRTOK: |
133 |
// never return empty delimiters |
134 |
break; |
135 |
} |
136 |
|
137 |
return false; |
138 |
} |
139 |
|
140 |
// count the number of (remaining) tokens in the string |
141 |
size_t wxStringTokenizer::CountTokens() const |
142 |
{ |
143 |
wxCHECK_MSG( IsOk(), 0, _T("you should call SetString() first") ); |
144 |
|
145 |
// VZ: this function is IMHO not very useful, so it's probably not very |
146 |
// important if its implementation here is not as efficient as it |
147 |
// could be -- but OTOH like this we're sure to get the correct answer |
148 |
// in all modes |
149 |
wxStringTokenizer tkz(m_string.c_str() + m_pos, m_delims, m_mode); |
150 |
|
151 |
size_t count = 0; |
152 |
while ( tkz.HasMoreTokens() ) |
153 |
{ |
154 |
count++; |
155 |
|
156 |
(void)tkz.GetNextToken(); |
157 |
} |
158 |
|
159 |
return count; |
160 |
} |
161 |
|
162 |
// ---------------------------------------------------------------------------- |
163 |
// token extraction |
164 |
// ---------------------------------------------------------------------------- |
165 |
|
166 |
// Extract the next token and advance the current position past it (and past
// the delimiter terminating it, if any).
//
// If HasMoreTokens() is false, the token is returned as it was default
// constructed. m_lastDelim is updated to the delimiter which terminated the
// token or to NUL if the token ran up to the end of the string.
wxString wxStringTokenizer::GetNextToken()
{
    wxString token;

    for ( ;; )
    {
        if ( !HasMoreTokens() )
            break;

        // look for the delimiter terminating the current token
        const size_t delimPos = m_string.find_first_of(m_delims, m_pos);

        if ( delimPos != wxString::npos )
        {
            // the delimiter itself is only made part of the token in
            // wxTOKEN_RET_DELIMS mode, it is left out in all the other ones
            size_t tokenLen = delimPos - m_pos;
            if ( m_mode == wxTOKEN_RET_DELIMS )
                tokenLen++;

            token.assign(m_string, m_pos, tokenLen);

            // the next token starts right after the delimiter
            m_pos = delimPos + 1;

            m_lastDelim = m_string[delimPos];
        }
        else
        {
            // no delimiters left: the token is all the rest of the string
            token.assign(m_string, m_pos, wxString::npos);

            // nothing remains after it
            m_pos = m_string.length();

            // and it was not terminated by any delimiter
            m_lastDelim = _T('\0');
        }

        // an empty token is only an acceptable result if the mode allows it,
        // otherwise keep looking for the next one
        if ( AllowEmpty() || !token.empty() )
            break;
    }

    return token;
}
212 |
|
213 |
// ---------------------------------------------------------------------------- |
214 |
// public functions |
215 |
// ---------------------------------------------------------------------------- |
216 |
|
217 |
// Convenience function: tokenize the string in one go and return all of its
// tokens, in the order in which wxStringTokenizer yields them, as an array.
wxArrayString wxStringTokenize(const wxString& str,
                               const wxString& delims,
                               wxStringTokenizerMode mode)
{
    wxArrayString result;

    for ( wxStringTokenizer tokenizer(str, delims, mode);
          tokenizer.HasMoreTokens(); )
    {
        result.Add(tokenizer.GetNextToken());
    }

    return result;
}