Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
ExprSeparatorWithStrings |
|
| 3.0;3 | ||||
LineSeparator |
|
| 3.0;3 | ||||
MyTokenizer |
|
| 3.0;3 | ||||
QuotedStringSeparator |
|
| 3.0;3 | ||||
TokenSep |
|
| 3.0;3 |
1 | /* $Id: MyTokenizer.java 17887 2010-01-12 21:17:18Z linus $ | |
2 | ***************************************************************************** | |
3 | * Copyright (c) 2009 Contributors - see below | |
4 | * All rights reserved. This program and the accompanying materials | |
5 | * are made available under the terms of the Eclipse Public License v1.0 | |
6 | * which accompanies this distribution, and is available at | |
7 | * http://www.eclipse.org/legal/epl-v10.html | |
8 | * | |
9 | * Contributors: | |
10 | * tfmorris | |
11 | ***************************************************************************** | |
12 | * | |
13 | * Some portions of this file were previously released using the BSD License: | |
14 | */ | |
15 | ||
16 | // Copyright (c) 1996-2006 The Regents of the University of California. All | |
17 | // Rights Reserved. Permission to use, copy, modify, and distribute this | |
18 | // software and its documentation without fee, and without a written | |
19 | // agreement is hereby granted, provided that the above copyright notice | |
20 | // and this paragraph appear in all copies. This software program and | |
21 | // documentation are copyrighted by The Regents of the University of | |
22 | // California. The software program and documentation are supplied "AS | |
23 | // IS", without any accompanying services from The Regents. The Regents | |
24 | // does not warrant that the operation of the program will be | |
25 | // uninterrupted or error-free. The end-user understands that the program | |
26 | // was developed for research purposes and is advised not to rely | |
27 | // exclusively on the program for any reason. IN NO EVENT SHALL THE | |
28 | // UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, | |
29 | // SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, | |
30 | // ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF | |
31 | // THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF | |
32 | // SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY | |
33 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
34 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE | |
35 | // PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF | |
36 | // CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, | |
37 | // UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
38 | ||
39 | package org.argouml.util; | |
40 | ||
41 | import java.util.ArrayList; | |
42 | import java.util.Collection; | |
43 | import java.util.Enumeration; | |
44 | import java.util.List; | |
45 | import java.util.NoSuchElementException; | |
46 | ||
/**
 * Internal class for managing the delimiters in MyTokenizer. It's rather
 * similar to CustomSeparator, but faster for short constant strings.
 *
 * <p>Matching is done incrementally, one character at a time, with a
 * bit-parallel state: bit {@code i} of {@code pattern} is set when the most
 * recently added characters equal the first {@code i + 1} characters of the
 * delimiter. The state is an {@code int}, which is why a delimiter can be at
 * most 32 characters long.
 *
 * <p>Instances form a singly linked list through {@link #getNext()} and
 * {@link #setNext(TokenSep)}.
 */
class TokenSep {
    /** The following delimiter in the list, or null if this is the last. */
    private TokenSep next = null;

    /** The delimiter text this instance matches. */
    private final String theString;

    /** Cached length of {@link #theString}. */
    private final int length;

    /** Bit set of the delimiter prefixes matched by the latest characters. */
    private int pattern;

    /**
     * Constructs a TokenSep that will match the String given in str.
     *
     * @param str The delimiter string.
     * @throws IllegalArgumentException if str is longer than 32 characters.
     * @throws NullPointerException if str is null.
     */
    public TokenSep(String str) {
        theString = str;
        length = str.length();
        if (length > 32) {
            throw new IllegalArgumentException("TokenSep " + str
                    + " is " + length + " (> 32) chars long");
        }
        pattern = 0;
    }

    /**
     * Called by MyTokenizer when a new character is processed in the
     * sequence. Returns true if we have found the delimiter.
     *
     * <p>An empty delimiter never matches.
     *
     * @param c The character that was just read.
     * @return true if the characters added since the last reset end with
     *         the delimiter string.
     */
    public boolean addChar(char c) {
        if (length == 0) {
            // Without this guard the test below would use 1 << -1, which
            // Java evaluates as 1 << 31, and an empty delimiter would be
            // reported as found on the 32nd character after a reset.
            return false;
        }

        // Every prefix matched so far may grow by one character, and the
        // empty prefix may start a new match at bit 0.
        pattern = (pattern << 1) | 1;

        // Drop each candidate whose next expected character is not c.
        for (int i = 0; i < length; i++) {
            if (theString.charAt(i) != c) {
                pattern &= ~(1 << i);
            }
        }

        // The top bit of the delimiter being set means all of it matched.
        return (pattern & (1 << (length - 1))) != 0;
    }

    /**
     * Called by MyTokenizer before starting scanning for a new token.
     * Forgets every partial match.
     */
    public void reset() {
        pattern = 0;
    }

    /**
     * Gets the length of this token.
     *
     * @return The number of characters in the delimiter string.
     */
    public int length() {
        return length;
    }

    /**
     * Gets this token.
     *
     * @return The delimiter string given to the constructor.
     */
    public String getString() {
        return theString;
    }

    /**
     * @param n The next to set.
     */
    public void setNext(TokenSep n) {
        this.next = n;
    }

    /**
     * @return Returns the next.
     */
    public TokenSep getNext() {
        return next;
    }
}
124 | ||
125 | /** | |
126 | * A descendent of CustomSeparator that recognizes tokens on one of two forms: | |
127 | * <ul> | |
128 | * <li><pre>'chr'.....'esc' 'chr'.....'chr'</pre> | |
129 | * <li><pre>'lchr'...'lchr'...'rchr'...'esc' 'rchr'....'rchr'</pre></ul> | |
130 | * | |
131 | * <p>The first form is suited for quoted strings, like <pre>"...\"...."</pre> | |
132 | * or <pre>'...\'...'</pre>. | |
133 | * | |
134 | * <p>The second form is suited for expressions, like | |
135 | * <pre>(a+(b*c)-15*eq(a, b))</pre>. | |
136 | * | |
137 | * <p>This is in fact the class currently used for the public separators in | |
138 | * MyTokenizer, except PAREN_EXPR_STRING_SEPARATOR and LINE_SEPARATOR. | |
139 | */ | |
140 | class QuotedStringSeparator extends CustomSeparator { | |
141 | private final char escChr; | |
142 | private final char startChr; | |
143 | private final char stopChr; | |
144 | private boolean esced; | |
145 | private int tokLen; | |
146 | private int level; | |
147 | ||
148 | /** | |
149 | * Creates a separator of the first form (see above) where | |
150 | * 'chr' = q and 'esc' = esc. | |
151 | * | |
152 | * @param q The delimiter character. | |
153 | * @param esc The escape character. | |
154 | */ | |
155 | public QuotedStringSeparator(char q, char esc) { | |
156 | 1800 | super(q); |
157 | ||
158 | 1800 | esced = false; |
159 | 1800 | escChr = esc; |
160 | 1800 | startChr = 0; |
161 | 1800 | stopChr = q; |
162 | 1800 | tokLen = 0; |
163 | 1800 | level = 1; |
164 | 1800 | } |
165 | ||
166 | /** | |
167 | * Creates a separator of the second form (see above) where | |
168 | * 'lchr' = sq, 'rchr' = eq and 'esc' = esc. | |
169 | * | |
170 | * @param sq The left delimiter character. | |
171 | * @param eq The right delimiter character. | |
172 | * @param esc The escape character. | |
173 | */ | |
174 | public QuotedStringSeparator(char sq, char eq, char esc) { | |
175 | 900 | super(sq); |
176 | ||
177 | 900 | esced = false; |
178 | 900 | escChr = esc; |
179 | 900 | startChr = sq; |
180 | 900 | stopChr = eq; |
181 | 900 | tokLen = 0; |
182 | 900 | level = 1; |
183 | 900 | } |
184 | ||
185 | public void reset() { | |
186 | 0 | super.reset(); |
187 | 0 | tokLen = 0; |
188 | 0 | level = 1; |
189 | 0 | } |
190 | ||
191 | /** | |
192 | * {@inheritDoc} | |
193 | * | |
194 | * Overridden to return the entire length of the token. | |
195 | */ | |
196 | public int tokenLength() { | |
197 | 0 | return super.tokenLength() + tokLen; |
198 | } | |
199 | ||
200 | /** | |
201 | * {@inheritDoc} | |
202 | * | |
203 | * Overridden to return true. | |
204 | * | |
205 | * @return true | |
206 | */ | |
207 | public boolean hasFreePart() { | |
208 | 0 | return true; |
209 | } | |
210 | ||
211 | /** | |
212 | * {@inheritDoc} | |
213 | * | |
214 | * Overridden to find the end of the token. | |
215 | */ | |
216 | public boolean endChar(char c) { | |
217 | 0 | tokLen++; |
218 | ||
219 | 0 | if (esced) { |
220 | 0 | esced = false; |
221 | 0 | return false; |
222 | } | |
223 | 0 | if (escChr != 0 && c == escChr) { |
224 | 0 | esced = true; |
225 | 0 | return false; |
226 | } | |
227 | 0 | if (startChr != 0 && c == startChr) |
228 | 0 | level++; |
229 | 0 | if (c == stopChr) |
230 | 0 | level--; |
231 | 0 | return level <= 0; |
232 | } | |
233 | } | |
234 | ||
235 | /** | |
236 | * A descendent of CustomSeparator that recognizes tokens on the form: | |
237 | * | |
238 | * <br>( " \" ) " ' \' ) ' ) | |
239 | * | |
240 | * <p>This is, an expression inside parentheses with proper consideration | |
241 | * for quoted strings inside the the expression. | |
242 | */ | |
243 | class ExprSeparatorWithStrings extends CustomSeparator { | |
244 | private boolean isSQuot; | |
245 | private boolean isDQuot; | |
246 | private boolean isEsc; | |
247 | private int tokLevel; | |
248 | private int tokLen; | |
249 | ||
250 | /** | |
251 | * The constructor. No choices available. | |
252 | */ | |
253 | public ExprSeparatorWithStrings() { | |
254 | 900 | super('('); |
255 | ||
256 | 900 | isEsc = false; |
257 | 900 | isSQuot = false; |
258 | 900 | isDQuot = false; |
259 | 900 | tokLevel = 1; |
260 | 900 | tokLen = 0; |
261 | 900 | } |
262 | ||
263 | public void reset() { | |
264 | 0 | super.reset(); |
265 | ||
266 | 0 | isEsc = false; |
267 | 0 | isSQuot = false; |
268 | 0 | isDQuot = false; |
269 | 0 | tokLevel = 1; |
270 | 0 | tokLen = 0; |
271 | 0 | } |
272 | ||
273 | /** | |
274 | * {@inheritDoc} | |
275 | * | |
276 | * Overridden to return the entire length of the token. | |
277 | */ | |
278 | public int tokenLength() { | |
279 | 0 | return super.tokenLength() + tokLen; |
280 | } | |
281 | ||
282 | /** | |
283 | * {@inheritDoc} | |
284 | * | |
285 | * Overridden to return true. | |
286 | * | |
287 | * @return true | |
288 | */ | |
289 | public boolean hasFreePart() { | |
290 | 0 | return true; |
291 | } | |
292 | ||
293 | /** | |
294 | * {@inheritDoc} | |
295 | * | |
296 | * Overridden to find the end of the token. | |
297 | */ | |
298 | public boolean endChar(char c) { | |
299 | 0 | tokLen++; |
300 | 0 | if (isSQuot) { |
301 | 0 | if (isEsc) { |
302 | 0 | isEsc = false; |
303 | 0 | return false; |
304 | } | |
305 | 0 | if (c == '\\') |
306 | 0 | isEsc = true; |
307 | 0 | else if (c == '\'') |
308 | 0 | isSQuot = false; |
309 | 0 | return false; |
310 | 0 | } else if (isDQuot) { |
311 | 0 | if (isEsc) { |
312 | 0 | isEsc = false; |
313 | 0 | return false; |
314 | } | |
315 | 0 | if (c == '\\') |
316 | 0 | isEsc = true; |
317 | 0 | else if (c == '\"') |
318 | 0 | isDQuot = false; |
319 | 0 | return false; |
320 | } else { | |
321 | 0 | if (c == '\'') |
322 | 0 | isSQuot = true; |
323 | 0 | else if (c == '\"') |
324 | 0 | isDQuot = true; |
325 | 0 | else if (c == '(') |
326 | 0 | tokLevel++; |
327 | 0 | else if (c == ')') |
328 | 0 | tokLevel--; |
329 | 0 | return tokLevel <= 0; |
330 | } | |
331 | } | |
332 | } | |
333 | ||
334 | /** | |
335 | * A descendent of CustomSeparator that recognizes "the tree line ends": | |
336 | * <ul> | |
337 | * <li>UNIX: <lf></li> | |
338 | * <li>DOS: <cr> <lf></li> | |
339 | * <li>MAC: <cr></li> | |
340 | * </ul> | |
341 | * | |
342 | * <p>This is in fact the class currently used LINE_SEPARATOR in MyTokenizer. | |
343 | */ | |
344 | class LineSeparator extends CustomSeparator { | |
345 | private boolean hasCr; | |
346 | private boolean hasLf; | |
347 | private boolean hasPeeked; | |
348 | ||
349 | /** | |
350 | * Creates a LineSeparator. | |
351 | */ | |
352 | 900 | public LineSeparator() { |
353 | 900 | hasCr = false; |
354 | 900 | hasLf = false; |
355 | 900 | hasPeeked = false; |
356 | 900 | } |
357 | ||
358 | public void reset() { | |
359 | 0 | super.reset(); |
360 | 0 | hasCr = false; |
361 | 0 | hasLf = false; |
362 | 0 | hasPeeked = false; |
363 | 0 | } |
364 | ||
365 | /** | |
366 | * {@inheritDoc} | |
367 | */ | |
368 | public int tokenLength() { | |
369 | 0 | return hasCr && hasLf ? 2 : 1; |
370 | } | |
371 | ||
372 | /** | |
373 | * {@inheritDoc} | |
374 | */ | |
375 | public int getPeekCount() { | |
376 | 0 | return hasPeeked ? 1 : 0; |
377 | } | |
378 | ||
379 | /** | |
380 | * {@inheritDoc} | |
381 | */ | |
382 | public boolean hasFreePart() { | |
383 | 0 | return !hasLf; |
384 | } | |
385 | ||
386 | /** | |
387 | * {@inheritDoc} | |
388 | * | |
389 | * Overridden to find the start of a line-end. | |
390 | */ | |
391 | public boolean addChar(char c) { | |
392 | 0 | if (c == '\n') { |
393 | 0 | hasLf = true; |
394 | 0 | return true; |
395 | } | |
396 | ||
397 | 0 | if (c == '\r') { |
398 | 0 | hasCr = true; |
399 | 0 | return true; |
400 | } | |
401 | ||
402 | 0 | return false; |
403 | } | |
404 | ||
405 | /** | |
406 | * {@inheritDoc} | |
407 | * | |
408 | * Overridden to find the end of a line-end. | |
409 | */ | |
410 | public boolean endChar(char c) { | |
411 | 0 | if (c == '\n') { |
412 | 0 | hasLf = true; |
413 | } else { | |
414 | 0 | hasPeeked = true; |
415 | } | |
416 | ||
417 | 0 | return true; |
418 | } | |
419 | } | |
420 | ||
421 | /** | |
422 | * Class for dividing a String into any number of parts. Each part will be a | |
423 | * substring of the original String. The first part will at least contain the | |
424 | * first character in the string. All following parts will at least contain | |
425 | * the first character in the String not covered by any previous part. | |
426 | * | |
427 | * <p>The delim parameter to the constructors is a comma separated list of | |
428 | * tokens that should be recognized by the tokenizer. These tokens will be | |
429 | * returned by the tokenizer as tokens, and any arbitrary text between them | |
430 | * will also be returned as tokens. Since the comma has special meaning in | |
431 | * this string, it can be escaped with \ to only mean itself (like in "\\,"). | |
432 | * For technical reasons it is not possible for any token in this list to be | |
433 | * more than 32 characters long. | |
434 | * | |
435 | * <p>In addition to the delim parameter it is also possible to use custom | |
436 | * separators that allow any string that can be generated by the limited | |
437 | * version of a Turing machine that your computer is, to be used as a | |
438 | * delimiter. | |
439 | * | |
440 | * <p>There are some custom separators provided that you can use to get | |
441 | * things like strings in one token. These cannot be used simultaneously by | |
442 | * several tokenizers, ie they are not thread safe. | |
443 | * | |
444 | * <p>The tokenizer works in a kind of greedy way. When the first separator | |
445 | * token from delim is matched or any CustomSeparator returns true from | |
446 | * addChar, then it is satisfied it has found a token and does NOT check if | |
447 | * it could have found a longer token. Eg: if you have this delim string | |
448 | * "<,<<", then "<<" will never be found. | |
449 | * | |
450 | * <p><b>Example</b><br><pre> | |
451 | * MyTokenizer tzer = new MyTokenizer("Hello, how are you?", " ,\\,"); | |
452 | * while (tzer.hasMoreTokens()) | |
453 | * _cat.info("\"" + tzer.nextToken() + "\""); | |
454 | * </pre> | |
455 | * | |
456 | * <p>Which whould yield the following output:<pre> | |
457 | * "Hello" | |
458 | * "," | |
459 | * " " | |
460 | * "how" | |
461 | * " " | |
462 | * "are" | |
463 | * " " | |
464 | * "you?" | |
465 | * </pre> | |
466 | * | |
467 | * @author Michael Stockman | |
468 | * @since 0.11.2 | |
469 | * @see CustomSeparator | |
470 | */ | |
471 | public class MyTokenizer implements Enumeration { | |
472 | /** A custom separator for quoted strings enclosed in single quotes | |
473 | * and using \ as escape character. There may not be an end quote | |
474 | * if the tokenizer reaches the end of the String. */ | |
475 | 900 | public static final CustomSeparator SINGLE_QUOTED_SEPARATOR = |
476 | new QuotedStringSeparator('\'', '\\'); | |
477 | ||
478 | /** A custom separator for quoted strings enclosed in double quotes | |
479 | * and using \ as escape character. There may not be an end quote | |
480 | * if the tokenizer reaches the end of the String. */ | |
481 | 900 | public static final CustomSeparator DOUBLE_QUOTED_SEPARATOR = |
482 | new QuotedStringSeparator('\"', '\\'); | |
483 | ||
484 | /** A custom separator for expressions enclosed in parentheses and | |
485 | * matching lparams with rparams. There may not be proper matching | |
486 | * if the tokenizer reaches the end of the String. Do not use this | |
487 | * together with PAREN_EXPR_STRING_SEPARATOR. */ | |
488 | 900 | public static final CustomSeparator PAREN_EXPR_SEPARATOR = |
489 | new QuotedStringSeparator('(', ')', '\0'); | |
490 | ||
491 | /** A custom separator for expressions enclosed in parentheses and | |
492 | * matching lparams with rparams. There may not be proper matching | |
493 | * if the tokenizer reaches the end of the String. It also takes | |
494 | * quoted strings (either single or double quotes) in the expression | |
495 | * into consideration, unlike PAREN_EXPR_SEPARATOR. Do not use this | |
496 | * together with PAREN_EXPR_SEPARATOR. */ | |
497 | 900 | public static final CustomSeparator PAREN_EXPR_STRING_SEPARATOR = |
498 | new ExprSeparatorWithStrings(); | |
499 | ||
500 | /** A custom separator for texts. Singles out the line ends, | |
501 | * and consequently the lines, if they are in either dos, mac | |
502 | * or unix format. */ | |
503 | 900 | public static final CustomSeparator LINE_SEPARATOR = |
504 | new LineSeparator(); | |
505 | ||
506 | private int sIdx; | |
507 | private final int eIdx; | |
508 | private int tokIdx; | |
509 | private final String source; | |
510 | private final TokenSep delims; | |
511 | private String savedToken; | |
512 | private int savedIdx; | |
513 | private List customSeps; | |
514 | private String putToken; | |
515 | ||
516 | /** | |
517 | * Constructs a new instance. See above for a description of the | |
518 | * delimiter string. | |
519 | * | |
520 | * @param string The String to be tokenized. | |
521 | * @param delim The String of delimiters. | |
522 | */ | |
523 | 0 | public MyTokenizer(String string, String delim) { |
524 | 0 | source = string; |
525 | 0 | delims = parseDelimString(delim); |
526 | 0 | sIdx = 0; |
527 | 0 | tokIdx = 0; |
528 | 0 | eIdx = string.length(); |
529 | 0 | savedToken = null; |
530 | 0 | customSeps = null; |
531 | 0 | putToken = null; |
532 | 0 | } |
533 | ||
534 | /** | |
535 | * Constructs a new instance. See above for a description of the | |
536 | * delimiter string and custom separators. | |
537 | * | |
538 | * @param string The String to be tokenized. | |
539 | * @param delim The String of delimiters. | |
540 | * @param sep A custom separator to use. | |
541 | */ | |
542 | 0 | public MyTokenizer(String string, String delim, CustomSeparator sep) { |
543 | 0 | source = string; |
544 | 0 | delims = parseDelimString(delim); |
545 | 0 | sIdx = 0; |
546 | 0 | tokIdx = 0; |
547 | 0 | eIdx = string.length(); |
548 | 0 | savedToken = null; |
549 | 0 | customSeps = new ArrayList(); |
550 | 0 | customSeps.add(sep); |
551 | 0 | } |
552 | ||
553 | /** | |
554 | * Constructs a new instance. See above for a description of the | |
555 | * delimiter string and custom separators. | |
556 | * | |
557 | * @param string The String to be tokenized. | |
558 | * @param delim The String of delimiters. | |
559 | * @param seps Some container with custom separators to use. | |
560 | */ | |
561 | 0 | public MyTokenizer(String string, String delim, Collection seps) { |
562 | 0 | source = string; |
563 | 0 | delims = parseDelimString(delim); |
564 | 0 | sIdx = 0; |
565 | 0 | tokIdx = 0; |
566 | 0 | eIdx = string.length(); |
567 | 0 | savedToken = null; |
568 | 0 | customSeps = new ArrayList(seps); |
569 | 0 | } |
570 | ||
571 | /** | |
572 | * Returns true if there are more tokens left. | |
573 | * | |
574 | * @return true if another token can be fetched with nextToken. | |
575 | */ | |
576 | public boolean hasMoreTokens() { | |
577 | 0 | return sIdx < eIdx || savedToken != null |
578 | || putToken != null; | |
579 | } | |
580 | ||
581 | /** | |
582 | * Retrives the next token. | |
583 | * | |
584 | * @return The next token. | |
585 | */ | |
586 | public String nextToken() { | |
587 | CustomSeparator csep; | |
588 | TokenSep sep; | |
589 | 0 | String s = null; |
590 | int i, j; | |
591 | ||
592 | 0 | if (putToken != null) { |
593 | 0 | s = putToken; |
594 | 0 | putToken = null; |
595 | 0 | return s; |
596 | } | |
597 | ||
598 | 0 | if (savedToken != null) { |
599 | 0 | s = savedToken; |
600 | 0 | tokIdx = savedIdx; |
601 | 0 | savedToken = null; |
602 | 0 | return s; |
603 | } | |
604 | ||
605 | 0 | if (sIdx >= eIdx) |
606 | 0 | throw new NoSuchElementException( |
607 | "No more tokens available"); | |
608 | ||
609 | 0 | for (sep = delims; sep != null; sep = sep.getNext()) |
610 | 0 | sep.reset(); |
611 | ||
612 | 0 | if (customSeps != null) { |
613 | 0 | for (i = 0; i < customSeps.size(); i++) |
614 | 0 | ((CustomSeparator) customSeps.get(i)).reset(); |
615 | } | |
616 | ||
617 | 0 | for (i = sIdx; i < eIdx; i++) { |
618 | 0 | char c = source.charAt(i); |
619 | ||
620 | 0 | for (j = 0; customSeps != null |
621 | 0 | && j < customSeps.size(); j++) { |
622 | 0 | csep = (CustomSeparator) customSeps.get(j); |
623 | ||
624 | 0 | if (csep.addChar(c)) |
625 | 0 | break; |
626 | } | |
627 | 0 | if (customSeps != null && j < customSeps.size()) { |
628 | 0 | csep = (CustomSeparator) customSeps.get(j); |
629 | ||
630 | 0 | while (csep.hasFreePart() && i + 1 < eIdx) |
631 | 0 | if (csep.endChar(source.charAt(++i))) |
632 | 0 | break; |
633 | 0 | i -= Math.min(csep.getPeekCount(), i); |
634 | ||
635 | 0 | int clen = Math.min(i + 1, source.length()); |
636 | ||
637 | 0 | if (i - sIdx + 1 > csep.tokenLength()) { |
638 | 0 | s = source.substring(sIdx, |
639 | i - csep.tokenLength() + 1); | |
640 | ||
641 | 0 | savedIdx = i - csep.tokenLength() + 1; |
642 | 0 | savedToken = source.substring( |
643 | savedIdx, clen); | |
644 | } else { | |
645 | 0 | s = source.substring(sIdx, clen); |
646 | } | |
647 | ||
648 | 0 | tokIdx = sIdx; |
649 | 0 | sIdx = i + 1; |
650 | 0 | break; |
651 | } | |
652 | ||
653 | 0 | for (sep = delims; sep != null; sep = sep.getNext()) |
654 | 0 | if (sep.addChar(c)) |
655 | 0 | break; |
656 | 0 | if (sep != null) { |
657 | 0 | if (i - sIdx + 1 > sep.length()) { |
658 | 0 | s = source.substring(sIdx, |
659 | i - sep.length() + 1); | |
660 | 0 | savedIdx = i - sep.length() + 1; |
661 | 0 | savedToken = sep.getString(); |
662 | } else { | |
663 | 0 | s = sep.getString(); |
664 | } | |
665 | 0 | tokIdx = sIdx; |
666 | 0 | sIdx = i + 1; |
667 | 0 | break; |
668 | } | |
669 | } | |
670 | ||
671 | 0 | if (s == null) { |
672 | 0 | s = source.substring(sIdx); |
673 | 0 | tokIdx = sIdx; |
674 | 0 | sIdx = eIdx; |
675 | } | |
676 | ||
677 | 0 | return s; |
678 | } | |
679 | ||
680 | /** | |
681 | * This class implements the Enumeration interface. This call maps | |
682 | * to nextToken. | |
683 | * | |
684 | * @return nextToken(); | |
685 | * @see #nextToken() nextToken | |
686 | */ | |
687 | public Object nextElement() { | |
688 | 0 | return nextToken(); |
689 | } | |
690 | ||
691 | /** | |
692 | * This class implements the Enumeration interface. This call maps | |
693 | * to hasMoreTokens. | |
694 | * | |
695 | * @return hasMoreTokens(); | |
696 | * @see #hasMoreTokens() hasMoreTokens | |
697 | */ | |
698 | public boolean hasMoreElements() { | |
699 | 0 | return hasMoreTokens(); |
700 | } | |
701 | ||
702 | /** | |
703 | * Returns the index in the string of the last token returned by | |
704 | * nextToken, or zero if no token has been retrived. | |
705 | * | |
706 | * @return The index of the last token. | |
707 | */ | |
708 | public int getTokenIndex() { | |
709 | 0 | return tokIdx; |
710 | } | |
711 | ||
712 | /** | |
713 | * Put a token on the input stream. This will be the next token read | |
714 | * from the tokenizer. If this function is called again before the | |
715 | * last token has been read, then it will be lost. | |
716 | * | |
717 | * <p>The index returned from getTokenIndex will be the same for the | |
718 | * token put as that of the last token that wasn't put. | |
719 | * | |
720 | * @param s The token to put. | |
721 | * @throws NullPointerException if s is null. | |
722 | */ | |
723 | public void putToken(String s) { | |
724 | 0 | if (s == null) |
725 | 0 | throw new NullPointerException( |
726 | "Cannot put a null token"); | |
727 | ||
728 | 0 | putToken = s; |
729 | 0 | } |
730 | ||
731 | /** | |
732 | * Creates a linked list of TokenSeps from the comma separated string | |
733 | * str. | |
734 | * | |
735 | * @param str The string specifying delimiter strings. | |
736 | * @return A list of TokenSeps. | |
737 | */ | |
738 | private static TokenSep parseDelimString(String str) { | |
739 | 0 | TokenSep first = null; |
740 | 0 | TokenSep p = null; |
741 | int idx0, idx1, length; | |
742 | 0 | StringBuilder val = new StringBuilder(); |
743 | char c; | |
744 | ||
745 | 0 | length = str.length(); |
746 | 0 | for (idx0 = 0; idx0 < length;) { |
747 | 0 | for (idx1 = idx0; idx1 < length; idx1++) { |
748 | 0 | c = str.charAt(idx1); |
749 | 0 | if (c == '\\') { |
750 | 0 | idx1++; |
751 | 0 | if (idx1 < length) |
752 | 0 | val.append(str.charAt(idx1)); |
753 | 0 | } else if (c == ',') { |
754 | 0 | break; |
755 | } else { | |
756 | 0 | val.append(c); |
757 | } | |
758 | } | |
759 | 0 | idx1 = Math.min(idx1, length); |
760 | 0 | if (idx1 > idx0) { |
761 | 0 | p = new TokenSep(val.toString()); |
762 | 0 | val = new StringBuilder(); |
763 | 0 | p.setNext(first); |
764 | 0 | first = p; |
765 | } | |
766 | ||
767 | 0 | idx0 = idx1 + 1; |
768 | } | |
769 | ||
770 | 0 | return first; |
771 | } | |
772 | } | |
773 |