Coverage Report

 /* $Id: MyTokenizer.java 17887 2010-01-12 21:17:18Z linus $
  *****************************************************************************
  * Copyright (c) 2009 Contributors - see below
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
  * Contributors:
  *    tfmorris
  *****************************************************************************
  *
  * Some portions of this file were previously released under the BSD License:
  */
 
 // Copyright (c) 1996-2006 The Regents of the University of California. All
 // Rights Reserved. Permission to use, copy, modify, and distribute this
 // software and its documentation without fee, and without a written
 // agreement is hereby granted, provided that the above copyright notice
 // and this paragraph appear in all copies.  This software program and
 // documentation are copyrighted by The Regents of the University of
 // California. The software program and documentation are supplied "AS
 // IS", without any accompanying services from The Regents. The Regents
 // does not warrant that the operation of the program will be
 // uninterrupted or error-free. The end-user understands that the program
 // was developed for research purposes and is advised not to rely
 // exclusively on the program for any reason.  IN NO EVENT SHALL THE
 // UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
 // SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
 // ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
 // THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
 // SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY
 // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
 // PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
 // CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
 // UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 
 package org.argouml.util;
 
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Enumeration;
 import java.util.List;
 import java.util.NoSuchElementException;
 
 /**
  * Internal class for managing the delimiters in MyTokenizer. It's rather
  * similar to CustomSeparator, but faster for short constant strings.
  */
 class TokenSep {
     private TokenSep next = null;
     private final String theString;
     private final int length;
     private int pattern;
 
     /**
      * Constructs a TokenSep that will match the String given in str.
      *
      * @param str The delimiter string.
      */
     public TokenSep(String str) {
         theString = str;
         length = str.length();
         if (length > 32)
             throw new IllegalArgumentException("TokenSep " + str
                         + " is " + length + " (> 32) chars long");
         pattern = 0;
     }
 
     /**
      * Called by MyTokenizer when a new character is processed in the
      * sequence. Returns true if we have found the delimiter.
      */
     public boolean addChar(char c) {
         int i;
 
         pattern <<= 1;
         pattern |= 1;
         for (i = 0; i < length; i++) {
             if (theString.charAt(i) != c) {
                 pattern &= ~(1 << i);
             }
         }
 
         return (pattern & (1 << (length - 1))) != 0;
     }
 
     /**
      * Called by MyTokenizer before starting scanning for a new token.
      */
     public void reset() {
         pattern = 0;
     }
 
     /**
      * Gets the length of this token.
      */
     public int length() {
         return length;
     }
 
     /**
      * Gets this token.
      */
     public String getString() {
         return theString;
     }
 
     /**
      * @param n The next to set.
      */
     public void setNext(TokenSep n) {
         this.next = n;
     }
 
     /**
      * @return Returns the next.
      */
     public TokenSep getNext() {
         return next;
     }
 }
 
 /**
  * A descendent of CustomSeparator that recognizes tokens on one of two forms:
  * <ul>
  * <li><pre>'chr'.....'esc' 'chr'.....'chr'</pre>
  * <li><pre>'lchr'...'lchr'...'rchr'...'esc' 'rchr'....'rchr'</pre></ul>
  *
  * <p>The first form is suited for quoted strings, like <pre>"...\"...."</pre>
  * or <pre>'...\'...'</pre>.
  *
  * <p>The second form is suited for expressions, like
  * <pre>(a+(b*c)-15*eq(a, b))</pre>.
  *
  * <p>This is in fact the class currently used for the public separators in
  * MyTokenizer, except PAREN_EXPR_STRING_SEPARATOR and LINE_SEPARATOR.
  */
 class QuotedStringSeparator extends CustomSeparator {
     private final char escChr;
     private final char startChr;
     private final char stopChr;
     private boolean esced;
     private int tokLen;
     private int level;
 
     /**
      * Creates a separator of the first form (see above) where
      * 'chr' = q and 'esc' = esc.
      *
      * @param q The delimiter character.
      * @param esc The escape character.
      */
     public QuotedStringSeparator(char q, char esc) {
         super(q);
 
         esced = false;
         escChr = esc;
         startChr = 0;
         stopChr = q;
         tokLen = 0;
         level = 1;
     }
 
     /**
      * Creates a separator of the second form (see above) where
      * 'lchr' = sq, 'rchr' = eq and 'esc' = esc.
      *
      * @param sq The left delimiter character.
      * @param eq The right delimiter character.
      * @param esc The escape character.
      */
     public QuotedStringSeparator(char sq, char eq, char esc) {
         super(sq);
 
         esced = false;
         escChr = esc;
         startChr = sq;
         stopChr = eq;
         tokLen = 0;
         level = 1;
     }
 
     public void reset() {
         super.reset();
         tokLen = 0;
         level = 1;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to return the entire length of the token.
      */
     public int tokenLength() {
         return super.tokenLength() + tokLen;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to return true.
      *
      * @return true
      */
     public boolean hasFreePart() {
         return true;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to find the end of the token.
      */
     public boolean endChar(char c) {
         tokLen++;
 
         if (esced) {
             esced = false;
             return false;
         }
         if (escChr != 0 && c == escChr) {
             esced = true;
             return false;
         }
         if (startChr != 0 && c == startChr)
             level++;
         if (c == stopChr)
             level--;
         return level <= 0;
     }
 }
 
 /**
  * A descendent of CustomSeparator that recognizes tokens on the form:
  *
  * <br>( " \" ) " ' \' ) ' )
  *
  * <p>This is, an expression inside parentheses with proper consideration
  * for quoted strings inside the the expression.
  */
 class ExprSeparatorWithStrings extends CustomSeparator {
     private boolean isSQuot;
     private boolean isDQuot;
     private boolean isEsc;
     private int tokLevel;
     private int tokLen;
 
     /**
      * The constructor. No choices available.
      */
     public ExprSeparatorWithStrings() {
         super('(');
 
         isEsc = false;
         isSQuot = false;
         isDQuot = false;
         tokLevel = 1;
         tokLen = 0;
     }
 
     public void reset() {
         super.reset();
 
         isEsc = false;
         isSQuot = false;
         isDQuot = false;
         tokLevel = 1;
         tokLen = 0;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to return the entire length of the token.
      */
     public int tokenLength() {
         return super.tokenLength() + tokLen;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to return true.
      *
      * @return true
      */
     public boolean hasFreePart() {
         return true;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to find the end of the token.
      */
     public boolean endChar(char c) {
         tokLen++;
         if (isSQuot) {
             if (isEsc) {
                 isEsc = false;
                 return false;
             }
             if (c == '\\')
                 isEsc = true;
             else if (c == '\'')
                 isSQuot = false;
             return false;
         } else if (isDQuot) {
             if (isEsc) {
                 isEsc = false;
                 return false;
             }
             if (c == '\\')
                 isEsc = true;
             else if (c == '\"')
                 isDQuot = false;
             return false;
         } else {
             if (c == '\'')
                 isSQuot = true;
             else if (c == '\"')
                 isDQuot = true;
             else if (c == '(')
                 tokLevel++;
             else if (c == ')')
                 tokLevel--;
             return tokLevel <= 0;
         }
     }
 }
 
 /**
  * A descendent of CustomSeparator that recognizes "the tree line ends":
  * <ul>
  * <li>UNIX: &lt;lf&gt;</li>
  * <li>DOS: &lt;cr&gt; &lt;lf&gt;</li>
  * <li>MAC: &lt;cr&gt;</li>
  * </ul>
  *
  * <p>This is in fact the class currently used LINE_SEPARATOR in MyTokenizer.
  */
 class LineSeparator extends CustomSeparator {
     private boolean hasCr;
     private boolean hasLf;
     private boolean hasPeeked;
 
     /**
      * Creates a LineSeparator.
      */
     public LineSeparator() {
         hasCr = false;
         hasLf = false;
         hasPeeked = false;
     }
 
     public void reset() {
         super.reset();
         hasCr = false;
         hasLf = false;
         hasPeeked = false;
     }
 
     /**
      * {@inheritDoc}
      */
     public int tokenLength() {
         return hasCr && hasLf ? 2 : 1;
     }
 
     /**
      * {@inheritDoc}
      */
     public int getPeekCount() {
         return hasPeeked ? 1 : 0;
     }
 
     /**
      * {@inheritDoc}
      */
     public boolean hasFreePart() {
         return !hasLf;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to find the start of a line-end.
      */
     public boolean addChar(char c) {
         if (c == '\n') {
             hasLf = true;
             return true;
         }
 
         if (c == '\r') {
             hasCr = true;
             return true;
         }
 
         return false;
     }
 
     /**
      * {@inheritDoc}
      *
      * Overridden to find the end of a line-end.
      */
     public boolean endChar(char c) {
         if (c == '\n') {
             hasLf = true;
         } else {
             hasPeeked = true;
         }
 
         return true;
     }
 }
 
 /**
  * Class for dividing a String into any number of parts. Each part will be a
  * substring of the original String. The first part will at least contain the
  * first character in the string. All following parts will at least contain
  * the first character in the String not covered by any previous part.
  *
  * <p>The delim parameter to the constructors is a comma separated list of
  * tokens that should be recognized by the tokenizer. These tokens will be
  * returned by the tokenizer as tokens, and any arbitrary text between them
  * will also be returned as tokens. Since the comma has special meaning in
  * this string, it can be escaped with \ to only mean itself (like in "\\,").
  * For technical reasons it is not possible for any token in this list to be
  * more than 32 characters long.
  *
  * <p>In addition to the delim parameter it is also possible to use custom
  * separators that allow any string that can be generated by the limited
  * version of a Turing machine that your computer is, to be used as a
  * delimiter.
  *
  * <p>There are some custom separators provided that you can use to get
  * things like strings in one token. These cannot be used simultaneously by
  * several tokenizers, ie they are not thread safe.
  *
  * <p>The tokenizer works in a kind of greedy way. When the first separator
  * token from delim is matched or any CustomSeparator returns true from
  * addChar, then it is satisfied it has found a token and does NOT check if
  * it could have found a longer token. Eg: if you have this delim string
  * "<,<<", then "<<" will never be found.
  *
  * <p><b>Example</b><br><pre>
  * MyTokenizer tzer = new MyTokenizer("Hello, how are you?", " ,\\,");
  * while (tzer.hasMoreTokens())
  *   _cat.info("\"" + tzer.nextToken() + "\"");
  * </pre>
  *
  * <p>Which whould yield the following output:<pre>
  *   "Hello"
  *   ","
  *   " "
  *   "how"
  *   " "
  *   "are"
  *   " "
  *   "you?"
  * </pre>
  *
  * @author Michael Stockman
  * @since 0.11.2
  * @see CustomSeparator
  */
 public class MyTokenizer implements Enumeration {
     /** A custom separator for quoted strings enclosed in single quotes
      *  and using \ as escape character. There may not be an end quote
      *  if the tokenizer reaches the end of the String. */
     public static final CustomSeparator SINGLE_QUOTED_SEPARATOR =
         new QuotedStringSeparator('\'', '\\');
 
     /** A custom separator for quoted strings enclosed in double quotes
      *  and using \ as escape character. There may not be an end quote
      *  if the tokenizer reaches the end of the String. */
     public static final CustomSeparator DOUBLE_QUOTED_SEPARATOR =
         new QuotedStringSeparator('\"', '\\');
 
     /** A custom separator for expressions enclosed in parentheses and
      *  matching lparams with rparams. There may not be proper matching
      *  if the tokenizer reaches the end of the String. Do not use this
      *  together with PAREN_EXPR_STRING_SEPARATOR. */
     public static final CustomSeparator PAREN_EXPR_SEPARATOR =
         new QuotedStringSeparator('(', ')', '\0');
 
     /** A custom separator for expressions enclosed in parentheses and
      *  matching lparams with rparams. There may not be proper matching
      *  if the tokenizer reaches the end of the String. It also takes
      *  quoted strings (either single or double quotes) in the expression
      *  into consideration, unlike PAREN_EXPR_SEPARATOR. Do not use this
      *  together with PAREN_EXPR_SEPARATOR. */
     public static final CustomSeparator PAREN_EXPR_STRING_SEPARATOR =
         new ExprSeparatorWithStrings();
 
     /** A custom separator for texts. Singles out the line ends,
      *  and consequently the lines, if they are in either dos, mac
      *  or unix format. */
     public static final CustomSeparator LINE_SEPARATOR =
         new LineSeparator();
 
     private int sIdx;
     private final int eIdx;
     private int tokIdx;
     private final String source;
     private final TokenSep delims;
     private String savedToken;
     private int savedIdx;
     private List customSeps;
     private String putToken;
 
     /**
      * Constructs a new instance. See above for a description of the
      * delimiter string.
      *
      * @param string        The String to be tokenized.
      * @param delim        The String of delimiters.
      */
     public MyTokenizer(String string, String delim) {
         source = string;
         delims = parseDelimString(delim);
         sIdx = 0;
         tokIdx = 0;
         eIdx = string.length();
         savedToken = null;
         customSeps = null;
         putToken = null;
     }
 
     /**
      * Constructs a new instance. See above for a description of the
      * delimiter string and custom separators.
      *
      * @param string        The String to be tokenized.
      * @param delim        The String of delimiters.
      * @param sep        A custom separator to use.
      */
     public MyTokenizer(String string, String delim, CustomSeparator sep) {
         source = string;
         delims = parseDelimString(delim);
         sIdx = 0;
         tokIdx = 0;
         eIdx = string.length();
         savedToken = null;
         customSeps = new ArrayList();
         customSeps.add(sep);
     }
 
     /**
      * Constructs a new instance. See above for a description of the
      * delimiter string and custom separators.
      *
      * @param string        The String to be tokenized.
      * @param delim        The String of delimiters.
      * @param seps        Some container with custom separators to use.
      */
     public MyTokenizer(String string, String delim, Collection seps) {
         source = string;
         delims = parseDelimString(delim);
         sIdx = 0;
         tokIdx = 0;
         eIdx = string.length();
         savedToken = null;
         customSeps = new ArrayList(seps);
     }
 
     /**
      * Returns true if there are more tokens left.
      *
      * @return true if another token can be fetched with nextToken.
      */
     public boolean hasMoreTokens() {
         return sIdx < eIdx || savedToken != null
             || putToken != null;
     }
 
     /**
      * Retrives the next token.
      *
      * @return The next token.
      */
     public String nextToken() {
         CustomSeparator csep;
         TokenSep sep;
         String s = null;
         int i, j;
 
         if (putToken != null) {
             s = putToken;
             putToken = null;
             return s;
         }
 
         if (savedToken != null) {
             s = savedToken;
             tokIdx = savedIdx;
             savedToken = null;
             return s;
         }
 
         if (sIdx >= eIdx)
             throw new NoSuchElementException(
                                              "No more tokens available");
 
         for (sep = delims; sep != null; sep = sep.getNext())
             sep.reset();
 
         if (customSeps != null) {
             for (i = 0; i < customSeps.size(); i++)
                 ((CustomSeparator) customSeps.get(i)).reset();
         }
 
         for (i = sIdx; i < eIdx; i++) {
             char c = source.charAt(i);
 
             for (j = 0; customSeps != null
                     && j < customSeps.size(); j++) {
                 csep = (CustomSeparator) customSeps.get(j);
 
                 if (csep.addChar(c))
                     break;
             }
             if (customSeps != null && j < customSeps.size()) {
                 csep = (CustomSeparator) customSeps.get(j);
 
                 while (csep.hasFreePart() && i + 1 < eIdx)
                     if (csep.endChar(source.charAt(++i)))
                         break;
                 i -= Math.min(csep.getPeekCount(), i);
 
                 int clen = Math.min(i + 1, source.length());
 
                 if (i - sIdx + 1 > csep.tokenLength()) {
                     s = source.substring(sIdx,
                                           i - csep.tokenLength() + 1);
 
                     savedIdx = i - csep.tokenLength() + 1;
                     savedToken = source.substring(
                                                     savedIdx, clen);
                 } else {
                     s = source.substring(sIdx, clen);
                 }
 
                 tokIdx = sIdx;
                 sIdx = i + 1;
                 break;
             }
 
             for (sep = delims; sep != null; sep = sep.getNext())
                 if (sep.addChar(c))
                     break;
             if (sep != null) {
                 if (i - sIdx + 1 > sep.length()) {
                     s = source.substring(sIdx,
                                           i - sep.length() + 1);
                     savedIdx = i - sep.length() + 1;
                     savedToken = sep.getString();
                 } else {
                     s = sep.getString();
                 }
                 tokIdx = sIdx;
                 sIdx = i + 1;
                 break;
             }
         }
 
         if (s == null) {
             s = source.substring(sIdx);
             tokIdx = sIdx;
             sIdx = eIdx;
         }
 
         return s;
     }
 
     /**
      * This class implements the Enumeration interface. This call maps
      * to nextToken.
      *
      * @return nextToken();
      * @see        #nextToken() nextToken
      */
     public Object nextElement() {
         return nextToken();
     }
 
     /**
      * This class implements the Enumeration interface. This call maps
      * to hasMoreTokens.
      *
      * @return hasMoreTokens();
      * @see        #hasMoreTokens() hasMoreTokens
      */
     public boolean hasMoreElements() {
         return hasMoreTokens();
     }
 
     /**
      * Returns the index in the string of the last token returned by
      * nextToken, or zero if no token has been retrived.
      *
      * @return The index of the last token.
      */
     public int getTokenIndex() {
         return tokIdx;
     }
 
     /**
      * Put a token on the input stream. This will be the next token read
      * from the tokenizer. If this function is called again before the
      * last token has been read, then it will be lost.
      *
      * <p>The index returned from getTokenIndex will be the same for the
      * token put as that of the last token that wasn't put.
      *
      * @param s The token to put.
      * @throws NullPointerException if s is null.
      */
     public void putToken(String s) {
         if (s == null)
             throw new NullPointerException(
                                            "Cannot put a null token");
 
         putToken = s;
     }
 
     /**
      * Creates a linked list of TokenSeps from the comma separated string
      * str.
      *
      * @param str The string specifying delimiter strings.
      * @return A list of TokenSeps.
      */
     private static TokenSep parseDelimString(String str) {
         TokenSep first = null;
         TokenSep p = null;
         int idx0, idx1, length;
         StringBuilder val = new StringBuilder();
         char c;
 
         length = str.length();
         for (idx0 = 0; idx0 < length;) {
             for (idx1 = idx0; idx1 < length; idx1++) {
                 c = str.charAt(idx1);
                 if (c == '\\') {
                     idx1++;
                     if (idx1 < length)
                         val.append(str.charAt(idx1));
                 } else if (c == ',') {
                     break;
                 } else {
                     val.append(c);
                 }
             }
             idx1 = Math.min(idx1, length);
             if (idx1 > idx0) {
                 p = new TokenSep(val.toString());
                 val = new StringBuilder();
                 p.setNext(first);
                 first = p;
             }
 
             idx0 = idx1 + 1;
         }
 
         return first;
     }
 }
 

1		/* $Id: MyTokenizer.java 17887 2010-01-12 21:17:18Z linus $
2		*****************************************************************************
3		* Copyright (c) 2009 Contributors - see below
4		* All rights reserved. This program and the accompanying materials
5		* are made available under the terms of the Eclipse Public License v1.0
6		* which accompanies this distribution, and is available at
7		* http://www.eclipse.org/legal/epl-v10.html
8		*
9		* Contributors:
10		* tfmorris
11		*****************************************************************************
12		*
13		* Some portions of this file was previously release using the BSD License:
14		*/
15
16		// Copyright (c) 1996-2006 The Regents of the University of California. All
17		// Rights Reserved. Permission to use, copy, modify, and distribute this
18		// software and its documentation without fee, and without a written
19		// agreement is hereby granted, provided that the above copyright notice
20		// and this paragraph appear in all copies. This software program and
21		// documentation are copyrighted by The Regents of the University of
22		// California. The software program and documentation are supplied "AS
23		// IS", without any accompanying services from The Regents. The Regents
24		// does not warrant that the operation of the program will be
25		// uninterrupted or error-free. The end-user understands that the program
26		// was developed for research purposes and is advised not to rely
27		// exclusively on the program for any reason. IN NO EVENT SHALL THE
28		// UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
29		// SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
30		// ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
31		// THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF
32		// SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY
33		// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
34		// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE
35		// PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF
36		// CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
37		// UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
38
39		package org.argouml.util;
40
41		import java.util.ArrayList;
42		import java.util.Collection;
43		import java.util.Enumeration;
44		import java.util.List;
45		import java.util.NoSuchElementException;
46
47		/**
48		* Internal class for managing the delimiters in MyTokenizer. It's rather
49		* similar to CustomSeparator, but faster for short constant strings.
50		*/
51		class TokenSep {
52	0	private TokenSep next = null;
53		private final String theString;
54		private final int length;
55		private int pattern;
56
57		/**
58		* Constructs a TokenSep that will match the String given in str.
59		*
60		* @param str The delimiter string.
61		*/
62	0	public TokenSep(String str) {
63	0	theString = str;
64	0	length = str.length();
65	0	if (length > 32)
66	0	throw new IllegalArgumentException("TokenSep " + str
67		+ " is " + length + " (> 32) chars long");
68	0	pattern = 0;
69	0	}
70
71		/**
72		* Called by MyTokenizer when a new character is processed in the
73		* sequence. Returns true if we have found the delimiter.
74		*/
75		public boolean addChar(char c) {
76		int i;
77
78	0	pattern <<= 1;
79	0	pattern |= 1;
80	0	for (i = 0; i < length; i++) {
81	0	if (theString.charAt(i) != c) {
82	0	pattern &= ~(1 << i);
83		}
84		}
85
86	0	return (pattern & (1 << (length - 1))) != 0;
87		}
88
89		/**
90		* Called by MyTokenizer before starting scanning for a new token.
91		*/
92		public void reset() {
93	0	pattern = 0;
94	0	}
95
96		/**
97		* Gets the length of this token.
98		*/
99		public int length() {
100	0	return length;
101		}
102
103		/**
104		* Gets this token.
105		*/
106		public String getString() {
107	0	return theString;
108		}
109
110		/**
111		* @param n The next to set.
112		*/
113		public void setNext(TokenSep n) {
114	0	this.next = n;
115	0	}
116
117		/**
118		* @return Returns the next.
119		*/
120		public TokenSep getNext() {
121	0	return next;
122		}
123		}
124
125		/**
126		* A descendent of CustomSeparator that recognizes tokens on one of two forms:
127		* <ul>
128		* <li><pre>'chr'.....'esc' 'chr'.....'chr'</pre>
129		* <li><pre>'lchr'...'lchr'...'rchr'...'esc' 'rchr'....'rchr'</pre></ul>
130		*
131		* <p>The first form is suited for quoted strings, like <pre>"...\"...."</pre>
132		* or <pre>'...\'...'</pre>.
133		*
134		* <p>The second form is suited for expressions, like
135		* <pre>(a+(b*c)-15*eq(a, b))</pre>.
136		*
137		* <p>This is in fact the class currently used for the public separators in
138		* MyTokenizer, except PAREN_EXPR_STRING_SEPARATOR and LINE_SEPARATOR.
139		*/
140		class QuotedStringSeparator extends CustomSeparator {
141		private final char escChr;
142		private final char startChr;
143		private final char stopChr;
144		private boolean esced;
145		private int tokLen;
146		private int level;
147
148		/**
149		* Creates a separator of the first form (see above) where
150		* 'chr' = q and 'esc' = esc.
151		*
152		* @param q The delimiter character.
153		* @param esc The escape character.
154		*/
155		public QuotedStringSeparator(char q, char esc) {
156	1800	super(q);
157
158	1800	esced = false;
159	1800	escChr = esc;
160	1800	startChr = 0;
161	1800	stopChr = q;
162	1800	tokLen = 0;
163	1800	level = 1;
164	1800	}
165
166		/**
167		* Creates a separator of the second form (see above) where
168		* 'lchr' = sq, 'rchr' = eq and 'esc' = esc.
169		*
170		* @param sq The left delimiter character.
171		* @param eq The right delimiter character.
172		* @param esc The escape character.
173		*/
174		public QuotedStringSeparator(char sq, char eq, char esc) {
175	900	super(sq);
176
177	900	esced = false;
178	900	escChr = esc;
179	900	startChr = sq;
180	900	stopChr = eq;
181	900	tokLen = 0;
182	900	level = 1;
183	900	}
184
185		public void reset() {
186	0	super.reset();
187	0	tokLen = 0;
188	0	level = 1;
189	0	}
190
191		/**
192		* {@inheritDoc}
193		*
194		* Overridden to return the entire length of the token.
195		*/
196		public int tokenLength() {
197	0	return super.tokenLength() + tokLen;
198		}
199
200		/**
201		* {@inheritDoc}
202		*
203		* Overridden to return true.
204		*
205		* @return true
206		*/
207		public boolean hasFreePart() {
208	0	return true;
209		}
210
211		/**
212		* {@inheritDoc}
213		*
214		* Overridden to find the end of the token.
215		*/
216		public boolean endChar(char c) {
217	0	tokLen++;
218
219	0	if (esced) {
220	0	esced = false;
221	0	return false;
222		}
223	0	if (escChr != 0 && c == escChr) {
224	0	esced = true;
225	0	return false;
226		}
227	0	if (startChr != 0 && c == startChr)
228	0	level++;
229	0	if (c == stopChr)
230	0	level--;
231	0	return level <= 0;
232		}
233		}
234
235		/**
236		* A descendent of CustomSeparator that recognizes tokens on the form:
237		*
238		* <br>( " \" ) " ' \' ) ' )
239		*
240		* <p>This is, an expression inside parentheses with proper consideration
241		* for quoted strings inside the the expression.
242		*/
243		class ExprSeparatorWithStrings extends CustomSeparator {
244		private boolean isSQuot;
245		private boolean isDQuot;
246		private boolean isEsc;
247		private int tokLevel;
248		private int tokLen;
249
250		/**
251		* The constructor. No choices available.
252		*/
253		public ExprSeparatorWithStrings() {
254	900	super('(');
255
256	900	isEsc = false;
257	900	isSQuot = false;
258	900	isDQuot = false;
259	900	tokLevel = 1;
260	900	tokLen = 0;
261	900	}
262
263		public void reset() {
264	0	super.reset();
265
266	0	isEsc = false;
267	0	isSQuot = false;
268	0	isDQuot = false;
269	0	tokLevel = 1;
270	0	tokLen = 0;
271	0	}
272
273		/**
274		* {@inheritDoc}
275		*
276		* Overridden to return the entire length of the token.
277		*/
278		public int tokenLength() {
279	0	return super.tokenLength() + tokLen;
280		}
281
282		/**
283		* {@inheritDoc}
284		*
285		* Overridden to return true.
286		*
287		* @return true
288		*/
289		public boolean hasFreePart() {
290	0	return true;
291		}
292
293		/**
294		* {@inheritDoc}
295		*
296		* Overridden to find the end of the token.
297		*/
298		public boolean endChar(char c) {
299	0	tokLen++;
300	0	if (isSQuot) {
301	0	if (isEsc) {
302	0	isEsc = false;
303	0	return false;
304		}
305	0	if (c == '\\')
306	0	isEsc = true;
307	0	else if (c == '\'')
308	0	isSQuot = false;
309	0	return false;
310	0	} else if (isDQuot) {
311	0	if (isEsc) {
312	0	isEsc = false;
313	0	return false;
314		}
315	0	if (c == '\\')
316	0	isEsc = true;
317	0	else if (c == '\"')
318	0	isDQuot = false;
319	0	return false;
320		} else {
321	0	if (c == '\'')
322	0	isSQuot = true;
323	0	else if (c == '\"')
324	0	isDQuot = true;
325	0	else if (c == '(')
326	0	tokLevel++;
327	0	else if (c == ')')
328	0	tokLevel--;
329	0	return tokLevel <= 0;
330		}
331		}
332		}
333
334		/**
335		* A descendent of CustomSeparator that recognizes "the tree line ends":
336		* <ul>
337		* <li>UNIX: <lf></li>
338		* <li>DOS: <cr> <lf></li>
339		* <li>MAC: <cr></li>
340		* </ul>
341		*
342		* <p>This is in fact the class currently used LINE_SEPARATOR in MyTokenizer.
343		*/
344		class LineSeparator extends CustomSeparator {
345		private boolean hasCr;
346		private boolean hasLf;
347		private boolean hasPeeked;
348
349		/**
350		* Creates a LineSeparator.
351		*/
352	900	public LineSeparator() {
353	900	hasCr = false;
354	900	hasLf = false;
355	900	hasPeeked = false;
356	900	}
357
358		public void reset() {
359	0	super.reset();
360	0	hasCr = false;
361	0	hasLf = false;
362	0	hasPeeked = false;
363	0	}
364
365		/**
366		* {@inheritDoc}
367		*/
368		public int tokenLength() {
369	0	return hasCr && hasLf ? 2 : 1;
370		}
371
372		/**
373		* {@inheritDoc}
374		*/
375		public int getPeekCount() {
376	0	return hasPeeked ? 1 : 0;
377		}
378
379		/**
380		* {@inheritDoc}
381		*/
382		public boolean hasFreePart() {
383	0	return !hasLf;
384		}
385
386		/**
387		* {@inheritDoc}
388		*
389		* Overridden to find the start of a line-end.
390		*/
391		public boolean addChar(char c) {
392	0	if (c == '\n') {
393	0	hasLf = true;
394	0	return true;
395		}
396
397	0	if (c == '\r') {
398	0	hasCr = true;
399	0	return true;
400		}
401
402	0	return false;
403		}
404
405		/**
406		* {@inheritDoc}
407		*
408		* Overridden to find the end of a line-end.
409		*/
410		public boolean endChar(char c) {
411	0	if (c == '\n') {
412	0	hasLf = true;
413		} else {
414	0	hasPeeked = true;
415		}
416
417	0	return true;
418		}
419		}
420
421		/**
422		* Class for dividing a String into any number of parts. Each part will be a
423		* substring of the original String. The first part will at least contain the
424		* first character in the string. All following parts will at least contain
425		* the first character in the String not covered by any previous part.
426		*
427		* <p>The delim parameter to the constructors is a comma separated list of
428		* tokens that should be recognized by the tokenizer. These tokens will be
429		* returned by the tokenizer as tokens, and any arbitrary text between them
430		* will also be returned as tokens. Since the comma has special meaning in
431		* this string, it can be escaped with \ to only mean itself (like in "\\,").
432		* For technical reasons it is not possible for any token in this list to be
433		* more than 32 characters long.
434		*
435		* <p>In addition to the delim parameter it is also possible to use custom
436		* separators that allow any string that can be generated by the limited
437		* version of a Turing machine that your computer is, to be used as a
438		* delimiter.
439		*
440		* <p>There are some custom separators provided that you can use to get
441		* things like strings in one token. These cannot be used simultaneously by
442		* several tokenizers, ie they are not thread safe.
443		*
444		* <p>The tokenizer works in a kind of greedy way. When the first separator
445		* token from delim is matched or any CustomSeparator returns true from
446		* addChar, then it is satisfied it has found a token and does NOT check if
447		* it could have found a longer token. Eg: if you have this delim string
448		* "<,<<", then "<<" will never be found.
449		*
450		* <p><b>Example</b><br><pre>
451		* MyTokenizer tzer = new MyTokenizer("Hello, how are you?", " ,\\,");
452		* while (tzer.hasMoreTokens())
453		* _cat.info("\"" + tzer.nextToken() + "\"");
454		* </pre>
455		*
456		* <p>Which whould yield the following output:<pre>
457		* "Hello"
458		* ","
459		* " "
460		* "how"
461		* " "
462		* "are"
463		* " "
464		* "you?"
465		* </pre>
466		*
467		* @author Michael Stockman
468		* @since 0.11.2
469		* @see CustomSeparator
470		*/
471		public class MyTokenizer implements Enumeration {
472		/** A custom separator for quoted strings enclosed in single quotes
473		* and using \ as escape character. There may not be an end quote
474		* if the tokenizer reaches the end of the String. */
475	900	public static final CustomSeparator SINGLE_QUOTED_SEPARATOR =
476		new QuotedStringSeparator('\'', '\\');
477
478		/** A custom separator for quoted strings enclosed in double quotes
479		* and using \ as escape character. There may not be an end quote
480		* if the tokenizer reaches the end of the String. */
481	900	public static final CustomSeparator DOUBLE_QUOTED_SEPARATOR =
482		new QuotedStringSeparator('\"', '\\');
483
484		/** A custom separator for expressions enclosed in parentheses and
485		* matching lparams with rparams. There may not be proper matching
486		* if the tokenizer reaches the end of the String. Do not use this
487		* together with PAREN_EXPR_STRING_SEPARATOR. */
488	900	public static final CustomSeparator PAREN_EXPR_SEPARATOR =
489		new QuotedStringSeparator('(', ')', '\0');
490
491		/** A custom separator for expressions enclosed in parentheses and
492		* matching lparams with rparams. There may not be proper matching
493		* if the tokenizer reaches the end of the String. It also takes
494		* quoted strings (either single or double quotes) in the expression
495		* into consideration, unlike PAREN_EXPR_SEPARATOR. Do not use this
496		* together with PAREN_EXPR_SEPARATOR. */
497	900	public static final CustomSeparator PAREN_EXPR_STRING_SEPARATOR =
498		new ExprSeparatorWithStrings();
499
500		/** A custom separator for texts. Singles out the line ends,
501		* and consequently the lines, if they are in either dos, mac
502		* or unix format. */
503	900	public static final CustomSeparator LINE_SEPARATOR =
504		new LineSeparator();
505
506		private int sIdx;
507		private final int eIdx;
508		private int tokIdx;
509		private final String source;
510		private final TokenSep delims;
511		private String savedToken;
512		private int savedIdx;
513		private List customSeps;
514		private String putToken;
515
516		/**
517		* Constructs a new instance. See above for a description of the
518		* delimiter string.
519		*
520		* @param string The String to be tokenized.
521		* @param delim The String of delimiters.
522		*/
523	0	public MyTokenizer(String string, String delim) {
524	0	source = string;
525	0	delims = parseDelimString(delim);
526	0	sIdx = 0;
527	0	tokIdx = 0;
528	0	eIdx = string.length();
529	0	savedToken = null;
530	0	customSeps = null;
531	0	putToken = null;
532	0	}
533
534		/**
535		* Constructs a new instance. See above for a description of the
536		* delimiter string and custom separators.
537		*
538		* @param string The String to be tokenized.
539		* @param delim The String of delimiters.
540		* @param sep A custom separator to use.
541		*/
542	0	public MyTokenizer(String string, String delim, CustomSeparator sep) {
543	0	source = string;
544	0	delims = parseDelimString(delim);
545	0	sIdx = 0;
546	0	tokIdx = 0;
547	0	eIdx = string.length();
548	0	savedToken = null;
549	0	customSeps = new ArrayList();
550	0	customSeps.add(sep);
551	0	}
552
553		/**
554		* Constructs a new instance. See above for a description of the
555		* delimiter string and custom separators.
556		*
557		* @param string The String to be tokenized.
558		* @param delim The String of delimiters.
559		* @param seps Some container with custom separators to use.
560		*/
561	0	public MyTokenizer(String string, String delim, Collection seps) {
562	0	source = string;
563	0	delims = parseDelimString(delim);
564	0	sIdx = 0;
565	0	tokIdx = 0;
566	0	eIdx = string.length();
567	0	savedToken = null;
568	0	customSeps = new ArrayList(seps);
569	0	}
570
571		/**
572		* Returns true if there are more tokens left.
573		*
574		* @return true if another token can be fetched with nextToken.
575		*/
576		public boolean hasMoreTokens() {
577	0	return sIdx < eIdx \|\| savedToken != null
578		\|\| putToken != null;
579		}
580
581		/**
582		* Retrives the next token.
583		*
584		* @return The next token.
585		*/
586		public String nextToken() {
587		CustomSeparator csep;
588		TokenSep sep;
589	0	String s = null;
590		int i, j;
591
592	0	if (putToken != null) {
593	0	s = putToken;
594	0	putToken = null;
595	0	return s;
596		}
597
598	0	if (savedToken != null) {
599	0	s = savedToken;
600	0	tokIdx = savedIdx;
601	0	savedToken = null;
602	0	return s;
603		}
604
605	0	if (sIdx >= eIdx)
606	0	throw new NoSuchElementException(
607		"No more tokens available");
608
609	0	for (sep = delims; sep != null; sep = sep.getNext())
610	0	sep.reset();
611
612	0	if (customSeps != null) {
613	0	for (i = 0; i < customSeps.size(); i++)
614	0	((CustomSeparator) customSeps.get(i)).reset();
615		}
616
617	0	for (i = sIdx; i < eIdx; i++) {
618	0	char c = source.charAt(i);
619
620	0	for (j = 0; customSeps != null
621	0	&& j < customSeps.size(); j++) {
622	0	csep = (CustomSeparator) customSeps.get(j);
623
624	0	if (csep.addChar(c))
625	0	break;
626		}
627	0	if (customSeps != null && j < customSeps.size()) {
628	0	csep = (CustomSeparator) customSeps.get(j);
629
630	0	while (csep.hasFreePart() && i + 1 < eIdx)
631	0	if (csep.endChar(source.charAt(++i)))
632	0	break;
633	0	i -= Math.min(csep.getPeekCount(), i);
634
635	0	int clen = Math.min(i + 1, source.length());
636
637	0	if (i - sIdx + 1 > csep.tokenLength()) {
638	0	s = source.substring(sIdx,
639		i - csep.tokenLength() + 1);
640
641	0	savedIdx = i - csep.tokenLength() + 1;
642	0	savedToken = source.substring(
643		savedIdx, clen);
644		} else {
645	0	s = source.substring(sIdx, clen);
646		}
647
648	0	tokIdx = sIdx;
649	0	sIdx = i + 1;
650	0	break;
651		}
652
653	0	for (sep = delims; sep != null; sep = sep.getNext())
654	0	if (sep.addChar(c))
655	0	break;
656	0	if (sep != null) {
657	0	if (i - sIdx + 1 > sep.length()) {
658	0	s = source.substring(sIdx,
659		i - sep.length() + 1);
660	0	savedIdx = i - sep.length() + 1;
661	0	savedToken = sep.getString();
662		} else {
663	0	s = sep.getString();
664		}
665	0	tokIdx = sIdx;
666	0	sIdx = i + 1;
667	0	break;
668		}
669		}
670
671	0	if (s == null) {
672	0	s = source.substring(sIdx);
673	0	tokIdx = sIdx;
674	0	sIdx = eIdx;
675		}
676
677	0	return s;
678		}
679
680		/**
681		* This class implements the Enumeration interface. This call maps
682		* to nextToken.
683		*
684		* @return nextToken();
685		* @see #nextToken() nextToken
686		*/
687		public Object nextElement() {
688	0	return nextToken();
689		}
690
691		/**
692		* This class implements the Enumeration interface. This call maps
693		* to hasMoreTokens.
694		*
695		* @return hasMoreTokens();
696		* @see #hasMoreTokens() hasMoreTokens
697		*/
698		public boolean hasMoreElements() {
699	0	return hasMoreTokens();
700		}
701
702		/**
703		* Returns the index in the string of the last token returned by
704		* nextToken, or zero if no token has been retrived.
705		*
706		* @return The index of the last token.
707		*/
708		public int getTokenIndex() {
709	0	return tokIdx;
710		}
711
712		/**
713		* Put a token on the input stream. This will be the next token read
714		* from the tokenizer. If this function is called again before the
715		* last token has been read, then it will be lost.
716		*
717		* <p>The index returned from getTokenIndex will be the same for the
718		* token put as that of the last token that wasn't put.
719		*
720		* @param s The token to put.
721		* @throws NullPointerException if s is null.
722		*/
723		public void putToken(String s) {
724	0	if (s == null)
725	0	throw new NullPointerException(
726		"Cannot put a null token");
727
728	0	putToken = s;
729	0	}
730
731		/**
732		* Creates a linked list of TokenSeps from the comma separated string
733		* str.
734		*
735		* @param str The string specifying delimiter strings.
736		* @return A list of TokenSeps.
737		*/
738		private static TokenSep parseDelimString(String str) {
739	0	TokenSep first = null;
740	0	TokenSep p = null;
741		int idx0, idx1, length;
742	0	StringBuilder val = new StringBuilder();
743		char c;
744
745	0	length = str.length();
746	0	for (idx0 = 0; idx0 < length;) {
747	0	for (idx1 = idx0; idx1 < length; idx1++) {
748	0	c = str.charAt(idx1);
749	0	if (c == '\\') {
750	0	idx1++;
751	0	if (idx1 < length)
752	0	val.append(str.charAt(idx1));
753	0	} else if (c == ',') {
754	0	break;
755		} else {
756	0	val.append(c);
757		}
758		}
759	0	idx1 = Math.min(idx1, length);
760	0	if (idx1 > idx0) {
761	0	p = new TokenSep(val.toString());
762	0	val = new StringBuilder();
763	0	p.setNext(first);
764	0	first = p;
765		}
766
767	0	idx0 = idx1 + 1;
768		}
769
770	0	return first;
771		}
772		}
773