Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlInputStream |
|
| 5.416666666666667;5.417 |
1 | /* $Id: XmlInputStream.java 17940 2010-01-30 16:15:11Z euluis $ | |
2 | ***************************************************************************** | |
3 | * Copyright (c) 2009-2010 Contributors - see below | |
4 | * All rights reserved. This program and the accompanying materials | |
5 | * are made available under the terms of the Eclipse Public License v1.0 | |
6 | * which accompanies this distribution, and is available at | |
7 | * http://www.eclipse.org/legal/epl-v10.html | |
8 | * | |
9 | * Contributors: | |
10 | * tfmorris | |
11 | * euluis | |
12 | ***************************************************************************** | |
13 | * | |
14 | * Some portions of this file were previously released under the BSD License: | |
15 | */ | |
16 | ||
17 | // Copyright (c) 1996-2006 The Regents of the University of California. All | |
18 | // Rights Reserved. Permission to use, copy, modify, and distribute this | |
19 | // software and its documentation without fee, and without a written | |
20 | // agreement is hereby granted, provided that the above copyright notice | |
21 | // and this paragraph appear in all copies. This software program and | |
22 | // documentation are copyrighted by The Regents of the University of | |
23 | // California. The software program and documentation are supplied "AS | |
24 | // IS", without any accompanying services from The Regents. The Regents | |
25 | // does not warrant that the operation of the program will be | |
26 | // uninterrupted or error-free. The end-user understands that the program | |
27 | // was developed for research purposes and is advised not to rely | |
28 | // exclusively on the program for any reason. IN NO EVENT SHALL THE | |
29 | // UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, | |
30 | // SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, | |
31 | // ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF | |
32 | // THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF | |
33 | // SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY | |
34 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | |
35 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE | |
36 | // PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF | |
37 | // CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, | |
38 | // UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | |
39 | ||
40 | package org.argouml.persistence; | |
41 | ||
42 | import java.io.BufferedInputStream; | |
43 | import java.io.IOException; | |
44 | import java.io.InputStream; | |
45 | import java.util.HashMap; | |
46 | import java.util.Iterator; | |
47 | import java.util.Map; | |
48 | ||
49 | //import javax.swing.event.EventListenerList; | |
50 | ||
51 | import org.apache.log4j.Logger; | |
52 | ||
53 | ||
54 | /** | |
55 | * A BufferInputStream that is aware of XML structure. | |
56 | * It searches for the first occurrence of a named tag | |
57 | * and reads only the data (inclusively) from that tag | |
58 | * to the matching end tag or it can search for the first | |
59 | * occurrence of a named tag and read on the child tags. | |
60 | * The tag is not expected to be an empty tag. | |
61 | * <p> | |
62 | * TODO: This is hardwired to assume a fixed single byte | |
63 | * character encoding. It needs to be updated to handle different | |
64 | * encodings, including multi-byte encodings. - tfm 20070607 | |
65 | * | |
66 | * @author Bob Tarling | |
67 | */ | |
68 | class XmlInputStream extends BufferedInputStream { | |
69 | ||
70 | private boolean xmlStarted; | |
71 | private boolean inTag; | |
72 | 0 | private StringBuffer currentTag = new StringBuffer(); |
73 | private boolean endStream; | |
74 | private String tagName; | |
75 | private String endTagName; | |
76 | private Map attributes; | |
77 | private boolean childOnly; | |
78 | private int instanceCount; | |
79 | //private EventListenerList listenerList = new EventListenerList(); | |
80 | ||
81 | /** | |
82 | * Logger. | |
83 | */ | |
84 | 0 | private static final Logger LOG = |
85 | Logger.getLogger(XmlInputStream.class); | |
86 | ||
87 | /** | |
88 | * Construct a new XmlInputStream. | |
89 | * | |
90 | * @param inStream the input stream to wrap. | |
91 | * @param theTag the tag name from which to start reading | |
92 | * @param theLength the expected length of the input stream | |
93 | * @param theEventSpacing the number of characters to read before | |
94 | * firing a progress event. | |
95 | */ | |
96 | public XmlInputStream( | |
97 | InputStream inStream, | |
98 | String theTag, | |
99 | long theLength, | |
100 | long theEventSpacing) { | |
101 | 0 | super(inStream); |
102 | 0 | tagName = theTag; |
103 | 0 | endTagName = '/' + theTag; |
104 | 0 | attributes = null; |
105 | 0 | childOnly = false; |
106 | 0 | } |
107 | ||
108 | /** | |
109 | * Reopen a stream that has already reached the end | |
110 | * of an XML fragment. | |
111 | * | |
112 | * @param theTag the tag name | |
113 | * @param attribs the attributes | |
114 | * @param child child only | |
115 | */ | |
116 | public synchronized void reopen( | |
117 | String theTag, | |
118 | Map attribs, | |
119 | boolean child) { | |
120 | 0 | endStream = false; |
121 | 0 | xmlStarted = false; |
122 | 0 | inTag = false; |
123 | 0 | tagName = theTag; |
124 | 0 | endTagName = '/' + theTag; |
125 | 0 | attributes = attribs; |
126 | 0 | childOnly = child; |
127 | 0 | } |
128 | ||
129 | /** | |
130 | * Reopen a stream that has already reached the end | |
131 | * of an XML fragment. | |
132 | * | |
133 | * @param theTag the tag name | |
134 | */ | |
135 | public synchronized void reopen(String theTag) { | |
136 | 0 | endStream = false; |
137 | 0 | xmlStarted = false; |
138 | 0 | inTag = false; |
139 | 0 | tagName = theTag; |
140 | 0 | endTagName = '/' + theTag; |
141 | 0 | attributes = null; |
142 | 0 | childOnly = false; |
143 | 0 | } |
144 | ||
145 | /* | |
146 | * @see java.io.InputStream#read() | |
147 | */ | |
148 | public synchronized int read() throws IOException { | |
149 | ||
150 | 0 | if (!xmlStarted) { |
151 | 0 | skipToTag(); |
152 | 0 | xmlStarted = true; |
153 | } | |
154 | 0 | if (endStream) { |
155 | 0 | return -1; |
156 | } | |
157 | 0 | int ch = super.read(); |
158 | 0 | endStream = isLastTag(ch); |
159 | 0 | return ch; |
160 | } | |
161 | ||
162 | /* | |
163 | * @see java.io.InputStream#read(byte[], int, int) | |
164 | */ | |
165 | public synchronized int read(byte[] b, int off, int len) | |
166 | throws IOException { | |
167 | ||
168 | 0 | if (!xmlStarted) { |
169 | 0 | skipToTag(); |
170 | 0 | xmlStarted = true; |
171 | } | |
172 | 0 | if (endStream) { |
173 | 0 | return -1; |
174 | } | |
175 | ||
176 | int cnt; | |
177 | 0 | for (cnt = 0; cnt < len; ++cnt) { |
178 | 0 | int read = read(); |
179 | 0 | if (read == -1) { |
180 | 0 | break; |
181 | } | |
182 | 0 | b[cnt + off] = (byte) read; |
183 | } | |
184 | ||
185 | 0 | if (cnt > 0) { |
186 | 0 | return cnt; |
187 | } | |
188 | 0 | return -1; |
189 | } | |
190 | ||
191 | ||
192 | ||
193 | /** | |
194 | * Determines if the character is the last character of the last tag of | |
195 | * interest. | |
196 | * Every character read after the first tag of interest should be passed | |
197 | * through this method in order. | |
198 | * | |
199 | * @param ch the character to test. | |
200 | * @return true if this is the end of the last tag. | |
201 | */ | |
202 | private boolean isLastTag(int ch) { | |
203 | 0 | if (ch == '<') { |
204 | 0 | inTag = true; |
205 | 0 | currentTag.setLength(0); |
206 | 0 | } else if (ch == '>') { |
207 | 0 | inTag = false; |
208 | 0 | String tag = currentTag.toString(); |
209 | 0 | if (tag.equals(endTagName) |
210 | // TODO: The below is not strictly correct, but should | |
211 | // cover the case we deal with. Using a real XML parser | |
212 | // would be better. | |
213 | // Look for XML document has just a single root element | |
214 | || (currentTag.charAt(currentTag.length() - 1) == '/' | |
215 | && tag.startsWith(tagName) | |
216 | && tag.indexOf(' ') == tagName.indexOf(' '))) { | |
217 | 0 | return true; |
218 | } | |
219 | 0 | } else if (inTag) { |
220 | 0 | currentTag.append((char) ch); |
221 | } | |
222 | 0 | return false; |
223 | } | |
224 | ||
225 | /** | |
226 | * Keep on reading an input stream until a specific | |
227 | * sequence of characters has ben read. | |
228 | * This method assumes there is at least one match. | |
229 | * | |
230 | * @throws IOException | |
231 | */ | |
232 | private void skipToTag() throws IOException { | |
233 | 0 | char[] searchChars = tagName.toCharArray(); |
234 | int i; | |
235 | boolean found; | |
236 | while (true) { | |
237 | 0 | if (!childOnly) { |
238 | 0 | mark(1000); |
239 | } | |
240 | // Keep reading till we get the left bracket of an opening tag | |
241 | 0 | while (realRead() != '<') { |
242 | 0 | if (!childOnly) { |
243 | 0 | mark(1000); |
244 | } | |
245 | } | |
246 | 0 | found = true; |
247 | // Compare each following character to see | |
248 | // that it matches the tag we want | |
249 | 0 | for (i = 0; i < tagName.length(); ++i) { |
250 | 0 | int c = realRead(); |
251 | 0 | if (c != searchChars[i]) { |
252 | 0 | found = false; |
253 | 0 | break; |
254 | } | |
255 | } | |
256 | 0 | int terminator = realRead(); |
257 | // We also want to match with the right bracket of the tag or | |
258 | // some other terminator | |
259 | 0 | if (found && !isNameTerminator((char) terminator)) { |
260 | 0 | found = false; |
261 | } | |
262 | ||
263 | 0 | if (found) { |
264 | // We've found the matching tag but do we have | |
265 | // the correct instance with matching attributes? | |
266 | 0 | if (attributes != null) { |
267 | 0 | Map attributesFound = new HashMap(); |
268 | 0 | if (terminator != '>') { |
269 | 0 | attributesFound = readAttributes(); |
270 | } | |
271 | // Search all attributes found to those expected. | |
272 | // If any don't match then turn off the found flag | |
273 | // so that we search for the next matching tag. | |
274 | 0 | Iterator it = attributes.entrySet().iterator(); |
275 | 0 | while (found && it.hasNext()) { |
276 | 0 | Map.Entry pair = (Map.Entry) it.next(); |
277 | 0 | if (!pair.getValue().equals( |
278 | attributesFound.get(pair.getKey()))) { | |
279 | 0 | found = false; |
280 | } | |
281 | 0 | } |
282 | } | |
283 | } | |
284 | ||
285 | 0 | if (found) { |
286 | 0 | if (instanceCount < 0) { |
287 | 0 | found = false; |
288 | 0 | ++instanceCount; |
289 | } | |
290 | } | |
291 | ||
292 | 0 | if (found) { |
293 | 0 | if (childOnly) { |
294 | // Read the name of the child tag | |
295 | // and then reset read position | |
296 | // back to that child tag. | |
297 | 0 | mark(1000); |
298 | 0 | while (realRead() != '<') { |
299 | /* do nothing */ | |
300 | } | |
301 | 0 | tagName = ""; |
302 | 0 | char ch = (char) realRead(); |
303 | 0 | while (!isNameTerminator(ch)) { |
304 | 0 | tagName += ch; |
305 | 0 | ch = (char) realRead(); |
306 | } | |
307 | 0 | endTagName = "/" + tagName; |
308 | 0 | LOG.info("Start tag = " + tagName); |
309 | 0 | LOG.info("End tag = " + endTagName); |
310 | } | |
311 | 0 | reset(); |
312 | 0 | return; |
313 | } | |
314 | 0 | } |
315 | } | |
316 | ||
317 | private boolean isNameTerminator(char ch) { | |
318 | 0 | return (ch == '>' || Character.isWhitespace(ch)); |
319 | } | |
320 | ||
321 | /** | |
322 | * Having read the inputstream up until the tag name. | |
323 | * This method continues to read the contents of the tag to | |
324 | * retrieve any attribute names and values. | |
325 | * @return a map of name value pairs. | |
326 | * @throws IOException | |
327 | */ | |
328 | private Map readAttributes() throws IOException { | |
329 | 0 | Map attributesFound = new HashMap(); |
330 | int character; | |
331 | 0 | while ((character = realRead()) != '>') { |
332 | 0 | if (!Character.isWhitespace((char) character)) { |
333 | 0 | StringBuffer attributeName = new StringBuffer(); |
334 | 0 | attributeName.append((char) character); |
335 | while ((character = realRead()) != '=' | |
336 | 0 | && !Character.isWhitespace((char) character)) { |
337 | 0 | attributeName.append((char) character); |
338 | } | |
339 | // Skip any whitespace till we should be on an equals sign. | |
340 | 0 | while (Character.isWhitespace((char) character)) { |
341 | 0 | character = realRead(); |
342 | } | |
343 | 0 | if (character != '=') { |
344 | 0 | throw new IOException( |
345 | "Expected = sign after attribute " | |
346 | + attributeName); | |
347 | } | |
348 | // Skip any whitespace till we should be on a quote symbol. | |
349 | 0 | int quoteSymbol = realRead(); |
350 | 0 | while (Character.isWhitespace((char) quoteSymbol)) { |
351 | 0 | quoteSymbol = realRead(); |
352 | } | |
353 | 0 | if (quoteSymbol != '"' && quoteSymbol != '\'') { |
354 | 0 | throw new IOException( |
355 | "Expected \" or ' around attribute value after " | |
356 | + "attribute " + attributeName); | |
357 | } | |
358 | 0 | StringBuffer attributeValue = new StringBuffer(); |
359 | 0 | while ((character = realRead()) != quoteSymbol) { |
360 | 0 | attributeValue.append((char) character); |
361 | } | |
362 | 0 | attributesFound.put( |
363 | attributeName.toString(), | |
364 | attributeValue.toString()); | |
365 | 0 | } |
366 | } | |
367 | 0 | return attributesFound; |
368 | } | |
369 | ||
370 | ||
371 | ||
372 | /** | |
373 | * The close method is overridden to prevent some class out of | |
374 | * our control from closing the stream (such as a SAX parser). | |
375 | * Use realClose() to finally close the stream for real. | |
376 | * @throws IOException to satisfy ancestor but will never happen. | |
377 | */ | |
378 | public void close() throws IOException { | |
379 | 0 | } |
380 | ||
381 | /** | |
382 | * Really close the input. | |
383 | * | |
384 | * @throws IOException if an I/O error occurs. | |
385 | */ | |
386 | public void realClose() throws IOException { | |
387 | 0 | super.close(); |
388 | 0 | } |
389 | ||
390 | private int realRead() throws IOException { | |
391 | 0 | int read = super.read(); |
392 | 0 | if (read == -1) { |
393 | 0 | throw new IOException("Tag " + tagName + " not found"); |
394 | } | |
395 | 0 | return read; |
396 | } | |
397 | ||
398 | } |