Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
BibtexParser |
|
| 7.125;7.125 | ||||
BibtexParser$NoLabelException |
|
| 7.125;7.125 |
1 | /* | |
2 | Copyright (C) 2003-06 David Weitzman, Nizar N. Batada, Morten O. Alver, Christopher Oezbek | |
3 | ||
4 | All programs in this directory and | |
5 | subdirectories are published under the GNU General Public License as | |
6 | described below. | |
7 | ||
8 | This program is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or (at | |
11 | your option) any later version. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
20 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
21 | USA | |
22 | ||
23 | Further information about the GNU GPL is available at: | |
24 | http://www.gnu.org/copyleft/gpl.ja.html | |
25 | ||
26 | */ | |
27 | ||
28 | package net.sf.jabref.imports; | |
29 | ||
30 | import java.io.BufferedReader; | |
31 | import java.io.IOException; | |
32 | import java.io.PushbackReader; | |
33 | import java.io.Reader; | |
34 | import java.io.StringReader; | |
35 | import java.util.Collection; | |
36 | import java.util.HashMap; | |
37 | import java.util.regex.Matcher; | |
38 | import java.util.regex.Pattern; | |
39 | ||
40 | import net.sf.jabref.BibtexDatabase; | |
41 | import net.sf.jabref.BibtexEntry; | |
42 | import net.sf.jabref.BibtexEntryType; | |
43 | import net.sf.jabref.BibtexFields; | |
44 | import net.sf.jabref.BibtexString; | |
45 | import net.sf.jabref.CustomEntryType; | |
46 | import net.sf.jabref.GUIGlobals; | |
47 | import net.sf.jabref.Globals; | |
48 | import net.sf.jabref.JabRefPreferences; | |
49 | import net.sf.jabref.KeyCollisionException; | |
50 | import net.sf.jabref.UnknownEntryType; | |
51 | import net.sf.jabref.Util; | |
52 | ||
53 | /** | |
54 | * Class for importing BibTeX-files. | |
55 | * | |
56 | * Use: | |
57 | * | |
58 | * BibtexParser parser = new BibtexParser(reader); | |
59 | * | |
60 | * ParserResult result = parser.parse(); | |
61 | * | |
62 | * or | |
63 | * | |
64 | * ParserResult result = BibtexParser.parse(reader); | |
65 | * | |
66 | * Can be used stand-alone. | |
67 | * | |
68 | * @author David Weitzman | |
69 | * @author Nizar N. Batada | |
70 | * @author Morten O. Alver | |
71 | * @author Christopher Oezbek | |
72 | */ | |
73 | public class BibtexParser { | |
74 | ||
75 | private PushbackReader _in; | |
76 | ||
77 | private BibtexDatabase _db; | |
78 | ||
79 | private HashMap<String, String> _meta; | |
80 | ||
81 | private HashMap<String, BibtexEntryType> entryTypes; | |
82 | ||
83 | 9772415 | private boolean _eof = false; |
84 | ||
85 | 9772415 | private int line = 1; |
86 | ||
87 | 9772415 | private FieldContentParser fieldContentParser = new FieldContentParser(); |
88 | ||
89 | private ParserResult _pr; | |
90 | ||
91 | 9766015 | private static final Integer LOOKAHEAD = 64; |
92 | ||
/**
 * Creates a parser that reads BibTeX data from the given reader.
 *
 * If the global preferences have not been initialised yet they are loaded
 * here, because field parsing consults them.
 *
 * @param in
 *            the reader to parse from; must not be null
 * @throws NullPointerException
 *             if in is null
 */
public BibtexParser(Reader in) {

    if (in == null) {
        // Same exception type as before, now with a message that names the culprit.
        throw new NullPointerException("in must not be null");
    }
    if (Globals.prefs == null) {
        Globals.prefs = JabRefPreferences.getInstance();
    }
    _in = new PushbackReader(in, LOOKAHEAD);
}
103 | ||
104 | /** | |
105 | * Shortcut usage to create a Parser and read the input. | |
106 | * | |
107 | * @param in - | |
108 | * Reader to read from | |
109 | * @throws IOException | |
110 | */ | |
111 | public static ParserResult parse(Reader in) throws IOException { | |
112 | 0 | BibtexParser parser = new BibtexParser(in); |
113 | 0 | return parser.parse(); |
114 | } | |
115 | ||
116 | ||
117 | /** | |
118 | * Parses BibtexEntries from the given string and returns the collection of all entries found. | |
119 | * | |
120 | * @param bibtexString | |
121 | * | |
122 | * @return Returns null if an error occurred, returns an empty collection if no entries where found. | |
123 | */ | |
124 | public static Collection<BibtexEntry> fromString(String bibtexString){ | |
125 | 0 | StringReader reader = new StringReader(bibtexString); |
126 | 0 | BibtexParser parser = new BibtexParser(reader); |
127 | try { | |
128 | 0 | return parser.parse().getDatabase().getEntries(); |
129 | 0 | } catch (Exception e){ |
130 | 0 | return null; |
131 | } | |
132 | } | |
133 | ||
134 | /** | |
135 | * Parses BibtexEntries from the given string and returns one entry found (or null if none found) | |
136 | * | |
137 | * It is undetermined which entry is returned, so use this in case you know there is only one entry in the string. | |
138 | * | |
139 | * @param bibtexString | |
140 | * | |
141 | * @return The bibtexentry or null if non was found or an error occurred. | |
142 | */ | |
143 | public static BibtexEntry singleFromString(String bibtexString) { | |
144 | 0 | Collection<BibtexEntry> c = fromString(bibtexString); |
145 | 0 | if (c == null){ |
146 | 0 | return null; |
147 | } | |
148 | 0 | return c.iterator().next(); |
149 | } | |
150 | ||
151 | /** | |
152 | * Check whether the source is in the correct format for this importer. | |
153 | */ | |
154 | public static boolean isRecognizedFormat(Reader inOrig) throws IOException { | |
155 | // Our strategy is to look for the "@<type> {" line. | |
156 | 0 | BufferedReader in = new BufferedReader(inOrig); |
157 | ||
158 | 0 | Pattern pat1 = Pattern.compile("@[a-zA-Z]*\\s*\\{"); |
159 | ||
160 | String str; | |
161 | ||
162 | 0 | while ((str = in.readLine()) != null) { |
163 | ||
164 | 0 | if (pat1.matcher(str).find()) |
165 | 0 | return true; |
166 | 0 | else if (str.startsWith(GUIGlobals.SIGNATURE)) |
167 | 0 | return true; |
168 | } | |
169 | ||
170 | 0 | return false; |
171 | } | |
172 | ||
173 | private void skipWhitespace() throws IOException { | |
174 | int c; | |
175 | ||
176 | while (true) { | |
177 | 9772415 | c = read(); |
178 | 9772415 | if ((c == -1) || (c == 65535)) { |
179 | 0 | _eof = true; |
180 | 0 | return; |
181 | } | |
182 | ||
183 | 9772415 | if (Character.isWhitespace((char) c)) { |
184 | 0 | continue; |
185 | } else | |
186 | // found non-whitespace char | |
187 | // Util.pr("SkipWhitespace, stops: "+c); | |
188 | 9772415 | unread(c); |
189 | /* | |
190 | * try { Thread.currentThread().sleep(500); } catch | |
191 | * (InterruptedException ex) {} | |
192 | */ | |
193 | 9772415 | break; |
194 | } | |
195 | 9772415 | } |
196 | ||
197 | private String skipAndRecordWhitespace(int j) throws IOException { | |
198 | int c; | |
199 | 0 | StringBuffer sb = new StringBuffer(); |
200 | 0 | if (j != ' ') |
201 | 0 | sb.append((char) j); |
202 | while (true) { | |
203 | 0 | c = read(); |
204 | 0 | if ((c == -1) || (c == 65535)) { |
205 | 0 | _eof = true; |
206 | 0 | return sb.toString(); |
207 | } | |
208 | ||
209 | 0 | if (Character.isWhitespace((char) c)) { |
210 | 0 | if (c != ' ') |
211 | 0 | sb.append((char) c); |
212 | continue; | |
213 | } else | |
214 | // found non-whitespace char | |
215 | // Util.pr("SkipWhitespace, stops: "+c); | |
216 | 0 | unread(c); |
217 | /* | |
218 | * try { Thread.currentThread().sleep(500); } catch | |
219 | * (InterruptedException ex) {} | |
220 | */ | |
221 | 0 | break; |
222 | } | |
223 | 0 | return sb.toString(); |
224 | } | |
225 | ||
226 | /** | |
227 | * Will parse the BibTex-Data found when reading from reader. | |
228 | * | |
229 | * The reader will be consumed. | |
230 | * | |
231 | * Multiple calls to parse() return the same results | |
232 | * | |
233 | * @return ParserResult | |
234 | * @throws IOException | |
235 | */ | |
236 | public ParserResult parse() throws IOException { | |
237 | ||
238 | // If we already parsed this, just return it. | |
239 | 9772415 | if (_pr != null) |
240 | 0 | return _pr; |
241 | ||
242 | 9772415 | _db = new BibtexDatabase(); // Bibtex related contents. |
243 | 9772415 | _meta = new HashMap<String, String>(); // Metadata in comments for Bibkeeper. |
244 | 9772415 | entryTypes = new HashMap<String, BibtexEntryType>(); // To store custem entry types parsed. |
245 | 9772415 | _pr = new ParserResult(_db, _meta, entryTypes); |
246 | ||
247 | // First see if we can find the version number of the JabRef version that | |
248 | // wrote the file: | |
249 | 9772415 | String versionNum = readJabRefVersionNumber(); |
250 | 9772415 | if (versionNum != null) { |
251 | 9772415 | _pr.setJabrefVersion(versionNum); |
252 | 9772415 | setMajorMinorVersions(); |
253 | } | |
254 | else { | |
255 | // No version number found. However, we have only | |
256 | } | |
257 | ||
258 | 9772415 | skipWhitespace(); |
259 | ||
260 | try { | |
261 | 9772415 | while (!_eof) { |
262 | 9772415 | boolean found = consumeUncritically('@'); |
263 | 9772415 | if (!found) |
264 | 9772415 | break; |
265 | 0 | skipWhitespace(); |
266 | 0 | String entryType = parseTextToken(); |
267 | 0 | BibtexEntryType tp = BibtexEntryType.getType(entryType); |
268 | 0 | boolean isEntry = (tp != null); |
269 | // Util.pr(tp.getName()); | |
270 | 0 | if (!isEntry) { |
271 | // The entry type name was not recognized. This can mean | |
272 | // that it is a string, preamble, or comment. If so, | |
273 | // parse and set accordingly. If not, assume it is an entry | |
274 | // with an unknown type. | |
275 | 0 | if (entryType.toLowerCase().equals("preamble")) { |
276 | 0 | _db.setPreamble(parsePreamble()); |
277 | 0 | } else if (entryType.toLowerCase().equals("string")) { |
278 | 0 | BibtexString bs = parseString(); |
279 | try { | |
280 | 0 | _db.addString(bs); |
281 | 0 | } catch (KeyCollisionException ex) { |
282 | 0 | _pr.addWarning(Globals.lang("Duplicate string name") + ": " |
283 | + bs.getName()); | |
284 | // ex.printStackTrace(); | |
285 | 0 | } |
286 | 0 | } else if (entryType.toLowerCase().equals("comment")) { |
287 | 0 | StringBuffer commentBuf = parseBracketedTextExactly(); |
288 | /** | |
289 | * | |
290 | * Metadata are used to store Bibkeeper-specific | |
291 | * information in .bib files. | |
292 | * | |
293 | * Metadata are stored in bibtex files in the format | |
294 | * | |
295 | * @comment{jabref-meta: type:data0;data1;data2;...} | |
296 | * | |
297 | * Each comment that starts with the META_FLAG is stored | |
298 | * in the meta HashMap, with type as key. Unluckily, the | |
299 | * old META_FLAG bibkeeper-meta: was used in JabRef 1.0 | |
300 | * and 1.1, so we need to support it as well. At least | |
301 | * for a while. We'll always save with the new one. | |
302 | */ | |
303 | 0 | String comment = commentBuf.toString().replaceAll("[\\x0d\\x0a]", ""); |
304 | 0 | if (comment.substring(0, |
305 | Math.min(comment.length(), GUIGlobals.META_FLAG.length())).equals( | |
306 | GUIGlobals.META_FLAG) | |
307 | || comment.substring(0, | |
308 | Math.min(comment.length(), GUIGlobals.META_FLAG_OLD.length())) | |
309 | .equals(GUIGlobals.META_FLAG_OLD)) { | |
310 | ||
311 | String rest; | |
312 | 0 | if (comment.substring(0, GUIGlobals.META_FLAG.length()).equals( |
313 | GUIGlobals.META_FLAG)) | |
314 | 0 | rest = comment.substring(GUIGlobals.META_FLAG.length()); |
315 | else | |
316 | 0 | rest = comment.substring(GUIGlobals.META_FLAG_OLD.length()); |
317 | ||
318 | 0 | int pos = rest.indexOf(':'); |
319 | ||
320 | 0 | if (pos > 0) |
321 | 0 | _meta.put(rest.substring(0, pos), rest.substring(pos + 1)); |
322 | // We remove all line breaks in the metadata - these | |
323 | // will have been inserted | |
324 | // to prevent too long lines when the file was | |
325 | // saved, and are not part of the data. | |
326 | } | |
327 | ||
328 | /** | |
329 | * A custom entry type can also be stored in a | |
330 | * | |
331 | * @comment: | |
332 | */ | |
333 | 0 | if (comment.substring(0, |
334 | Math.min(comment.length(), GUIGlobals.ENTRYTYPE_FLAG.length())).equals( | |
335 | GUIGlobals.ENTRYTYPE_FLAG)) { | |
336 | ||
337 | 0 | CustomEntryType typ = CustomEntryType.parseEntryType(comment); |
338 | 0 | entryTypes.put(typ.getName().toLowerCase(), typ); |
339 | ||
340 | } | |
341 | 0 | } else { |
342 | // The entry type was not recognized. This may mean that | |
343 | // it is a custom entry type whose definition will | |
344 | // appear | |
345 | // at the bottom of the file. So we use an | |
346 | // UnknownEntryType | |
347 | // to remember the type name by. | |
348 | 0 | tp = new UnknownEntryType(entryType.toLowerCase()); |
349 | // System.out.println("unknown type: "+entryType); | |
350 | 0 | isEntry = true; |
351 | } | |
352 | } | |
353 | ||
354 | 0 | if (isEntry) // True if not comment, preamble or string. |
355 | { | |
356 | /** | |
357 | * Morten Alver 13 Aug 2006: Trying to make the parser more | |
358 | * robust. If an exception is thrown when parsing an entry, | |
359 | * drop the entry and try to resume parsing. Add a warning | |
360 | * for the user. | |
361 | * | |
362 | * An alternative solution is to try rescuing the entry for | |
363 | * which parsing failed, by returning the entry with the | |
364 | * exception and adding it before parsing is continued. | |
365 | */ | |
366 | try { | |
367 | 0 | BibtexEntry be = parseEntry(tp); |
368 | ||
369 | 0 | boolean duplicateKey = _db.insertEntry(be); |
370 | 0 | if (duplicateKey) // JZTODO lyrics |
371 | 0 | _pr.addDuplicateKey(be.getCiteKey()); |
372 | /*_pr.addWarning(Globals.lang("duplicate BibTeX key") + ": " | |
373 | + be.getCiteKey() + " (" | |
374 | + Globals.lang("grouping may not work for this entry") + ")"); */ | |
375 | 0 | else if (be.getCiteKey() == null || be.getCiteKey().equals("")) { |
376 | 0 | _pr.addWarning(Globals.lang("empty BibTeX key") + ": " |
377 | + be.getAuthorTitleYear(40) + " (" | |
378 | + Globals.lang("grouping may not work for this entry") + ")"); | |
379 | } | |
380 | 0 | } catch (IOException ex) { |
381 | 0 | ex.printStackTrace(); |
382 | 0 | _pr.addWarning(Globals.lang("Error occured when parsing entry") + ": '" |
383 | + ex.getMessage() + "'. " + Globals.lang("Skipped entry.")); | |
384 | ||
385 | 0 | } |
386 | } | |
387 | ||
388 | 0 | skipWhitespace(); |
389 | 0 | } |
390 | ||
391 | // Before returning the database, update entries with unknown type | |
392 | // based on parsed type definitions, if possible. | |
393 | 9772415 | checkEntryTypes(_pr); |
394 | ||
395 | 9772415 | return _pr; |
396 | 0 | } catch (KeyCollisionException kce) { |
397 | // kce.printStackTrace(); | |
398 | 0 | throw new IOException("Duplicate ID in bibtex file: " + kce.toString()); |
399 | } | |
400 | } | |
401 | ||
402 | private int peek() throws IOException { | |
403 | 342034525 | int c = read(); |
404 | 342034525 | unread(c); |
405 | ||
406 | 342034525 | return c; |
407 | } | |
408 | ||
409 | private int read() throws IOException { | |
410 | 938151890 | int c = _in.read(); |
411 | 938151890 | if (c == '\n') |
412 | 29317245 | line++; |
413 | 938151890 | return c; |
414 | } | |
415 | ||
416 | private void unread(int c) throws IOException { | |
417 | 351806940 | if (c == '\n') |
418 | 0 | line--; |
419 | 351806940 | _in.unread(c); |
420 | 351806940 | } |
421 | ||
422 | public BibtexString parseString() throws IOException { | |
423 | // Util.pr("Parsing string"); | |
424 | 0 | skipWhitespace(); |
425 | 0 | consume('{', '('); |
426 | // while (read() != '}'); | |
427 | 0 | skipWhitespace(); |
428 | // Util.pr("Parsing string name"); | |
429 | 0 | String name = parseTextToken(); |
430 | // Util.pr("Parsed string name"); | |
431 | 0 | skipWhitespace(); |
432 | // Util.pr("Now the contents"); | |
433 | 0 | consume('='); |
434 | 0 | String content = parseFieldContent(name); |
435 | // Util.pr("Now I'm going to consume a }"); | |
436 | 0 | consume('}', ')'); |
437 | // Util.pr("Finished string parsing."); | |
438 | 0 | String id = Util.createNeutralId(); |
439 | 0 | return new BibtexString(id, name, content); |
440 | } | |
441 | ||
442 | public String parsePreamble() throws IOException { | |
443 | 0 | return parseBracketedText().toString(); |
444 | } | |
445 | ||
446 | public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException { | |
447 | 0 | String id = Util.createNeutralId();// createId(tp, _db); |
448 | 0 | BibtexEntry result = new BibtexEntry(id, tp); |
449 | 0 | skipWhitespace(); |
450 | 0 | consume('{', '('); |
451 | 0 | int c = peek(); |
452 | 0 | if ((c != '\n') && (c != '\r')) |
453 | 0 | skipWhitespace(); |
454 | 0 | String key = null; |
455 | 0 | boolean doAgain = true; |
456 | 0 | while (doAgain) { |
457 | 0 | doAgain = false; |
458 | try { | |
459 | 0 | if (key != null) |
460 | 0 | key = key + parseKey();// parseTextToken(), |
461 | else | |
462 | 0 | key = parseKey(); |
463 | 0 | } catch (NoLabelException ex) { |
464 | // This exception will be thrown if the entry lacks a key | |
465 | // altogether, like in "@article{ author = { ...". | |
466 | // It will also be thrown if a key contains =. | |
467 | 0 | c = (char) peek(); |
468 | 0 | if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) { |
469 | 0 | String fieldName = ex.getMessage().trim().toLowerCase(); |
470 | 0 | String cont = parseFieldContent(fieldName); |
471 | 0 | result.setField(fieldName, cont); |
472 | 0 | } else { |
473 | 0 | if (key != null) |
474 | 0 | key = key + ex.getMessage() + "="; |
475 | else | |
476 | 0 | key = ex.getMessage() + "="; |
477 | 0 | doAgain = true; |
478 | } | |
479 | 0 | } |
480 | } | |
481 | ||
482 | 0 | if ((key != null) && key.equals("")) |
483 | 0 | key = null; |
484 | ||
485 | 0 | result.setField(BibtexFields.KEY_FIELD, key); |
486 | 0 | skipWhitespace(); |
487 | ||
488 | while (true) { | |
489 | 0 | c = peek(); |
490 | 0 | if ((c == '}') || (c == ')')) { |
491 | 0 | break; |
492 | } | |
493 | ||
494 | 0 | if (c == ',') |
495 | 0 | consume(','); |
496 | ||
497 | 0 | skipWhitespace(); |
498 | ||
499 | 0 | c = peek(); |
500 | 0 | if ((c == '}') || (c == ')')) { |
501 | 0 | break; |
502 | } | |
503 | 0 | parseField(result); |
504 | } | |
505 | ||
506 | 0 | consume('}', ')'); |
507 | 0 | return result; |
508 | } | |
509 | ||
510 | private void parseField(BibtexEntry entry) throws IOException { | |
511 | 0 | String key = parseTextToken().toLowerCase(); |
512 | // Util.pr("Field: _"+key+"_"); | |
513 | 0 | skipWhitespace(); |
514 | 0 | consume('='); |
515 | 0 | String content = parseFieldContent(key); |
516 | // Now, if the field in question is set up to be fitted automatically | |
517 | // with braces around | |
518 | // capitals, we should remove those now when reading the field: | |
519 | 0 | if (Globals.prefs.putBracesAroundCapitals(key)) { |
520 | 0 | content = Util.removeBracesAroundCapitals(content); |
521 | } | |
522 | 0 | if (content.length() > 0) { |
523 | 0 | if (entry.getField(key) == null) |
524 | 0 | entry.setField(key, content); |
525 | else { | |
526 | // The following hack enables the parser to deal with multiple | |
527 | // author or | |
528 | // editor lines, stringing them together instead of getting just | |
529 | // one of them. | |
530 | // Multiple author or editor lines are not allowed by the bibtex | |
531 | // format, but | |
532 | // at least one online database exports bibtex like that, making | |
533 | // it inconvenient | |
534 | // for users if JabRef didn't accept it. | |
535 | 0 | if (key.equals("author") || key.equals("editor")) |
536 | 0 | entry.setField(key, entry.getField(key) + " and " + content); |
537 | } | |
538 | } | |
539 | 0 | } |
540 | ||
541 | private String parseFieldContent(String key) throws IOException { | |
542 | 0 | skipWhitespace(); |
543 | 0 | StringBuffer value = new StringBuffer(); |
544 | 0 | int c = '.'; |
545 | ||
546 | 0 | while (((c = peek()) != ',') && (c != '}') && (c != ')')) { |
547 | ||
548 | 0 | if (_eof) { |
549 | 0 | throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); |
550 | } | |
551 | 0 | if (c == '"') { |
552 | 0 | StringBuffer text = parseQuotedFieldExactly(); |
553 | 0 | value.append(fieldContentParser.format(text)); |
554 | /* | |
555 | * | |
556 | * The following code doesn't handle {"} correctly: // value is | |
557 | * a string consume('"'); | |
558 | * | |
559 | * while (!((peek() == '"') && (j != '\\'))) { j = read(); if | |
560 | * (_eof || (j == -1) || (j == 65535)) { throw new | |
561 | * RuntimeException("Error in line "+line+ ": EOF in | |
562 | * mid-string"); } | |
563 | * | |
564 | * value.append((char) j); } | |
565 | * | |
566 | * consume('"'); | |
567 | */ | |
568 | 0 | } else if (c == '{') { |
569 | // Value is a string enclosed in brackets. There can be pairs | |
570 | // of brackets inside of a field, so we need to count the | |
571 | // brackets to know when the string is finished. | |
572 | 0 | StringBuffer text = parseBracketedTextExactly(); |
573 | 0 | value.append(fieldContentParser.format(text, key)); |
574 | ||
575 | 0 | } else if (Character.isDigit((char) c)) { // value is a number |
576 | ||
577 | 0 | String numString = parseTextToken(); |
578 | // Morten Alver 2007-07-04: I don't see the point of parsing the integer | |
579 | // and converting it back to a string, so I'm removing the construct below | |
580 | // the following line: | |
581 | 0 | value.append(numString); |
582 | /* | |
583 | try { | |
584 | // Fixme: What is this for? | |
585 | value.append(String.valueOf(Integer.parseInt(numString))); | |
586 | } catch (NumberFormatException e) { | |
587 | // If Integer could not be parsed then just add the text | |
588 | // Used to fix [ 1594123 ] Failure to import big numbers | |
589 | value.append(numString); | |
590 | } | |
591 | */ | |
592 | 0 | } else if (c == '#') { |
593 | 0 | consume('#'); |
594 | } else { | |
595 | 0 | String textToken = parseTextToken(); |
596 | 0 | if (textToken.length() == 0) |
597 | 0 | throw new IOException("Error in line " + line + " or above: " |
598 | + "Empty text token.\nThis could be caused " | |
599 | + "by a missing comma between two fields."); | |
600 | 0 | value.append("#").append(textToken).append("#"); |
601 | // Util.pr(parseTextToken()); | |
602 | // throw new RuntimeException("Unknown field type"); | |
603 | } | |
604 | 0 | skipWhitespace(); |
605 | } | |
606 | // Util.pr("Returning field content: "+value.toString()); | |
607 | ||
608 | // Check if we are to strip extra pairs of braces before returning: | |
609 | 0 | if (Globals.prefs.getBoolean("autoDoubleBraces")) { |
610 | // Do it: | |
611 | while ((value.length() > 1) && (value.charAt(0) == '{') | |
612 | 0 | && (value.charAt(value.length() - 1) == '}')) { |
613 | 0 | value.deleteCharAt(value.length() - 1); |
614 | 0 | value.deleteCharAt(0); |
615 | } | |
616 | // Problem: if the field content is "{DNA} blahblah {EPA}", one pair | |
617 | // too much will be removed. | |
618 | // Check if this is the case, and re-add as many pairs as needed. | |
619 | 0 | while (hasNegativeBraceCount(value.toString())) { |
620 | 0 | value.insert(0, '{'); |
621 | 0 | value.append('}'); |
622 | } | |
623 | ||
624 | } | |
625 | 0 | return value.toString(); |
626 | ||
627 | } | |
628 | ||
629 | /** | |
630 | * Original content moved to parseFieldContent(String). | |
631 | * @return | |
632 | * @throws IOException | |
633 | */ | |
634 | // private String parseFieldContent() throws IOException { | |
635 | // return parseFieldContent(null); | |
636 | // } | |
637 | ||
638 | /** | |
639 | * Check if a string at any point has had more ending braces (}) than | |
640 | * opening ones ({). Will e.g. return true for the string "DNA} blahblal | |
641 | * {EPA" | |
642 | * | |
643 | * @param s | |
644 | * The string to check. | |
645 | * @return true if at any index the brace count is negative. | |
646 | */ | |
647 | private boolean hasNegativeBraceCount(String s) { | |
648 | // System.out.println(s); | |
649 | 0 | int i = 0, count = 0; |
650 | 0 | while (i < s.length()) { |
651 | 0 | if (s.charAt(i) == '{') |
652 | 0 | count++; |
653 | 0 | else if (s.charAt(i) == '}') |
654 | 0 | count--; |
655 | 0 | if (count < 0) |
656 | 0 | return true; |
657 | 0 | i++; |
658 | } | |
659 | 0 | return false; |
660 | } | |
661 | ||
662 | /** | |
663 | * This method is used to parse string labels, field names, entry type and | |
664 | * numbers outside brackets. | |
665 | */ | |
666 | private String parseTextToken() throws IOException { | |
667 | 0 | StringBuffer token = new StringBuffer(20); |
668 | ||
669 | while (true) { | |
670 | 0 | int c = read(); |
671 | // Util.pr(".. "+c); | |
672 | 0 | if (c == -1) { |
673 | 0 | _eof = true; |
674 | ||
675 | 0 | return token.toString(); |
676 | } | |
677 | ||
678 | 0 | if (Character.isLetterOrDigit((char) c) || (c == ':') || (c == '-') || (c == '_') |
679 | || (c == '*') || (c == '+') || (c == '.') || (c == '/') || (c == '\'')) { | |
680 | 0 | token.append((char) c); |
681 | } else { | |
682 | 0 | unread(c); |
683 | // Util.pr("Pasted text token: "+token.toString()); | |
684 | 0 | return token.toString(); |
685 | } | |
686 | 0 | } |
687 | } | |
688 | ||
689 | ||
690 | /** | |
691 | * Tries to restore the key | |
692 | * | |
693 | * @return rest of key on success, otherwise empty string | |
694 | * @throws IOException | |
695 | * on Reader-Error | |
696 | */ | |
697 | private String fixKey() throws IOException { | |
698 | 0 | StringBuilder key = new StringBuilder(); |
699 | 0 | int lookahead_used = 0; |
700 | char currentChar; | |
701 | ||
702 | // Find a char which ends key (','&&'\n') or entryfield ('='): | |
703 | do { | |
704 | 0 | currentChar = (char) read(); |
705 | 0 | key.append(currentChar); |
706 | 0 | lookahead_used++; |
707 | } while ((currentChar != ',' && currentChar != '\n' && currentChar != '=') | |
708 | 0 | && (lookahead_used < LOOKAHEAD)); |
709 | ||
710 | // Consumed a char too much, back into reader and remove from key: | |
711 | 0 | unread(currentChar); |
712 | 0 | key.deleteCharAt(key.length() - 1); |
713 | ||
714 | // Restore if possible: | |
715 | 0 | switch (currentChar) { |
716 | case '=': | |
717 | ||
718 | // Get entryfieldname, push it back and take rest as key | |
719 | 0 | key = key.reverse(); |
720 | ||
721 | 0 | boolean matchedAlpha = false; |
722 | 0 | for (int i = 0; i < key.length(); i++) { |
723 | 0 | currentChar = key.charAt(i); |
724 | ||
725 | /// Skip spaces: | |
726 | 0 | if (!matchedAlpha && currentChar == ' ') { |
727 | 0 | continue; |
728 | } | |
729 | 0 | matchedAlpha = true; |
730 | ||
731 | // Begin of entryfieldname (e.g. author) -> push back: | |
732 | 0 | unread(currentChar); |
733 | 0 | if (currentChar == ' ' || currentChar == '\n') { |
734 | ||
735 | /* | |
736 | * found whitespaces, entryfieldname completed -> key in | |
737 | * keybuffer, skip whitespaces | |
738 | */ | |
739 | 0 | StringBuilder newKey = new StringBuilder(); |
740 | 0 | for (int j = i; j < key.length(); j++) { |
741 | 0 | currentChar = key.charAt(j); |
742 | 0 | if (!Character.isWhitespace(currentChar)) { |
743 | 0 | newKey.append(currentChar); |
744 | } | |
745 | } | |
746 | ||
747 | // Finished, now reverse newKey and remove whitespaces: | |
748 | 0 | _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key.", |
749 | String.valueOf(line))); | |
750 | 0 | key = newKey.reverse(); |
751 | } | |
752 | } | |
753 | 0 | break; |
754 | ||
755 | case ',': | |
756 | ||
757 | 0 | _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key (contains whitespaces).", |
758 | String.valueOf(line))); | |
759 | ||
760 | case '\n': | |
761 | ||
762 | 0 | _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key (comma missing).", |
763 | String.valueOf(line))); | |
764 | ||
765 | 0 | break; |
766 | ||
767 | default: | |
768 | ||
769 | // No more lookahead, give up: | |
770 | 0 | unreadBuffer(key); |
771 | 0 | return ""; |
772 | } | |
773 | ||
774 | 0 | return removeWhitespaces(key).toString(); |
775 | } | |
776 | ||
777 | /** | |
778 | * removes whitespaces from <code>sb</code> | |
779 | * | |
780 | * @param sb | |
781 | * @return | |
782 | */ | |
783 | private StringBuilder removeWhitespaces(StringBuilder sb) { | |
784 | 0 | StringBuilder newSb = new StringBuilder(); |
785 | char current; | |
786 | 0 | for (int i = 0; i < sb.length(); ++i) { |
787 | 0 | current = sb.charAt(i); |
788 | 0 | if (!Character.isWhitespace(current)) |
789 | 0 | newSb.append(current); |
790 | } | |
791 | 0 | return newSb; |
792 | } | |
793 | ||
794 | /** | |
795 | * pushes buffer back into input | |
796 | * | |
797 | * @param sb | |
798 | * @throws IOException | |
799 | * can be thrown if buffer is bigger than LOOKAHEAD | |
800 | */ | |
801 | private void unreadBuffer(StringBuilder sb) throws IOException { | |
802 | 0 | for (int i = sb.length() - 1; i >= 0; --i) { |
803 | 0 | unread(sb.charAt(i)); |
804 | } | |
805 | 0 | } |
806 | ||
807 | ||
808 | /** | |
809 | * This method is used to parse the bibtex key for an entry. | |
810 | */ | |
811 | private String parseKey() throws IOException, NoLabelException { | |
812 | 0 | StringBuffer token = new StringBuffer(20); |
813 | ||
814 | while (true) { | |
815 | 0 | int c = read(); |
816 | // Util.pr(".. '"+(char)c+"'\t"+c); | |
817 | 0 | if (c == -1) { |
818 | 0 | _eof = true; |
819 | ||
820 | 0 | return token.toString(); |
821 | } | |
822 | ||
823 | // Ikke: #{}\uFFFD~\uFFFD | |
824 | // | |
825 | // G\uFFFDr: $_*+.-\/?"^ | |
826 | 0 | if (!Character.isWhitespace((char) c) |
827 | && (Character.isLetterOrDigit((char) c) || ((c != '#') && (c != '{') && (c != '}') | |
828 | && (c != '\uFFFD') && (c != '~') && (c != '\uFFFD') && (c != ',') && (c != '=')))) { | |
829 | 0 | token.append((char) c); |
830 | } else { | |
831 | ||
832 | 0 | if (Character.isWhitespace((char) c)) { |
833 | // We have encountered white space instead of the comma at | |
834 | // the end of | |
835 | // the key. Possibly the comma is missing, so we try to | |
836 | // return what we | |
837 | // have found, as the key and try to restore the rest in fixKey(). | |
838 | 0 | return token.toString()+fixKey(); |
839 | 0 | } else if (c == ',') { |
840 | 0 | unread(c); |
841 | 0 | return token.toString(); |
842 | // } else if (Character.isWhitespace((char)c)) { | |
843 | // throw new NoLabelException(token.toString()); | |
844 | 0 | } else if (c == '=') { |
845 | // If we find a '=' sign, it is either an error, or | |
846 | // the entry lacked a comma signifying the end of the key. | |
847 | ||
848 | 0 | return token.toString(); |
849 | // throw new NoLabelException(token.toString()); | |
850 | ||
851 | } else | |
852 | 0 | throw new IOException("Error in line " + line + ":" + "Character '" + (char) c |
853 | + "' is not " + "allowed in bibtex keys."); | |
854 | ||
855 | } | |
856 | 0 | } |
857 | ||
858 | } | |
859 | ||
860 | private class NoLabelException extends Exception { | |
861 | 0 | public NoLabelException(String hasRead) { |
862 | 0 | super(hasRead); |
863 | 0 | } |
864 | } | |
865 | ||
866 | private StringBuffer parseBracketedText() throws IOException { | |
867 | // Util.pr("Parse bracketed text"); | |
868 | 0 | StringBuffer value = new StringBuffer(); |
869 | ||
870 | 0 | consume('{'); |
871 | ||
872 | 0 | int brackets = 0; |
873 | ||
874 | 0 | while (!((peek() == '}') && (brackets == 0))) { |
875 | ||
876 | 0 | int j = read(); |
877 | 0 | if ((j == -1) || (j == 65535)) { |
878 | 0 | throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); |
879 | 0 | } else if (j == '{') |
880 | 0 | brackets++; |
881 | 0 | else if (j == '}') |
882 | 0 | brackets--; |
883 | ||
884 | // If we encounter whitespace of any kind, read it as a | |
885 | // simple space, and ignore any others that follow immediately. | |
886 | /* | |
887 | * if (j == '\n') { if (peek() == '\n') value.append('\n'); } else | |
888 | */ | |
889 | 0 | if (Character.isWhitespace((char) j)) { |
890 | 0 | String whs = skipAndRecordWhitespace(j); |
891 | ||
892 | // System.out.println(":"+whs+":"); | |
893 | ||
894 | 0 | if (!whs.equals("") && !whs.equals("\n\t")) { // && |
895 | // !whs.equals("\n")) | |
896 | ||
897 | 0 | whs = whs.replaceAll("\t", ""); // Remove tabulators. |
898 | ||
899 | // while (whs.endsWith("\t")) | |
900 | // whs = whs.substring(0, whs.length()-1); | |
901 | ||
902 | 0 | value.append(whs); |
903 | ||
904 | } else { | |
905 | 0 | value.append(' '); |
906 | } | |
907 | ||
908 | 0 | } else |
909 | 0 | value.append((char) j); |
910 | ||
911 | 0 | } |
912 | ||
913 | 0 | consume('}'); |
914 | ||
915 | 0 | return value; |
916 | } | |
917 | ||
918 | private StringBuffer parseBracketedTextExactly() throws IOException { | |
919 | ||
920 | 0 | StringBuffer value = new StringBuffer(); |
921 | ||
922 | 0 | consume('{'); |
923 | ||
924 | 0 | int brackets = 0; |
925 | ||
926 | 0 | while (!((peek() == '}') && (brackets == 0))) { |
927 | ||
928 | 0 | int j = read(); |
929 | 0 | if ((j == -1) || (j == 65535)) { |
930 | 0 | throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); |
931 | 0 | } else if (j == '{') |
932 | 0 | brackets++; |
933 | 0 | else if (j == '}') |
934 | 0 | brackets--; |
935 | ||
936 | 0 | value.append((char) j); |
937 | ||
938 | 0 | } |
939 | ||
940 | 0 | consume('}'); |
941 | ||
942 | 0 | return value; |
943 | } | |
944 | ||
945 | private StringBuffer parseQuotedFieldExactly() throws IOException { | |
946 | ||
947 | 0 | StringBuffer value = new StringBuffer(); |
948 | ||
949 | 0 | consume('"'); |
950 | ||
951 | 0 | int brackets = 0; |
952 | ||
953 | 0 | while (!((peek() == '"') && (brackets == 0))) { |
954 | ||
955 | 0 | int j = read(); |
956 | 0 | if ((j == -1) || (j == 65535)) { |
957 | 0 | throw new RuntimeException("Error in line " + line + ": EOF in mid-string"); |
958 | 0 | } else if (j == '{') |
959 | 0 | brackets++; |
960 | 0 | else if (j == '}') |
961 | 0 | brackets--; |
962 | ||
963 | 0 | value.append((char) j); |
964 | ||
965 | 0 | } |
966 | ||
967 | 0 | consume('"'); |
968 | ||
969 | 0 | return value; |
970 | } | |
971 | ||
972 | private void consume(char expected) throws IOException { | |
973 | 0 | int c = read(); |
974 | ||
975 | 0 | if (c != expected) { |
976 | 0 | throw new RuntimeException("Error in line " + line + ": Expected " + expected |
977 | + " but received " + (char) c); | |
978 | } | |
979 | ||
980 | 0 | } |
981 | ||
982 | private boolean consumeUncritically(char expected) throws IOException { | |
983 | int c; | |
984 | 185675885 | while (((c = read()) != expected) && (c != -1) && (c != 65535)){ |
985 | // do nothing | |
986 | } | |
987 | ||
988 | 9772415 | if ((c == -1) || (c == 65535)) |
989 | 9772415 | _eof = true; |
990 | ||
991 | // Return true if we actually found the character we were looking for: | |
992 | 9772415 | return c == expected; |
993 | } | |
994 | ||
995 | private void consume(char expected1, char expected2) throws IOException { | |
996 | // Consumes one of the two, doesn't care which appears. | |
997 | ||
998 | 0 | int c = read(); |
999 | ||
1000 | 0 | if ((c != expected1) && (c != expected2)) { |
1001 | 0 | throw new RuntimeException("Error in line " + line + ": Expected " + expected1 + " or " |
1002 | + expected2 + " but received " + c); | |
1003 | ||
1004 | } | |
1005 | ||
1006 | 0 | } |
1007 | ||
1008 | public void checkEntryTypes(ParserResult _pr) { | |
1009 | ||
1010 | 9772415 | for (BibtexEntry be : _db.getEntries()){ |
1011 | 0 | if (be.getType() instanceof UnknownEntryType) { |
1012 | // Look up the unknown type name in our map of parsed types: | |
1013 | ||
1014 | 0 | Object o = entryTypes.get(be.getType().getName().toLowerCase()); |
1015 | 0 | if (o != null) { |
1016 | 0 | BibtexEntryType type = (BibtexEntryType) o; |
1017 | 0 | be.setType(type); |
1018 | 0 | } else { |
1019 | // System.out.println("Unknown entry type: | |
1020 | // "+be.getType().getName()); | |
1021 | 0 | _pr |
1022 | .addWarning(Globals.lang("unknown entry type") + ": " | |
1023 | + be.getType().getName() + ". " + Globals.lang("Type set to 'other'") | |
1024 | + "."); | |
1025 | 0 | be.setType(BibtexEntryType.OTHER); |
1026 | } | |
1027 | 0 | } |
1028 | } | |
1029 | 9772415 | } |
1030 | ||
1031 | /** | |
1032 | * Read the JabRef signature, if any, and find what version number is given. | |
1033 | * This method advances the file reader only as far as the end of the first line of | |
1034 | * the JabRef signature, or up until the point where the read characters don't match | |
1035 | * the signature. This should ensure that the parser can continue from that spot without | |
1036 | * resetting the reader, without the risk of losing important contents. | |
1037 | * | |
1038 | * @return The version number, or null if not found. | |
1039 | * @throws IOException | |
1040 | */ | |
1041 | private String readJabRefVersionNumber() throws IOException { | |
1042 | 9772415 | StringBuffer headerText = new StringBuffer(); |
1043 | ||
1044 | 9772415 | boolean keepon = true; |
1045 | 9772415 | int piv = 0; |
1046 | int c; | |
1047 | ||
1048 | // We start by reading the standard part of the signature, which precedes | |
1049 | // the version number: | |
1050 | // This file was created with JabRef X.y. | |
1051 | 342034525 | while (keepon) { |
1052 | 342034525 | c = peek(); |
1053 | 342034525 | headerText.append((char) c); |
1054 | 342034525 | if ((piv == 0) && (Character.isWhitespace((char) c) || (c == '%'))) |
1055 | 19544830 | read(); |
1056 | 322489695 | else if (c == GUIGlobals.SIGNATURE.charAt(piv)) { |
1057 | 322489695 | piv++; |
1058 | 322489695 | read(); |
1059 | } | |
1060 | else { | |
1061 | 0 | keepon = false; |
1062 | 0 | return null; |
1063 | } | |
1064 | ||
1065 | // Check if we've reached the end of the signature's standard part: | |
1066 | 342034525 | if (piv == GUIGlobals.SIGNATURE.length()) { |
1067 | 9772415 | keepon = false; |
1068 | ||
1069 | // Found the standard part. Now read the version number: | |
1070 | 9772415 | StringBuilder sb = new StringBuilder(); |
1071 | 58634540 | while (((c=read()) != '\n') && (c != -1)) |
1072 | 48862125 | sb.append((char)c); |
1073 | 9772415 | String versionNum = sb.toString().trim(); |
1074 | // See if it fits the X.y. pattern: | |
1075 | 9772415 | if (Pattern.compile("[1-9]+\\.[1-9A-Za-z ]+\\.").matcher(versionNum).matches()) { |
1076 | // It matched. Remove the last period and return: | |
1077 | 9772415 | return versionNum.substring(0, versionNum.length()-1); |
1078 | } | |
1079 | 0 | else if (Pattern.compile("[1-9]+\\.[1-9]\\.[1-9A-Za-z ]+\\.").matcher(versionNum).matches()) { |
1080 | // It matched. Remove the last period and return: | |
1081 | 0 | return versionNum.substring(0, versionNum.length()-1); |
1082 | } | |
1083 | ||
1084 | 0 | } |
1085 | } | |
1086 | ||
1087 | 0 | return null; |
1088 | } | |
1089 | ||
1090 | /** | |
1091 | * After a JabRef version number has been parsed and put into _pr, | |
1092 | * parse the version number to determine the JabRef major and minor version | |
1093 | * number | |
1094 | */ | |
1095 | private void setMajorMinorVersions() { | |
1096 | 9772415 | String v = _pr.getJabrefVersion(); |
1097 | 9772415 | Pattern p = Pattern.compile("([0-9]+)\\.([0-9]+).*"); |
1098 | 9772415 | Pattern p2 = Pattern.compile("([0-9]+)\\.([0-9]+)\\.([0-9]+).*"); |
1099 | 9772415 | Matcher m = p.matcher(v); |
1100 | 9772415 | Matcher m2 = p2.matcher(v); |
1101 | 9772415 | if (m.matches()) |
1102 | 9772415 | if (m.groupCount() >= 2) { |
1103 | 9772415 | _pr.setJabrefMajorVersion(Integer.parseInt(m.group(1))); |
1104 | 9772415 | _pr.setJabrefMinorVersion(Integer.parseInt(m.group(2))); |
1105 | } | |
1106 | 9772415 | if (m2.matches()) |
1107 | 0 | if (m2.groupCount() >= 3) { |
1108 | 0 | _pr.setJabrefMinor2Version(Integer.parseInt(m2.group(3))); |
1109 | } | |
1110 | 9772415 | } |
1111 | } |