Coverage Report - net.sf.jabref.imports.BibtexParser
 
Classes in this File Line Coverage Branch Coverage Complexity
BibtexParser
18%
77/409
11%
36/318
7.125
BibtexParser$NoLabelException
0%
0/3
N/A
7.125
 
 1  
 /*
 2  
  Copyright (C) 2003-06 David Weitzman, Nizar N. Batada, Morten O. Alver, Christopher Oezbek
 3  
 
 4  
  All programs in this directory and
 5  
  subdirectories are published under the GNU General Public License as
 6  
  described below.
 7  
 
 8  
  This program is free software; you can redistribute it and/or modify
 9  
  it under the terms of the GNU General Public License as published by
 10  
  the Free Software Foundation; either version 2 of the License, or (at
 11  
  your option) any later version.
 12  
 
 13  
  This program is distributed in the hope that it will be useful, but
 14  
  WITHOUT ANY WARRANTY; without even the implied warranty of
 15  
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 16  
  General Public License for more details.
 17  
 
 18  
  You should have received a copy of the GNU General Public License
 19  
  along with this program; if not, write to the Free Software
 20  
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 21  
  USA
 22  
 
 23  
  Further information about the GNU GPL is available at:
 24  
  http://www.gnu.org/copyleft/gpl.ja.html
 25  
 
 26  
  */
 27  
 
 28  
 package net.sf.jabref.imports;
 29  
 
 30  
 import java.io.BufferedReader;
 31  
 import java.io.IOException;
 32  
 import java.io.PushbackReader;
 33  
 import java.io.Reader;
 34  
 import java.io.StringReader;
 35  
 import java.util.Collection;
 36  
 import java.util.HashMap;
 37  
 import java.util.regex.Matcher;
 38  
 import java.util.regex.Pattern;
 39  
 
 40  
 import net.sf.jabref.BibtexDatabase;
 41  
 import net.sf.jabref.BibtexEntry;
 42  
 import net.sf.jabref.BibtexEntryType;
 43  
 import net.sf.jabref.BibtexFields;
 44  
 import net.sf.jabref.BibtexString;
 45  
 import net.sf.jabref.CustomEntryType;
 46  
 import net.sf.jabref.GUIGlobals;
 47  
 import net.sf.jabref.Globals;
 48  
 import net.sf.jabref.JabRefPreferences;
 49  
 import net.sf.jabref.KeyCollisionException;
 50  
 import net.sf.jabref.UnknownEntryType;
 51  
 import net.sf.jabref.Util;
 52  
 
 53  
 /**
 54  
  * Class for importing BibTeX-files.
 55  
  * 
 56  
  * Use:
 57  
  * 
 58  
  * BibtexParser parser = new BibtexParser(reader);
 59  
  * 
 60  
  * ParserResult result = parser.parse();
 61  
  * 
 62  
  * or
 63  
  * 
 64  
  * ParserResult result = BibtexParser.parse(reader);
 65  
  * 
 66  
  * Can be used stand-alone.
 67  
  * 
 68  
  * @author David Weitzman
 69  
  * @author Nizar N. Batada
 70  
  * @author Morten O. Alver
 71  
  * @author Christopher Oezbek 
 72  
  */
 73  
 public class BibtexParser {
 74  
         
 75  
         private PushbackReader _in;
 76  
 
 77  
         private BibtexDatabase _db;
 78  
 
 79  
         private HashMap<String, String> _meta;
 80  
         
 81  
         private HashMap<String, BibtexEntryType> entryTypes;
 82  
 
 83  9772415
         private boolean _eof = false;
 84  
 
 85  9772415
         private int line = 1;
 86  
 
 87  9772415
         private FieldContentParser fieldContentParser = new FieldContentParser();
 88  
 
 89  
         private ParserResult _pr;
 90  
         
 91  9766015
         private static final Integer LOOKAHEAD = 64;
 92  
 
 93  9772415
         public BibtexParser(Reader in) {
 94  
 
 95  9772415
                 if (in == null) {
 96  0
                         throw new NullPointerException();
 97  
                 }
 98  9772415
                 if (Globals.prefs == null) {
 99  0
                         Globals.prefs = JabRefPreferences.getInstance();
 100  
                 }
 101  9772415
                 _in = new PushbackReader(in, LOOKAHEAD);
 102  9772415
         }
 103  
 
 104  
         /**
 105  
          * Shortcut usage to create a Parser and read the input.
 106  
          * 
 107  
          * @param in -
 108  
          *            Reader to read from
 109  
          * @throws IOException
 110  
          */
 111  
         public static ParserResult parse(Reader in) throws IOException {
 112  0
                 BibtexParser parser = new BibtexParser(in);
 113  0
                 return parser.parse();
 114  
         }
 115  
         
 116  
         
 117  
         /**
 118  
          * Parses BibtexEntries from the given string and returns the collection of all entries found.
 119  
          * 
 120  
          * @param bibtexString
 121  
          * 
 122  
          * @return Returns null if an error occurred, returns an empty collection if no entries where found. 
 123  
          */
 124  
         public static Collection<BibtexEntry> fromString(String bibtexString){
 125  0
                 StringReader reader = new StringReader(bibtexString);
 126  0
                 BibtexParser parser = new BibtexParser(reader); 
 127  
                 try {
 128  0
                         return parser.parse().getDatabase().getEntries();
 129  0
                 } catch (Exception e){
 130  0
                         return null;
 131  
                 }
 132  
         }
 133  
         
 134  
         /**
 135  
          * Parses BibtexEntries from the given string and returns one entry found (or null if none found)
 136  
          * 
 137  
          * It is undetermined which entry is returned, so use this in case you know there is only one entry in the string.
 138  
          * 
 139  
          * @param bibtexString
 140  
          * 
 141  
          * @return The bibtexentry or null if non was found or an error occurred.
 142  
          */
 143  
         public static BibtexEntry singleFromString(String bibtexString) {
 144  0
                 Collection<BibtexEntry> c = fromString(bibtexString);
 145  0
                 if (c == null){
 146  0
                         return null;
 147  
                 }
 148  0
                 return c.iterator().next();
 149  
         }        
 150  
         
 151  
         /**
 152  
          * Check whether the source is in the correct format for this importer.
 153  
          */
 154  
         public static boolean isRecognizedFormat(Reader inOrig) throws IOException {
 155  
                 // Our strategy is to look for the "@<type>    {" line.
 156  0
                 BufferedReader in = new BufferedReader(inOrig);
 157  
 
 158  0
                 Pattern pat1 = Pattern.compile("@[a-zA-Z]*\\s*\\{");
 159  
 
 160  
                 String str;
 161  
 
 162  0
                 while ((str = in.readLine()) != null) {
 163  
 
 164  0
                         if (pat1.matcher(str).find())
 165  0
                                 return true;
 166  0
                         else if (str.startsWith(GUIGlobals.SIGNATURE))
 167  0
                                 return true;
 168  
                 }
 169  
 
 170  0
                 return false;
 171  
         }
 172  
 
 173  
         private void skipWhitespace() throws IOException {
 174  
                 int c;
 175  
 
 176  
                 while (true) {
 177  9772415
                         c = read();
 178  9772415
                         if ((c == -1) || (c == 65535)) {
 179  0
                                 _eof = true;
 180  0
                                 return;
 181  
                         }
 182  
 
 183  9772415
                         if (Character.isWhitespace((char) c)) {
 184  0
                                 continue;
 185  
                         } else
 186  
                                 // found non-whitespace char
 187  
                                 // Util.pr("SkipWhitespace, stops: "+c);
 188  9772415
                                 unread(c);
 189  
                         /*
 190  
                          * try { Thread.currentThread().sleep(500); } catch
 191  
                          * (InterruptedException ex) {}
 192  
                          */
 193  9772415
                         break;
 194  
                 }
 195  9772415
         }
 196  
 
 197  
         private String skipAndRecordWhitespace(int j) throws IOException {
 198  
                 int c;
 199  0
                 StringBuffer sb = new StringBuffer();
 200  0
                 if (j != ' ')
 201  0
                         sb.append((char) j);
 202  
                 while (true) {
 203  0
                         c = read();
 204  0
                         if ((c == -1) || (c == 65535)) {
 205  0
                                 _eof = true;
 206  0
                                 return sb.toString();
 207  
                         }
 208  
 
 209  0
                         if (Character.isWhitespace((char) c)) {
 210  0
                                 if (c != ' ')
 211  0
                                         sb.append((char) c);
 212  
                                 continue;
 213  
                         } else
 214  
                                 // found non-whitespace char
 215  
                                 // Util.pr("SkipWhitespace, stops: "+c);
 216  0
                                 unread(c);
 217  
                         /*
 218  
                          * try { Thread.currentThread().sleep(500); } catch
 219  
                          * (InterruptedException ex) {}
 220  
                          */
 221  0
                         break;
 222  
                 }
 223  0
                 return sb.toString();
 224  
         }
 225  
 
 226  
         /**
 227  
          * Will parse the BibTex-Data found when reading from reader.
 228  
          * 
 229  
          * The reader will be consumed.
 230  
          * 
 231  
          * Multiple calls to parse() return the same results
 232  
          * 
 233  
          * @return ParserResult
 234  
          * @throws IOException
 235  
          */
 236  
         public ParserResult parse() throws IOException {
 237  
 
 238  
                 // If we already parsed this, just return it.
 239  9772415
                 if (_pr != null)
 240  0
                         return _pr;
 241  
 
 242  9772415
         _db = new BibtexDatabase(); // Bibtex related contents.
 243  9772415
                 _meta = new HashMap<String, String>(); // Metadata in comments for Bibkeeper.
 244  9772415
                 entryTypes = new HashMap<String, BibtexEntryType>(); // To store custem entry types parsed.
 245  9772415
                 _pr = new ParserResult(_db, _meta, entryTypes);
 246  
 
 247  
         // First see if we can find the version number of the JabRef version that
 248  
         // wrote the file:
 249  9772415
         String versionNum = readJabRefVersionNumber();
 250  9772415
         if (versionNum != null) {
 251  9772415
             _pr.setJabrefVersion(versionNum);
 252  9772415
             setMajorMinorVersions();
 253  
         }
 254  
         else {
 255  
             // No version number found. However, we have only
 256  
         }
 257  
 
 258  9772415
         skipWhitespace();
 259  
 
 260  
                 try {
 261  9772415
                         while (!_eof) {
 262  9772415
                                 boolean found = consumeUncritically('@');
 263  9772415
                                 if (!found)
 264  9772415
                                         break;
 265  0
                                 skipWhitespace();
 266  0
                                 String entryType = parseTextToken();
 267  0
                                 BibtexEntryType tp = BibtexEntryType.getType(entryType);
 268  0
                                 boolean isEntry = (tp != null);
 269  
                                 // Util.pr(tp.getName());
 270  0
                                 if (!isEntry) {
 271  
                                         // The entry type name was not recognized. This can mean
 272  
                                         // that it is a string, preamble, or comment. If so,
 273  
                                         // parse and set accordingly. If not, assume it is an entry
 274  
                                         // with an unknown type.
 275  0
                                         if (entryType.toLowerCase().equals("preamble")) {
 276  0
                                                 _db.setPreamble(parsePreamble());
 277  0
                                         } else if (entryType.toLowerCase().equals("string")) {
 278  0
                                                 BibtexString bs = parseString();
 279  
                                                 try {
 280  0
                                                         _db.addString(bs);
 281  0
                                                 } catch (KeyCollisionException ex) {
 282  0
                                                         _pr.addWarning(Globals.lang("Duplicate string name") + ": "
 283  
                                                                 + bs.getName());
 284  
                                                         // ex.printStackTrace();
 285  0
                                                 }
 286  0
                                         } else if (entryType.toLowerCase().equals("comment")) {
 287  0
                                                 StringBuffer commentBuf = parseBracketedTextExactly();
 288  
                                                 /**
 289  
                                                  * 
 290  
                                                  * Metadata are used to store Bibkeeper-specific
 291  
                                                  * information in .bib files.
 292  
                                                  * 
 293  
                                                  * Metadata are stored in bibtex files in the format
 294  
                                                  * 
 295  
                                                  * @comment{jabref-meta: type:data0;data1;data2;...}
 296  
                                                  * 
 297  
                                                  * Each comment that starts with the META_FLAG is stored
 298  
                                                  * in the meta HashMap, with type as key. Unluckily, the
 299  
                                                  * old META_FLAG bibkeeper-meta: was used in JabRef 1.0
 300  
                                                  * and 1.1, so we need to support it as well. At least
 301  
                                                  * for a while. We'll always save with the new one.
 302  
                                                  */
 303  0
                                                 String comment = commentBuf.toString().replaceAll("[\\x0d\\x0a]", "");
 304  0
                                                 if (comment.substring(0,
 305  
                                                         Math.min(comment.length(), GUIGlobals.META_FLAG.length())).equals(
 306  
                                                         GUIGlobals.META_FLAG)
 307  
                                                         || comment.substring(0,
 308  
                                                                 Math.min(comment.length(), GUIGlobals.META_FLAG_OLD.length()))
 309  
                                                                 .equals(GUIGlobals.META_FLAG_OLD)) {
 310  
 
 311  
                                                         String rest;
 312  0
                                                         if (comment.substring(0, GUIGlobals.META_FLAG.length()).equals(
 313  
                                                                 GUIGlobals.META_FLAG))
 314  0
                                                                 rest = comment.substring(GUIGlobals.META_FLAG.length());
 315  
                                                         else
 316  0
                                                                 rest = comment.substring(GUIGlobals.META_FLAG_OLD.length());
 317  
 
 318  0
                                                         int pos = rest.indexOf(':');
 319  
 
 320  0
                                                         if (pos > 0)
 321  0
                                                                 _meta.put(rest.substring(0, pos), rest.substring(pos + 1));
 322  
                                                         // We remove all line breaks in the metadata - these
 323  
                                                         // will have been inserted
 324  
                                                         // to prevent too long lines when the file was
 325  
                                                         // saved, and are not part of the data.
 326  
                                                 }
 327  
 
 328  
                                                 /**
 329  
                                                  * A custom entry type can also be stored in a
 330  
                                                  * 
 331  
                                                  * @comment:
 332  
                                                  */
 333  0
                                                 if (comment.substring(0,
 334  
                                                         Math.min(comment.length(), GUIGlobals.ENTRYTYPE_FLAG.length())).equals(
 335  
                                                         GUIGlobals.ENTRYTYPE_FLAG)) {
 336  
 
 337  0
                                                         CustomEntryType typ = CustomEntryType.parseEntryType(comment);
 338  0
                                                         entryTypes.put(typ.getName().toLowerCase(), typ);
 339  
 
 340  
                                                 }
 341  0
                                         } else {
 342  
                                                 // The entry type was not recognized. This may mean that
 343  
                                                 // it is a custom entry type whose definition will
 344  
                                                 // appear
 345  
                                                 // at the bottom of the file. So we use an
 346  
                                                 // UnknownEntryType
 347  
                                                 // to remember the type name by.
 348  0
                                                 tp = new UnknownEntryType(entryType.toLowerCase());
 349  
                                                 // System.out.println("unknown type: "+entryType);
 350  0
                                                 isEntry = true;
 351  
                                         }
 352  
                                 }
 353  
 
 354  0
                                 if (isEntry) // True if not comment, preamble or string.
 355  
                                 {
 356  
                                         /**
 357  
                                          * Morten Alver 13 Aug 2006: Trying to make the parser more
 358  
                                          * robust. If an exception is thrown when parsing an entry,
 359  
                                          * drop the entry and try to resume parsing. Add a warning
 360  
                                          * for the user.
 361  
                                          * 
 362  
                                          * An alternative solution is to try rescuing the entry for
 363  
                                          * which parsing failed, by returning the entry with the
 364  
                                          * exception and adding it before parsing is continued.
 365  
                                          */
 366  
                                         try {
 367  0
                                                 BibtexEntry be = parseEntry(tp);
 368  
 
 369  0
                                                 boolean duplicateKey = _db.insertEntry(be);
 370  0
                                                 if (duplicateKey) // JZTODO lyrics
 371  0
                             _pr.addDuplicateKey(be.getCiteKey());
 372  
                                                         /*_pr.addWarning(Globals.lang("duplicate BibTeX key") + ": "
 373  
                                                                 + be.getCiteKey() + " ("
 374  
                                                                 + Globals.lang("grouping may not work for this entry") + ")");                        */
 375  0
                                                 else if (be.getCiteKey() == null || be.getCiteKey().equals("")) {
 376  0
                                                         _pr.addWarning(Globals.lang("empty BibTeX key") + ": "
 377  
                                                                 + be.getAuthorTitleYear(40) + " ("
 378  
                                                                 + Globals.lang("grouping may not work for this entry") + ")");
 379  
                                                 }
 380  0
                                         } catch (IOException ex) {
 381  0
                                                 ex.printStackTrace();
 382  0
                                                 _pr.addWarning(Globals.lang("Error occured when parsing entry") + ": '"
 383  
                                                         + ex.getMessage() + "'. " + Globals.lang("Skipped entry."));
 384  
 
 385  0
                                         }
 386  
                                 }
 387  
 
 388  0
                                 skipWhitespace();
 389  0
                         }
 390  
 
 391  
                         // Before returning the database, update entries with unknown type
 392  
                         // based on parsed type definitions, if possible.
 393  9772415
                         checkEntryTypes(_pr);
 394  
 
 395  9772415
                         return _pr;
 396  0
                 } catch (KeyCollisionException kce) {
 397  
                         // kce.printStackTrace();
 398  0
                         throw new IOException("Duplicate ID in bibtex file: " + kce.toString());
 399  
                 }
 400  
         }
 401  
 
 402  
         private int peek() throws IOException {
 403  342034525
                 int c = read();
 404  342034525
                 unread(c);
 405  
 
 406  342034525
                 return c;
 407  
         }
 408  
 
 409  
         private int read() throws IOException {
 410  938151890
                 int c = _in.read();
 411  938151890
                 if (c == '\n')
 412  29317245
                         line++;
 413  938151890
                 return c;
 414  
         }
 415  
 
 416  
         private void unread(int c) throws IOException {
 417  351806940
                 if (c == '\n')
 418  0
                         line--;
 419  351806940
                 _in.unread(c);
 420  351806940
         }
 421  
 
 422  
         public BibtexString parseString() throws IOException {
 423  
                 // Util.pr("Parsing string");
 424  0
                 skipWhitespace();
 425  0
                 consume('{', '(');
 426  
                 // while (read() != '}');
 427  0
                 skipWhitespace();
 428  
                 // Util.pr("Parsing string name");
 429  0
                 String name = parseTextToken();
 430  
                 // Util.pr("Parsed string name");
 431  0
                 skipWhitespace();
 432  
                 // Util.pr("Now the contents");
 433  0
                 consume('=');
 434  0
                 String content = parseFieldContent(name);
 435  
                 // Util.pr("Now I'm going to consume a }");
 436  0
                 consume('}', ')');
 437  
                 // Util.pr("Finished string parsing.");
 438  0
                 String id = Util.createNeutralId();
 439  0
                 return new BibtexString(id, name, content);
 440  
         }
 441  
 
 442  
         public String parsePreamble() throws IOException {
 443  0
                 return parseBracketedText().toString();
 444  
         }
 445  
 
 446  
         public BibtexEntry parseEntry(BibtexEntryType tp) throws IOException {
 447  0
                 String id = Util.createNeutralId();// createId(tp, _db);
 448  0
                 BibtexEntry result = new BibtexEntry(id, tp);
 449  0
                 skipWhitespace();
 450  0
                 consume('{', '(');
 451  0
         int c = peek();
 452  0
         if ((c != '\n') && (c != '\r'))
 453  0
             skipWhitespace();
 454  0
                 String key = null;
 455  0
                 boolean doAgain = true;
 456  0
                 while (doAgain) {
 457  0
                         doAgain = false;
 458  
                         try {
 459  0
                                 if (key != null)
 460  0
                                         key = key + parseKey();// parseTextToken(),
 461  
                                 else
 462  0
                                         key = parseKey();
 463  0
                         } catch (NoLabelException ex) {
 464  
                                 // This exception will be thrown if the entry lacks a key
 465  
                                 // altogether, like in "@article{ author = { ...".
 466  
                                 // It will also be thrown if a key contains =.
 467  0
                                 c = (char) peek();
 468  0
                                 if (Character.isWhitespace(c) || (c == '{') || (c == '\"')) {
 469  0
                                         String fieldName = ex.getMessage().trim().toLowerCase();
 470  0
                                         String cont = parseFieldContent(fieldName);
 471  0
                                         result.setField(fieldName, cont);
 472  0
                                 } else {
 473  0
                                         if (key != null)
 474  0
                                                 key = key + ex.getMessage() + "=";
 475  
                                         else
 476  0
                                                 key = ex.getMessage() + "=";
 477  0
                                         doAgain = true;
 478  
                                 }
 479  0
                         }
 480  
                 }
 481  
 
 482  0
                 if ((key != null) && key.equals(""))
 483  0
                         key = null;
 484  
 
 485  0
                 result.setField(BibtexFields.KEY_FIELD, key);
 486  0
                 skipWhitespace();
 487  
 
 488  
                 while (true) {
 489  0
                         c = peek();
 490  0
                         if ((c == '}') || (c == ')')) {
 491  0
                                 break;
 492  
                         }
 493  
 
 494  0
                         if (c == ',')
 495  0
                                 consume(',');
 496  
 
 497  0
                         skipWhitespace();
 498  
 
 499  0
                         c = peek();
 500  0
                         if ((c == '}') || (c == ')')) {
 501  0
                                 break;
 502  
                         }
 503  0
                         parseField(result);
 504  
                 }
 505  
 
 506  0
                 consume('}', ')');
 507  0
                 return result;
 508  
         }
 509  
 
 510  
         private void parseField(BibtexEntry entry) throws IOException {
 511  0
                 String key = parseTextToken().toLowerCase();
 512  
                 // Util.pr("Field: _"+key+"_");
 513  0
                 skipWhitespace();
 514  0
                 consume('=');
 515  0
                 String content = parseFieldContent(key);
 516  
                 // Now, if the field in question is set up to be fitted automatically
 517  
                 // with braces around
 518  
                 // capitals, we should remove those now when reading the field:
 519  0
                 if (Globals.prefs.putBracesAroundCapitals(key)) {
 520  0
                         content = Util.removeBracesAroundCapitals(content);
 521  
                 }
 522  0
                 if (content.length() > 0) {
 523  0
                         if (entry.getField(key) == null)
 524  0
                                 entry.setField(key, content);
 525  
                         else {
 526  
                                 // The following hack enables the parser to deal with multiple
 527  
                                 // author or
 528  
                                 // editor lines, stringing them together instead of getting just
 529  
                                 // one of them.
 530  
                                 // Multiple author or editor lines are not allowed by the bibtex
 531  
                                 // format, but
 532  
                                 // at least one online database exports bibtex like that, making
 533  
                                 // it inconvenient
 534  
                                 // for users if JabRef didn't accept it.
 535  0
                                 if (key.equals("author") || key.equals("editor"))
 536  0
                                         entry.setField(key, entry.getField(key) + " and " + content);
 537  
                         }
 538  
                 }
 539  0
         }
 540  
 
 541  
         private String parseFieldContent(String key) throws IOException {
 542  0
                 skipWhitespace();
 543  0
                 StringBuffer value = new StringBuffer();
 544  0
                 int c = '.';
 545  
 
 546  0
                 while (((c = peek()) != ',') && (c != '}') && (c != ')')) {
 547  
 
 548  0
                         if (_eof) {
 549  0
                                 throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
 550  
                         }
 551  0
                         if (c == '"') {
 552  0
                                 StringBuffer text = parseQuotedFieldExactly();
 553  0
                                 value.append(fieldContentParser.format(text));
 554  
                                 /*
 555  
                                  * 
 556  
                                  * The following code doesn't handle {"} correctly: // value is
 557  
                                  * a string consume('"');
 558  
                                  * 
 559  
                                  * while (!((peek() == '"') && (j != '\\'))) { j = read(); if
 560  
                                  * (_eof || (j == -1) || (j == 65535)) { throw new
 561  
                                  * RuntimeException("Error in line "+line+ ": EOF in
 562  
                                  * mid-string"); }
 563  
                                  * 
 564  
                                  * value.append((char) j); }
 565  
                                  * 
 566  
                                  * consume('"');
 567  
                                  */
 568  0
                         } else if (c == '{') {
 569  
                                 // Value is a string enclosed in brackets. There can be pairs
 570  
                                 // of brackets inside of a field, so we need to count the
 571  
                                 // brackets to know when the string is finished.
 572  0
                                 StringBuffer text = parseBracketedTextExactly();
 573  0
                                 value.append(fieldContentParser.format(text, key));
 574  
 
 575  0
                         } else if (Character.isDigit((char) c)) { // value is a number
 576  
 
 577  0
                                 String numString = parseTextToken();
 578  
                 // Morten Alver 2007-07-04: I don't see the point of parsing the integer
 579  
                 // and converting it back to a string, so I'm removing the construct below
 580  
                 // the following line:
 581  0
                 value.append(numString);
 582  
                 /*
 583  
                 try {
 584  
                                         // Fixme: What is this for?
 585  
                                         value.append(String.valueOf(Integer.parseInt(numString)));
 586  
                                 } catch (NumberFormatException e) {
 587  
                                         // If Integer could not be parsed then just add the text
 588  
                                         // Used to fix [ 1594123 ] Failure to import big numbers
 589  
                                         value.append(numString);
 590  
                                 }
 591  
                                 */
 592  0
                         } else if (c == '#') {
 593  0
                                 consume('#');
 594  
                         } else {
 595  0
                                 String textToken = parseTextToken();
 596  0
                                 if (textToken.length() == 0)
 597  0
                                         throw new IOException("Error in line " + line + " or above: "
 598  
                                                 + "Empty text token.\nThis could be caused "
 599  
                                                 + "by a missing comma between two fields.");
 600  0
                                 value.append("#").append(textToken).append("#");
 601  
                                 // Util.pr(parseTextToken());
 602  
                                 // throw new RuntimeException("Unknown field type");
 603  
                         }
 604  0
                         skipWhitespace();
 605  
                 }
 606  
                 // Util.pr("Returning field content: "+value.toString());
 607  
 
 608  
                 // Check if we are to strip extra pairs of braces before returning:
 609  0
                 if (Globals.prefs.getBoolean("autoDoubleBraces")) {
 610  
                         // Do it:
 611  
                         while ((value.length() > 1) && (value.charAt(0) == '{')
 612  0
                                 && (value.charAt(value.length() - 1) == '}')) {
 613  0
                                 value.deleteCharAt(value.length() - 1);
 614  0
                                 value.deleteCharAt(0);
 615  
                         }
 616  
                         // Problem: if the field content is "{DNA} blahblah {EPA}", one pair
 617  
                         // too much will be removed.
 618  
                         // Check if this is the case, and re-add as many pairs as needed.
 619  0
                         while (hasNegativeBraceCount(value.toString())) {
 620  0
                                 value.insert(0, '{');
 621  0
                                 value.append('}');
 622  
                         }
 623  
 
 624  
                 }
 625  0
                 return value.toString();
 626  
 
 627  
         }
 628  
 
 629  
         /**
 630  
          * Original content moved to parseFieldContent(String).
 631  
          * @return
 632  
          * @throws IOException
 633  
          */
 634  
 //        private String parseFieldContent() throws IOException {
 635  
 //                return parseFieldContent(null);
 636  
 //        }
 637  
 
 638  
         /**
 639  
          * Check if a string at any point has had more ending braces (}) than
 640  
          * opening ones ({). Will e.g. return true for the string "DNA} blahblal
 641  
          * {EPA"
 642  
          * 
 643  
          * @param s
 644  
          *            The string to check.
 645  
          * @return true if at any index the brace count is negative.
 646  
          */
 647  
         private boolean hasNegativeBraceCount(String s) {
 648  
                 // System.out.println(s);
 649  0
                 int i = 0, count = 0;
 650  0
                 while (i < s.length()) {
 651  0
                         if (s.charAt(i) == '{')
 652  0
                                 count++;
 653  0
                         else if (s.charAt(i) == '}')
 654  0
                                 count--;
 655  0
                         if (count < 0)
 656  0
                                 return true;
 657  0
                         i++;
 658  
                 }
 659  0
                 return false;
 660  
         }
 661  
 
 662  
         /**
 663  
          * This method is used to parse string labels, field names, entry type and
 664  
          * numbers outside brackets.
 665  
          */
 666  
         private String parseTextToken() throws IOException {
 667  0
                 StringBuffer token = new StringBuffer(20);
 668  
 
 669  
                 while (true) {
 670  0
                         int c = read();
 671  
                         // Util.pr(".. "+c);
 672  0
                         if (c == -1) {
 673  0
                                 _eof = true;
 674  
 
 675  0
                                 return token.toString();
 676  
                         }
 677  
 
 678  0
                         if (Character.isLetterOrDigit((char) c) || (c == ':') || (c == '-') || (c == '_')
 679  
                                 || (c == '*') || (c == '+') || (c == '.') || (c == '/') || (c == '\'')) {
 680  0
                                 token.append((char) c);
 681  
                         } else {
 682  0
                                 unread(c);
 683  
                                 // Util.pr("Pasted text token: "+token.toString());
 684  0
                                 return token.toString();
 685  
                         }
 686  0
                 }
 687  
         }
 688  
         
 689  
         
 690  
         /**
 691  
          * Tries to restore the key
 692  
          * 
 693  
          * @return rest of key on success, otherwise empty string
 694  
          * @throws IOException
 695  
          *             on Reader-Error
 696  
          */
 697  
     private String fixKey() throws IOException {
 698  0
         StringBuilder key = new StringBuilder();
 699  0
         int lookahead_used = 0;
 700  
         char currentChar;
 701  
 
 702  
         // Find a char which ends key (','&&'\n') or entryfield ('='):
 703  
         do {
 704  0
             currentChar = (char) read();
 705  0
             key.append(currentChar);
 706  0
             lookahead_used++;
 707  
         } while ((currentChar != ',' && currentChar != '\n' && currentChar != '=')
 708  0
                 && (lookahead_used < LOOKAHEAD));
 709  
 
 710  
         // Consumed a char too much, back into reader and remove from key:
 711  0
         unread(currentChar);
 712  0
         key.deleteCharAt(key.length() - 1);
 713  
 
 714  
         // Restore if possible:
 715  0
         switch (currentChar) {
 716  
             case '=':
 717  
 
 718  
                 // Get entryfieldname, push it back and take rest as key
 719  0
                 key = key.reverse();
 720  
 
 721  0
                 boolean matchedAlpha = false;
 722  0
                 for (int i = 0; i < key.length(); i++) {
 723  0
                     currentChar = key.charAt(i);
 724  
 
 725  
                     /// Skip spaces:
 726  0
                     if (!matchedAlpha && currentChar == ' ') {
 727  0
                         continue;
 728  
                     }
 729  0
                     matchedAlpha = true;
 730  
 
 731  
                     // Begin of entryfieldname (e.g. author) -> push back:
 732  0
                     unread(currentChar);
 733  0
                     if (currentChar == ' ' || currentChar == '\n') {
 734  
 
 735  
                         /*
 736  
                          * found whitespaces, entryfieldname completed -> key in
 737  
                          * keybuffer, skip whitespaces
 738  
                          */
 739  0
                         StringBuilder newKey = new StringBuilder();
 740  0
                         for (int j = i; j < key.length(); j++) {
 741  0
                             currentChar = key.charAt(j);
 742  0
                             if (!Character.isWhitespace(currentChar)) {
 743  0
                                 newKey.append(currentChar);
 744  
                             }
 745  
                         }
 746  
 
 747  
                         // Finished, now reverse newKey and remove whitespaces:
 748  0
                         _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key.",
 749  
                                 String.valueOf(line)));
 750  0
                         key = newKey.reverse();
 751  
                     }
 752  
                 }
 753  0
                 break;
 754  
 
 755  
             case ',':
 756  
 
 757  0
                 _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key (contains whitespaces).",
 758  
                         String.valueOf(line)));
 759  
 
 760  
             case '\n':
 761  
 
 762  0
                 _pr.addWarning(Globals.lang("Line %0: Found corrupted BibTeX-key (comma missing).",
 763  
                         String.valueOf(line)));
 764  
 
 765  0
                 break;
 766  
 
 767  
             default:
 768  
 
 769  
                 // No more lookahead, give up:
 770  0
                 unreadBuffer(key);
 771  0
                 return "";
 772  
         }
 773  
 
 774  0
         return removeWhitespaces(key).toString();
 775  
     }
 776  
 
 777  
         /**
 778  
          * removes whitespaces from <code>sb</code>
 779  
          * 
 780  
          * @param sb
 781  
          * @return
 782  
          */
 783  
         private StringBuilder removeWhitespaces(StringBuilder sb) {
 784  0
                 StringBuilder newSb = new StringBuilder();
 785  
                 char current;
 786  0
                 for (int i = 0; i < sb.length(); ++i) {
 787  0
                         current = sb.charAt(i);
 788  0
                         if (!Character.isWhitespace(current))
 789  0
                                 newSb.append(current);
 790  
                 }
 791  0
                 return newSb;
 792  
         }
 793  
 
 794  
         /**
 795  
          * pushes buffer back into input
 796  
          * 
 797  
          * @param sb
 798  
          * @throws IOException
 799  
          *             can be thrown if buffer is bigger than LOOKAHEAD
 800  
          */
 801  
         private void unreadBuffer(StringBuilder sb) throws IOException {
 802  0
                 for (int i = sb.length() - 1; i >= 0; --i) {
 803  0
                         unread(sb.charAt(i));
 804  
                 }
 805  0
         }
 806  
         
 807  
         
 808  
         /**
 809  
          * This method is used to parse the bibtex key for an entry.
 810  
          */
 811  
         private String parseKey() throws IOException, NoLabelException {
 812  0
                 StringBuffer token = new StringBuffer(20);
 813  
 
 814  
                 while (true) {
 815  0
                         int c = read();
 816  
                         // Util.pr(".. '"+(char)c+"'\t"+c);
 817  0
                         if (c == -1) {
 818  0
                                 _eof = true;
 819  
 
 820  0
                                 return token.toString();
 821  
                         }
 822  
 
 823  
                         // Ikke: #{}\uFFFD~\uFFFD
 824  
                         //
 825  
                         // G\uFFFDr: $_*+.-\/?"^
 826  0
                         if (!Character.isWhitespace((char) c)
 827  
                                 && (Character.isLetterOrDigit((char) c) || ((c != '#') && (c != '{') && (c != '}')
 828  
                                         && (c != '\uFFFD') && (c != '~') && (c != '\uFFFD') && (c != ',') && (c != '=')))) {
 829  0
                                 token.append((char) c);
 830  
                         } else {
 831  
 
 832  0
                                 if (Character.isWhitespace((char) c)) {
 833  
                                         // We have encountered white space instead of the comma at
 834  
                                         // the end of
 835  
                                         // the key. Possibly the comma is missing, so we try to
 836  
                                         // return what we
 837  
                                         // have found, as the key and try to restore the rest in fixKey().
 838  0
                                         return token.toString()+fixKey();
 839  0
                                 } else if (c == ',') {
 840  0
                                         unread(c);
 841  0
                                         return token.toString();
 842  
                                         // } else if (Character.isWhitespace((char)c)) {
 843  
                                         // throw new NoLabelException(token.toString());
 844  0
                                 } else if (c == '=') {
 845  
                                         // If we find a '=' sign, it is either an error, or
 846  
                                         // the entry lacked a comma signifying the end of the key.
 847  
 
 848  0
                                         return token.toString();
 849  
                                         // throw new NoLabelException(token.toString());
 850  
 
 851  
                                 } else
 852  0
                                         throw new IOException("Error in line " + line + ":" + "Character '" + (char) c
 853  
                                                 + "' is not " + "allowed in bibtex keys.");
 854  
 
 855  
                         }
 856  0
                 }
 857  
 
 858  
         }
 859  
 
 860  
         private class NoLabelException extends Exception {
 861  0
                 public NoLabelException(String hasRead) {
 862  0
                         super(hasRead);
 863  0
                 }
 864  
         }
 865  
 
 866  
         private StringBuffer parseBracketedText() throws IOException {
 867  
                 // Util.pr("Parse bracketed text");
 868  0
                 StringBuffer value = new StringBuffer();
 869  
 
 870  0
                 consume('{');
 871  
 
 872  0
                 int brackets = 0;
 873  
 
 874  0
                 while (!((peek() == '}') && (brackets == 0))) {
 875  
 
 876  0
                         int j = read();
 877  0
                         if ((j == -1) || (j == 65535)) {
 878  0
                                 throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
 879  0
                         } else if (j == '{')
 880  0
                                 brackets++;
 881  0
                         else if (j == '}')
 882  0
                                 brackets--;
 883  
 
 884  
                         // If we encounter whitespace of any kind, read it as a
 885  
                         // simple space, and ignore any others that follow immediately.
 886  
                         /*
 887  
                          * if (j == '\n') { if (peek() == '\n') value.append('\n'); } else
 888  
                          */
 889  0
                         if (Character.isWhitespace((char) j)) {
 890  0
                                 String whs = skipAndRecordWhitespace(j);
 891  
 
 892  
                                 // System.out.println(":"+whs+":");
 893  
 
 894  0
                                 if (!whs.equals("") && !whs.equals("\n\t")) { // &&
 895  
                                                                                                                                 // !whs.equals("\n"))
 896  
 
 897  0
                                         whs = whs.replaceAll("\t", ""); // Remove tabulators.
 898  
 
 899  
                                         // while (whs.endsWith("\t"))
 900  
                                         // whs = whs.substring(0, whs.length()-1);
 901  
 
 902  0
                                         value.append(whs);
 903  
 
 904  
                                 } else {
 905  0
                                         value.append(' ');
 906  
                                 }
 907  
 
 908  0
                         } else
 909  0
                                 value.append((char) j);
 910  
 
 911  0
                 }
 912  
 
 913  0
                 consume('}');
 914  
 
 915  0
                 return value;
 916  
         }
 917  
 
 918  
         private StringBuffer parseBracketedTextExactly() throws IOException {
 919  
 
 920  0
                 StringBuffer value = new StringBuffer();
 921  
 
 922  0
                 consume('{');
 923  
 
 924  0
                 int brackets = 0;
 925  
 
 926  0
                 while (!((peek() == '}') && (brackets == 0))) {
 927  
 
 928  0
                         int j = read();
 929  0
                         if ((j == -1) || (j == 65535)) {
 930  0
                                 throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
 931  0
                         } else if (j == '{')
 932  0
                                 brackets++;
 933  0
                         else if (j == '}')
 934  0
                                 brackets--;
 935  
 
 936  0
                         value.append((char) j);
 937  
 
 938  0
                 }
 939  
 
 940  0
                 consume('}');
 941  
 
 942  0
                 return value;
 943  
         }
 944  
 
 945  
         private StringBuffer parseQuotedFieldExactly() throws IOException {
 946  
 
 947  0
                 StringBuffer value = new StringBuffer();
 948  
 
 949  0
                 consume('"');
 950  
 
 951  0
                 int brackets = 0;
 952  
 
 953  0
                 while (!((peek() == '"') && (brackets == 0))) {
 954  
 
 955  0
                         int j = read();
 956  0
                         if ((j == -1) || (j == 65535)) {
 957  0
                                 throw new RuntimeException("Error in line " + line + ": EOF in mid-string");
 958  0
                         } else if (j == '{')
 959  0
                                 brackets++;
 960  0
                         else if (j == '}')
 961  0
                                 brackets--;
 962  
 
 963  0
                         value.append((char) j);
 964  
 
 965  0
                 }
 966  
 
 967  0
                 consume('"');
 968  
 
 969  0
                 return value;
 970  
         }
 971  
 
 972  
         private void consume(char expected) throws IOException {
 973  0
                 int c = read();
 974  
 
 975  0
                 if (c != expected) {
 976  0
                         throw new RuntimeException("Error in line " + line + ": Expected " + expected
 977  
                                 + " but received " + (char) c);
 978  
                 }
 979  
 
 980  0
         }
 981  
 
 982  
         private boolean consumeUncritically(char expected) throws IOException {
 983  
                 int c;
 984  185675885
                 while (((c = read()) != expected) && (c != -1) && (c != 65535)){
 985  
                     // do nothing
 986  
                 }
 987  
                         
 988  9772415
                 if ((c == -1) || (c == 65535))
 989  9772415
                         _eof = true;
 990  
 
 991  
                 // Return true if we actually found the character we were looking for:
 992  9772415
                 return c == expected;
 993  
         }
 994  
 
 995  
         private void consume(char expected1, char expected2) throws IOException {
 996  
                 // Consumes one of the two, doesn't care which appears.
 997  
 
 998  0
                 int c = read();
 999  
 
 1000  0
                 if ((c != expected1) && (c != expected2)) {
 1001  0
                         throw new RuntimeException("Error in line " + line + ": Expected " + expected1 + " or "
 1002  
                                 + expected2 + " but received " + c);
 1003  
 
 1004  
                 }
 1005  
 
 1006  0
         }
 1007  
 
 1008  
         public void checkEntryTypes(ParserResult _pr) {
 1009  
                 
 1010  9772415
                 for (BibtexEntry be : _db.getEntries()){
 1011  0
                         if (be.getType() instanceof UnknownEntryType) {
 1012  
                                 // Look up the unknown type name in our map of parsed types:
 1013  
 
 1014  0
                                 Object o = entryTypes.get(be.getType().getName().toLowerCase());
 1015  0
                                 if (o != null) {
 1016  0
                                         BibtexEntryType type = (BibtexEntryType) o;
 1017  0
                                         be.setType(type);
 1018  0
                                 } else {
 1019  
                                         // System.out.println("Unknown entry type:
 1020  
                                         // "+be.getType().getName());
 1021  0
                                         _pr
 1022  
                                                 .addWarning(Globals.lang("unknown entry type") + ": "
 1023  
                                                         + be.getType().getName() + ". " + Globals.lang("Type set to 'other'")
 1024  
                                                         + ".");
 1025  0
                                         be.setType(BibtexEntryType.OTHER);
 1026  
                                 }
 1027  0
                         }
 1028  
                 }
 1029  9772415
         }
 1030  
 
 1031  
     /**
 1032  
      * Read the JabRef signature, if any, and find what version number is given.
 1033  
      * This method advances the file reader only as far as the end of the first line of
 1034  
      * the JabRef signature, or up until the point where the read characters don't match
 1035  
      * the signature. This should ensure that the parser can continue from that spot without
 1036  
      * resetting the reader, without the risk of losing important contents.
 1037  
      *
 1038  
      * @return The version number, or null if not found.
 1039  
      * @throws IOException
 1040  
      */
 1041  
     private String readJabRefVersionNumber() throws IOException {
 1042  9772415
         StringBuffer headerText = new StringBuffer();
 1043  
         
 1044  9772415
         boolean keepon = true;
 1045  9772415
         int piv = 0;
 1046  
         int c;
 1047  
 
 1048  
         // We start by reading the standard part of the signature, which precedes
 1049  
         // the version number:
 1050  
         //                     This file was created with JabRef X.y.
 1051  342034525
         while (keepon) {
 1052  342034525
             c = peek();
 1053  342034525
             headerText.append((char) c);
 1054  342034525
             if ((piv == 0) && (Character.isWhitespace((char) c) || (c == '%')))
 1055  19544830
                 read();
 1056  322489695
             else if (c == GUIGlobals.SIGNATURE.charAt(piv)) {
 1057  322489695
                 piv++;
 1058  322489695
                 read();
 1059  
             }
 1060  
             else {
 1061  0
                 keepon = false;
 1062  0
                 return null;
 1063  
             }
 1064  
 
 1065  
             // Check if we've reached the end of the signature's standard part:
 1066  342034525
             if (piv == GUIGlobals.SIGNATURE.length()) {
 1067  9772415
                 keepon = false;
 1068  
 
 1069  
                 // Found the standard part. Now read the version number:
 1070  9772415
                 StringBuilder sb = new StringBuilder();
 1071  58634540
                 while (((c=read()) != '\n') && (c != -1))
 1072  48862125
                     sb.append((char)c);
 1073  9772415
                 String versionNum = sb.toString().trim();
 1074  
                 // See if it fits the X.y. pattern:
 1075  9772415
                 if (Pattern.compile("[1-9]+\\.[1-9A-Za-z ]+\\.").matcher(versionNum).matches()) {
 1076  
                     // It matched. Remove the last period and return:
 1077  9772415
                     return versionNum.substring(0, versionNum.length()-1);
 1078  
                 }
 1079  0
                 else if (Pattern.compile("[1-9]+\\.[1-9]\\.[1-9A-Za-z ]+\\.").matcher(versionNum).matches()) {
 1080  
                     // It matched. Remove the last period and return:
 1081  0
                     return versionNum.substring(0, versionNum.length()-1);
 1082  
                 }
 1083  
 
 1084  0
             }
 1085  
         }
 1086  
 
 1087  0
         return null;
 1088  
     }
 1089  
 
 1090  
     /**
 1091  
      * After a JabRef version number has been parsed and put into _pr,
 1092  
      * parse the version number to determine the JabRef major and minor version
 1093  
      * number
 1094  
      */
 1095  
     private void setMajorMinorVersions() {
 1096  9772415
         String v = _pr.getJabrefVersion();
 1097  9772415
         Pattern p = Pattern.compile("([0-9]+)\\.([0-9]+).*");
 1098  9772415
         Pattern p2 = Pattern.compile("([0-9]+)\\.([0-9]+)\\.([0-9]+).*");
 1099  9772415
         Matcher m = p.matcher(v);
 1100  9772415
         Matcher m2 = p2.matcher(v);
 1101  9772415
         if (m.matches())
 1102  9772415
             if (m.groupCount() >= 2) {
 1103  9772415
                 _pr.setJabrefMajorVersion(Integer.parseInt(m.group(1)));
 1104  9772415
                 _pr.setJabrefMinorVersion(Integer.parseInt(m.group(2)));
 1105  
             }
 1106  9772415
         if (m2.matches())
 1107  0
             if (m2.groupCount() >= 3) {
 1108  0
                 _pr.setJabrefMinor2Version(Integer.parseInt(m2.group(3)));
 1109  
             }
 1110  9772415
     }
 1111  
 }