Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
IntegrityCheck |
|
| 6.714285714285714;6.714 |
1 | /* | |
2 | Copyright (C) 2004 R. Nagel | |
3 | ||
4 | All programs in this directory and | |
5 | subdirectories are published under the GNU General Public License as | |
6 | described below. | |
7 | ||
8 | This program is free software; you can redistribute it and/or modify | |
9 | it under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or (at | |
11 | your option) any later version. | |
12 | ||
13 | This program is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with this program; if not, write to the Free Software | |
20 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 | |
21 | USA | |
22 | ||
23 | Further information about the GNU GPL is available at: | |
24 | http://www.gnu.org/copyleft/gpl.ja.html | |
25 | ||
26 | */ | |
27 | ||
28 | // created by : r.nagel 27.10.2004 | |
29 | // | |
30 | // function : check all bibtex items and report errors, inconsistencies, | |
31 | // warnings, hints and .... | |
32 | // | |
33 | // todo : find equal authors: e.g.: D. Knuth = Donald Knuth = Donald E. Knuth | |
34 | // and try to give all items an identical look | |
35 | // | |
36 | // modified : | |
37 | ||
38 | ||
39 | ||
40 | package net.sf.jabref.wizard.integrity ; | |
41 | ||
42 | import java.util.Vector; | |
43 | ||
44 | import net.sf.jabref.BibtexDatabase; | |
45 | import net.sf.jabref.BibtexEntry; | |
46 | ||
47 | public class IntegrityCheck | |
48 | { | |
49 | private Vector<IntegrityMessage> messages ; | |
50 | ||
51 | public IntegrityCheck() | |
52 | 325535 | { |
53 | 325535 | messages = new Vector<IntegrityMessage>() ; |
54 | 325535 | } |
55 | ||
56 | public Vector<IntegrityMessage> checkBibtexDatabase(BibtexDatabase base) { | |
57 | 41314 | messages.clear(); |
58 | 41314 | if (base != null) { |
59 | 41314 | for (BibtexEntry entry : base.getEntries()) { |
60 | 0 | checkSingleEntry(entry); |
61 | } | |
62 | } | |
63 | 41314 | return new Vector<IntegrityMessage>(messages); |
64 | } | |
65 | ||
66 | public Vector<IntegrityMessage> checkBibtexEntry(BibtexEntry entry) { | |
67 | 0 | messages.clear(); |
68 | 0 | checkSingleEntry(entry); |
69 | 0 | return new Vector<IntegrityMessage>(messages); |
70 | } | |
71 | ||
72 | public void checkSingleEntry(BibtexEntry entry) | |
73 | { | |
74 | 0 | if (entry == null) |
75 | 0 | return ; |
76 | ||
77 | 0 | Object data = entry.getField("author") ; |
78 | 0 | if (data != null) |
79 | 0 | authorNameCheck( data.toString(), "author", entry) ; |
80 | ||
81 | 0 | data = entry.getField("editor") ; |
82 | 0 | if (data != null) |
83 | 0 | authorNameCheck( data.toString(), "editor", entry) ; |
84 | ||
85 | 0 | data = entry.getField("title") ; |
86 | 0 | if (data != null) |
87 | 0 | titleCheck( data.toString(), "title", entry) ; |
88 | ||
89 | 0 | data = entry.getField("year") ; |
90 | 0 | if (data != null) |
91 | 0 | yearCheck( data.toString(), "year", entry) ; |
92 | 0 | } |
93 | ||
94 | /** fills the class Vector (of IntegrityMessage Objects) which did inform about | |
95 | * failures, hints.... | |
96 | * The Authors or Editors field could be invalid -> try to detect it! | |
97 | * Knuth, Donald E. and Kurt Cobain and A. Einstein = N,NNaNNaNN | |
98 | */ | |
99 | private void authorNameCheck(String names, String fieldName, BibtexEntry entry) | |
100 | { | |
101 | // try to extract the structure of author tag | |
102 | // N = name, ","= seperator, "a" = and | |
103 | 0 | StringBuffer structure = new StringBuffer() ; |
104 | 0 | int len = names.length() ; |
105 | 0 | int mode = -1 ; |
106 | 0 | for (int t = 0 ; t < len ; t++) |
107 | { | |
108 | 0 | char ch = names.charAt(t) ; |
109 | 0 | switch (ch) |
110 | { | |
111 | case ',' : | |
112 | 0 | if (mode == 5) // "and" |
113 | 0 | structure.append('a') ; |
114 | else | |
115 | 0 | structure.append('N') ; |
116 | ||
117 | 0 | structure.append(',') ; |
118 | 0 | mode = 0 ; |
119 | 0 | break ; |
120 | ||
121 | case ' ' : | |
122 | 0 | if (mode == 5) // "and" |
123 | 0 | structure.append('a') ; |
124 | else | |
125 | 0 | if (mode != 0) |
126 | 0 | structure.append('N') ; |
127 | 0 | mode = -1 ; // blank processed |
128 | 0 | break ; |
129 | case 'a' : | |
130 | 0 | if (mode == -1) |
131 | 0 | mode = 2 ; |
132 | break ; | |
133 | case 'n' : | |
134 | 0 | if (mode == 2) |
135 | 0 | mode = 3 ; |
136 | break ; | |
137 | case 'd' : | |
138 | 0 | if (mode == 3) |
139 | 0 | mode = 5 ; |
140 | break ; | |
141 | default : | |
142 | 0 | mode = 1 ; |
143 | } | |
144 | } | |
145 | 0 | if (mode == 5) // "and" |
146 | 0 | structure.append('a') ; |
147 | else | |
148 | 0 | if (mode != 0) |
149 | 0 | structure.append('N') ; |
150 | ||
151 | // Check | |
152 | 0 | len = structure.length() ; |
153 | 0 | if (len > 0) |
154 | { | |
155 | 0 | if (structure.charAt(0) != 'N') // must start by name |
156 | { | |
157 | 0 | messages.add( new IntegrityMessage( IntegrityMessage.NAME_START_WARNING, |
158 | entry, fieldName, null)) ; | |
159 | // back.add("beginning of " +fieldName +" field"); | |
160 | } | |
161 | ||
162 | 0 | if (structure.charAt( structure.length() -1) != 'N') // end without seperator |
163 | { | |
164 | 0 | messages.add( new IntegrityMessage( IntegrityMessage.NAME_END_WARNING, |
165 | entry, fieldName, null)) ; | |
166 | // back.add("bad end (" +fieldName +" field)"); | |
167 | } | |
168 | /*if (structure.indexOf("NN,NN") > -1) | |
169 | { | |
170 | messages.add( new IntegrityMessage( IntegrityMessage.NAME_SEMANTIC_WARNING, | |
171 | entry, fieldName, null)) ; | |
172 | ||
173 | // back.add("something could be wrong in " +fieldName +" field") ; | |
174 | } */ | |
175 | } | |
176 | // messages.add( new IntegrityMessage( IntegrityMessage.NAME_END_WARNING, | |
177 | // entry, fieldName, null)) ; | |
178 | ||
179 | 0 | } |
180 | ||
181 | ||
182 | ||
183 | private void titleCheck(String title, String fieldName, BibtexEntry entry) | |
184 | { | |
185 | 0 | int len = title.length() ; |
186 | 0 | int mode = 0 ; |
187 | 0 | int upLowCounter = 0 ; |
188 | // boolean lastWasSpace = false ; | |
189 | 0 | for (int t = 0 ; t < len ; t++) |
190 | { | |
191 | 0 | char ch = title.charAt( t ) ; |
192 | 0 | switch (ch) |
193 | { | |
194 | case '}' : // end of Sequence | |
195 | 0 | if (mode == 0) |
196 | { | |
197 | // closing brace '}' without an opening | |
198 | 0 | messages.add( new IntegrityMessage( IntegrityMessage.UNEXPECTED_CLOSING_BRACE_FAILURE, |
199 | entry, fieldName, null)) ; | |
200 | } | |
201 | else // mode == 1 | |
202 | { | |
203 | 0 | mode-- ; |
204 | // lastWasSpace = true ; | |
205 | } | |
206 | 0 | break ; |
207 | ||
208 | case '{' : // open { | |
209 | 0 | mode++ ; |
210 | 0 | break ; |
211 | ||
212 | case ' ' : | |
213 | // lastWasSpace = true ; | |
214 | 0 | break ; |
215 | ||
216 | default : | |
217 | 0 | if (mode == 0) // out of {} |
218 | { | |
219 | 0 | if ( Character.isUpperCase(ch) && (t > 1)) |
220 | { | |
221 | 0 | upLowCounter++ ; |
222 | } | |
223 | } | |
224 | } | |
225 | } | |
226 | 0 | if (upLowCounter > 0) |
227 | { | |
228 | ||
229 | /* | |
230 | Morten Alver (2006.10.10): | |
231 | ||
232 | Disabling this warning because we have a feature for automatically adding | |
233 | braces when saving, which makes this warning misleading. It could be modified | |
234 | to suggest to use this feature if not enabled, and not give a warning if the | |
235 | feature is enabled. | |
236 | ||
237 | messages.add( new IntegrityMessage( IntegrityMessage.UPPER_AND_LOWER_HINT, | |
238 | entry, fieldName, null)) ;*/ | |
239 | ||
240 | } | |
241 | 0 | } |
242 | ||
243 | /** Checks, if the number String contains a four digit year */ | |
244 | private void yearCheck(String number, String fieldName, BibtexEntry entry) | |
245 | { | |
246 | 0 | int len = number.length() ; |
247 | 0 | int digitCounter = 0 ; |
248 | 0 | boolean fourDigitsBlock = false ; |
249 | 0 | boolean containsFourDigits = false ; |
250 | ||
251 | 0 | for (int t = 0 ; t < len ; t++) |
252 | { | |
253 | 0 | char ch = number.charAt( t ) ; |
254 | 0 | if ( Character.isDigit(ch)) |
255 | { | |
256 | 0 | digitCounter++ ; |
257 | 0 | if (digitCounter == 4) |
258 | 0 | fourDigitsBlock = true ; |
259 | else | |
260 | 0 | fourDigitsBlock = false ; |
261 | } else | |
262 | { | |
263 | 0 | if (fourDigitsBlock) |
264 | 0 | containsFourDigits = true ; |
265 | ||
266 | 0 | digitCounter = 0 ; |
267 | } | |
268 | } | |
269 | ||
270 | 0 | if ((!containsFourDigits) && (!fourDigitsBlock)) |
271 | { | |
272 | 0 | messages.add( new IntegrityMessage( IntegrityMessage.FOUR_DIGITS_HINT, |
273 | entry, fieldName, null)) ; | |
274 | } | |
275 | 0 | } |
276 | } |