1 | |
package net.sf.jabref.imports; |
2 | |
|
3 | |
import net.sf.jabref.BibtexEntry; |
4 | |
import net.sf.jabref.GUIGlobals; |
5 | |
import net.sf.jabref.Globals; |
6 | |
import net.sf.jabref.OutputPrinter; |
7 | |
import net.sf.jabref.net.URLDownload; |
8 | |
|
9 | |
import javax.swing.*; |
10 | |
import java.io.IOException; |
11 | |
import java.io.UnsupportedEncodingException; |
12 | |
import java.net.URL; |
13 | |
import java.net.URLEncoder; |
14 | |
import java.util.ArrayList; |
15 | |
import java.util.List; |
16 | |
import java.util.regex.Matcher; |
17 | |
import java.util.regex.Pattern; |
18 | |
|
19 | |
|
20 | 0 | public class ScienceDirectFetcher implements EntryFetcher { |
21 | |
|
22 | 0 | protected static int MAX_PAGES_TO_LOAD = 8; |
23 | |
protected static final String WEBSITE_URL = "http://www.sciencedirect.com"; |
24 | |
protected static final String SEARCH_URL = WEBSITE_URL +"/science/quicksearch?query="; |
25 | |
|
26 | |
protected static final String linkPrefix = "http://www.sciencedirect.com/science?_ob=ArticleURL&" ; |
27 | 0 | protected static final Pattern linkPattern = Pattern.compile( |
28 | |
"<a href=\""+ |
29 | |
linkPrefix.replaceAll("\\?", "\\\\?")+ |
30 | |
"([^\"]+)\"\""); |
31 | |
|
32 | 0 | protected static final Pattern nextPagePattern = Pattern.compile( |
33 | |
"<a href=\"(.*)\">Next >"); |
34 | |
|
35 | |
|
36 | 0 | protected boolean stopFetching = false; |
37 | 0 | protected boolean noAccessFound = false; |
38 | |
|
39 | |
public String getHelpPage() { |
40 | 0 | return "ScienceDirect.html"; |
41 | |
} |
42 | |
|
43 | |
public URL getIcon() { |
44 | 0 | return GUIGlobals.getIconUrl("www"); |
45 | |
} |
46 | |
|
47 | |
public String getKeyName() { |
48 | 0 | return "Search ScienceDirect"; |
49 | |
} |
50 | |
|
51 | |
public JPanel getOptionsPanel() { |
52 | |
|
53 | 0 | return null; |
54 | |
} |
55 | |
|
56 | |
public String getTitle() { |
57 | 0 | return Globals.menuTitle("Search ScienceDirect"); |
58 | |
} |
59 | |
|
60 | |
public void stopFetching() { |
61 | 0 | stopFetching = true; |
62 | 0 | noAccessFound = false; |
63 | 0 | } |
64 | |
|
65 | |
public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) { |
66 | 0 | stopFetching = false; |
67 | |
try { |
68 | 0 | List<String> citations = getCitations(query); |
69 | 0 | if (citations == null) |
70 | 0 | return false; |
71 | 0 | if (citations.size() == 0){ |
72 | 0 | status.showMessage(Globals.lang("No entries found for the search string '%0'", |
73 | |
query), |
74 | |
Globals.lang("Search ScienceDirect"), JOptionPane.INFORMATION_MESSAGE); |
75 | 0 | return false; |
76 | |
} |
77 | |
|
78 | 0 | int i=0; |
79 | 0 | for (String cit : citations) { |
80 | 0 | if (stopFetching) |
81 | 0 | break; |
82 | 0 | BibtexEntry entry = BibsonomyScraper.getEntry(cit); |
83 | 0 | if (entry != null) |
84 | 0 | dialog.addEntry(entry); |
85 | 0 | dialog.setProgress(++i, citations.size()); |
86 | 0 | } |
87 | |
|
88 | 0 | return true; |
89 | |
|
90 | 0 | } catch (IOException e) { |
91 | 0 | e.printStackTrace(); |
92 | 0 | status.showMessage(Globals.lang("Error while fetching from ScienceDirect") + ": " + e.getMessage()); |
93 | |
} |
94 | 0 | return false; |
95 | |
} |
96 | |
|
97 | |
|
98 | |
|
99 | |
|
100 | |
|
101 | |
|
102 | |
|
103 | |
|
104 | |
protected List<String> getCitations(String query) throws IOException { |
105 | |
String urlQuery; |
106 | 0 | ArrayList<String> ids = new ArrayList<String>(); |
107 | |
try { |
108 | 0 | urlQuery = SEARCH_URL + URLEncoder.encode(query, "UTF-8"); |
109 | 0 | int count = 1; |
110 | 0 | String nextPage = null; |
111 | |
while (((nextPage = getCitationsFromUrl(urlQuery, ids)) != null) |
112 | 0 | && (count < MAX_PAGES_TO_LOAD)) { |
113 | 0 | urlQuery = nextPage; |
114 | 0 | count++; |
115 | |
} |
116 | 0 | return ids; |
117 | 0 | } catch (UnsupportedEncodingException e) { |
118 | 0 | throw new RuntimeException(e); |
119 | |
} |
120 | |
} |
121 | |
|
122 | |
protected String getCitationsFromUrl(String urlQuery, List<String> ids) throws IOException { |
123 | 0 | URL url = new URL(urlQuery); |
124 | 0 | URLDownload ud = new URLDownload(url); |
125 | 0 | ud.download(); |
126 | |
|
127 | 0 | String cont = ud.getStringContent(); |
128 | |
|
129 | 0 | Matcher m = linkPattern.matcher(cont); |
130 | 0 | if (m.find()) { |
131 | 0 | while (m.find()) { |
132 | 0 | ids.add(linkPrefix+m.group(1)); |
133 | 0 | cont = cont.substring(m.end()); |
134 | 0 | m = linkPattern.matcher(cont); |
135 | |
} |
136 | |
} |
137 | |
|
138 | |
else { |
139 | 0 | return null; |
140 | |
} |
141 | |
|
142 | |
|
143 | |
|
144 | |
|
145 | |
|
146 | |
|
147 | 0 | return null; |
148 | |
} |
149 | |
|
150 | |
|
151 | |
} |