Coverage Report - org.omegat.util.WikiGet
 
Classes in this File Line Coverage Branch Coverage Complexity
WikiGet
65%
67/103
72%
16/22
3.714
 
 1  
 /**************************************************************************
 2  
  OmegaT - Computer Assisted Translation (CAT) tool 
 3  
           with fuzzy matching, translation memory, keyword search, 
 4  
           glossaries, and translation leveraging into updated projects.
 5  
 
 6  
  Copyright (C) 2007 Kim Bruning
 7  
                2010 Alex Buloichik, Didier Briel, Rashid Umarov
 8  
                Home page: http://www.omegat.org/
 9  
                Support center: http://groups.yahoo.com/group/OmegaT/
 10  
 
 11  
  This program is free software; you can redistribute it and/or modify
 12  
  it under the terms of the GNU General Public License as published by
 13  
  the Free Software Foundation; either version 2 of the License, or
 14  
  (at your option) any later version.
 15  
 
 16  
  This program is distributed in the hope that it will be useful,
 17  
  but WITHOUT ANY WARRANTY; without even the implied warranty of
 18  
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 19  
  GNU General Public License for more details.
 20  
 
 21  
  You should have received a copy of the GNU General Public License
 22  
  along with this program; if not, write to the Free Software
 23  
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 24  
  **************************************************************************/
 25  
 
 26  
 package org.omegat.util;
 27  
 
 28  
 import java.io.BufferedWriter;
 29  
 import java.io.ByteArrayOutputStream;
 30  
 import java.io.File;
 31  
 import java.io.FileOutputStream;
 32  
 import java.io.IOException;
 33  
 import java.io.InputStream;
 34  
 import java.io.OutputStream;
 35  
 import java.io.OutputStreamWriter;
 36  
 import java.net.HttpURLConnection;
 37  
 import java.net.URL;
 38  
 import java.net.URLEncoder;
 39  
 import java.util.Map;
 40  
 
 41  
 /**
 42  
  * Import pages from MediaWiki
 43  
  * 
 44  
  * @author Kim Bruning
 45  
  * @author Alex Buloichik (alex73mail@gmail.com)
 46  
  * @author Didier Briel
 47  
  * @author Rashid Umarov
 48  
  */
 49  0
 public class WikiGet {
 50  
     /**
      * Text that introduces the charset parameter inside a Content-Type header
      * value; post() searches for it to find the encoding of the response.
      */
     protected static final String CHARSET_MARK = "charset=";
 51  
 
 52  
     /**
 53  
      * ~inverse of String.split() refactor note: In future releases, this might
 54  
      * best be moved to a different file
 55  
      */
 56  
     public static String joinString(String separator, String[] items) {
 57  19190
         if (items.length < 1)
 58  0
             return "";
 59  19190
         StringBuffer joined = new StringBuffer();
 60  57570
         for (int i = 0; i < items.length; i++) {
 61  38380
             joined.append(items[i]);
 62  38380
             if (i != items.length - 1)
 63  19190
                 joined.append(separator);
 64  
         }
 65  19190
         return joined.toString();
 66  
     }
 67  
 
 68  
     /**
 69  
      * Gets mediawiki wiki-code data from remote server. The get strategy is
 70  
      * determined by the url format.
 71  
      * 
 72  
      * @param remote_url
 73  
      *            string representation of well-formed URL of wikipage to be
 74  
      *            retrieved
 75  
      * @param projectdir
 76  
      *            string representation of path to the project-dir where the
 77  
      *            file should be saved.
 78  
      */
 79  
     public static void doWikiGet(String remote_url, String projectdir) {
 80  
         try {
 81  19190
             String joined = null; // contains edited url
 82  19190
             String name = null; // contains a useful page name which we can use
 83  
                                 // as our filename
 84  19190
             if (remote_url.indexOf("index.php?title=") > 0) {
 85  
                 // We're directly calling the mediawiki index.php script
 86  0
                 String[] splitted = remote_url.split("index.php\\?title=");
 87  0
                 String s = splitted[splitted.length - 1];
 88  0
                 name = s;
 89  0
                 s = s.replaceAll(" ", "_");
 90  
                 // s=URLEncoder.encode(s, "UTF-8"); // breaks previously
 91  
                 // correctly encoded page names
 92  0
                 splitted[splitted.length - 1] = s;
 93  0
                 joined = joinString("index.php?title=", splitted);
 94  0
                 joined = joined + "&action=raw";
 95  0
             } else {
 96  
                 // assume script is behind some sort
 97  
                 // of url-rewriting
 98  19190
                 String[] splitted = remote_url.split("/");
 99  19190
                 String s = splitted[splitted.length - 1];
 100  19190
                 name = s;
 101  19190
                 s = s.replaceAll(" ", "_");
 102  
                 // s=URLEncoder.encode(s, "UTF-8");
 103  19190
                 splitted[splitted.length - 1] = s;
 104  19190
                 joined = joinString("/", splitted);
 105  19190
                 joined = joined + "?action=raw";
 106  
             }
 107  19190
             String page = getURL(joined);
 108  19190
             saveUTF8(projectdir, name + ".UTF8", page);
 109  0
         } catch (Exception e) {
 110  0
             e.printStackTrace();
 111  19190
         }
 112  
 
 113  19190
     }
 114  
 
 115  
     /**
 116  
      * Print UTF-8 text to stdout (useful for debugging)
 117  
      * 
 118  
      * @param output
 119  
      *            The UTF-8 format string to be printed.
 120  
      */
 121  
     public static void printUTF8(String output) {
 122  
         try {
 123  0
             BufferedWriter out = UTF8WriterBuilder(System.out);
 124  0
             out.write(output);
 125  
 
 126  0
             out.flush();
 127  0
         } catch (Exception e) {
 128  0
             e.printStackTrace();
 129  0
         }
 130  0
     }
 131  
 
 132  
     /**
 133  
      * Creates new BufferedWriter configured for UTF-8 output and connects it to
 134  
      * an OutputStream
 135  
      * 
 136  
      * @param out
 137  
      *            Outputstream to connect to.
 138  
      */
 139  
     public static BufferedWriter UTF8WriterBuilder(OutputStream out) throws Exception {
 140  0
         return new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));
 141  
     }
 142  
 
 143  
     /**
 144  
      * Save UTF-8 format data to file.
 145  
      * 
 146  
      * @param dir
 147  
      *            directory to write to.
 148  
      * @param filename
 149  
      *            filename of file to write.
 150  
      * @param output
 151  
      *            UTF-8 format text to write
 152  
      */
 153  
     public static void saveUTF8(String dir, String filename, String output) {
 154  
         try {
 155  
             // Page name can contain invalid characters, see [1878113]
 156  
             // Contributed by Anatoly Techtonik
 157  19190
             filename = filename.replaceAll("[\\\\/:\\*\\?\\\"\\|\\<\\>]", "_");
 158  19190
             File path = new File(dir, filename);
 159  19190
             FileOutputStream f = new FileOutputStream(path);
 160  0
             BufferedWriter out = UTF8WriterBuilder(f);
 161  0
             out.write(output);
 162  0
             out.close();
 163  19190
         } catch (Exception e) {
 164  19190
             e.printStackTrace();
 165  0
         }
 166  19190
     }
 167  
 
 168  
     /**
 169  
      * Obtain UTF-8 format text from remote URL.
 170  
      * 
 171  
      * @param target
 172  
      *            String representation of well-formed URL.
 173  
      */
 174  
     public static String getURL(String target) {
 175  4817297
         StringBuffer page = new StringBuffer();
 176  
         try {
 177  4817297
             URL url = new URL(target);
 178  4798107
             InputStream in = url.openStream();
 179  4787653
             byte[] b = new byte[4096];
 180  14362959
             for (int n; (n = in.read(b)) != -1;) {
 181  9575306
                 page.append(new String(b, 0, n, "UTF-8"));
 182  
             }
 183  19190
         } catch (Exception e) {
 184  19190
             e.printStackTrace();
 185  4787653
         }
 186  4806843
         return page.toString();
 187  
     }
 188  
 
 189  
     /**
 190  
      * Post data to the remote URL.
 191  
      * 
 192  
      * @param address
 193  
      *            address to post
 194  
      * @param params
 195  
      *            parameters
 196  
      * @return sever output
 197  
      */
 198  
     public static String post(String address, Map<String, String> params) throws IOException {
 199  3890864
         URL url = new URL(address);
 200  
 
 201  3890864
         ByteArrayOutputStream pout = new ByteArrayOutputStream();
 202  3890864
         for (Map.Entry<String, String> p : params.entrySet()) {
 203  11672592
             if (pout.size() > 0) {
 204  7781728
                 pout.write('&');
 205  
             }
 206  11672592
             pout.write(p.getKey().getBytes(OConsts.UTF8));
 207  11672592
             pout.write('=');
 208  11672592
             pout.write(URLEncoder.encode(p.getValue(), OConsts.UTF8).getBytes(OConsts.UTF8));
 209  
         }
 210  
 
 211  3890864
         HttpURLConnection conn = (HttpURLConnection) url.openConnection();
 212  
         try {
 213  3890864
             conn.setRequestMethod("POST");
 214  3890864
             conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
 215  3890864
             conn.setRequestProperty("Content-Length", Integer.toString(pout.size()));
 216  
             
 217  
             // Added to pass through authenticated proxy
 218  3890864
             String encodedUser = (Preferences.getPreference(Preferences.PROXY_USER_NAME));
 219  3890864
             if (!StringUtil.isEmpty(encodedUser)) { // There is a proxy user
 220  0
                 String encodedPassword = (Preferences.getPreference(Preferences.PROXY_PASSWORD));
 221  0
                 sun.misc.BASE64Decoder dec = new sun.misc.BASE64Decoder();
 222  
                 try {
 223  0
                     String pass = (new String(dec.decodeBuffer(encodedUser)));
 224  0
                     pass += ":" + new String(dec.decodeBuffer(encodedPassword));
 225  0
                     sun.misc.BASE64Encoder enc = new sun.misc.BASE64Encoder();
 226  0
                     encodedPassword = enc.encode(pass.getBytes());
 227  0
                     conn.setRequestProperty("Proxy-Authorization", "Basic " + encodedPassword);
 228  0
                 } catch (IOException ex) {
 229  0
                     Log.logErrorRB("LOG_DECODING_ERROR");
 230  0
                     Log.log(ex);
 231  0
                 }
 232  
              }
 233  
 
 234  3890864
             conn.setDoInput(true);
 235  3890864
             conn.setDoOutput(true);
 236  
 
 237  3890864
             OutputStream cout = conn.getOutputStream();
 238  3890864
             cout.write(pout.toByteArray());
 239  3890864
             cout.flush();
 240  
 
 241  3890864
             if (conn.getResponseCode() != HttpURLConnection.HTTP_OK) {
 242  0
                 throw new IOException(conn.getResponseMessage());
 243  
             }
 244  3890864
             String contentType = conn.getHeaderField("Content-Type");
 245  3890864
             int cp = contentType != null ? contentType.indexOf(CHARSET_MARK) : -1;
 246  3890864
             String charset = cp >= 0 ? contentType.substring(cp + CHARSET_MARK.length()) : "ISO8859-1";
 247  3890864
             ByteArrayOutputStream res = new ByteArrayOutputStream();
 248  3890864
             InputStream in = conn.getInputStream();
 249  
             try {
 250  3890864
                 LFileCopy.copy(in, res);
 251  
             } finally {
 252  3890864
                 in.close();
 253  3890864
             }
 254  3890864
             return new String(res.toByteArray(), charset);
 255  
         } finally {
 256  3890864
             conn.disconnect();
 257  
         }
 258  
     }
 259  
 }