/**
  * Command-line entry point: loads one or more tweet corpora, extracts the URLs found in each
  * tweet's content, pairs every URL with the value returned by {@code URLUtils.getURLContent}, and
  * writes the result out through {@code URLUtils.saveToFile}.
  *
  * <p>If fewer than five arguments are supplied, a usage message is printed and the method returns
  * without doing any work.
  *
  * @param args
  *     <p>args[0] - URLs file, args[1] - source language, args[2] - destination language, args[3] -
  *     output file for the translated URLs, args[4...] - corpus files with tweets. Note that
  *     args[1] and args[2] are only referenced by the currently disabled translation step.
  */
 public static void main(String[] args) {
   if (args.length < 5) {
     System.out.println("Formato de invocación erróneo. El formato es:");
     System.out.println(
         "java -jar TranslateURLs.jar Fichero_urls Idioma_origen Idioma_destino Fichero_urls_traducidas Resto_ficheros_tweets");
     return;
   }

   // Merge every corpus file (args[4] onwards) into a single tweet collection.
   Tweets tweets = new Tweets();
   for (int argIndex = 4; argIndex < args.length; argIndex++) {
     tweets.getTweet().addAll(TweetLoader.LoadFromXML(args[argIndex], false).getTweet());
   }

   URLUtils.loadFromFile(args[0]);

   // Build the batch of URLs that are candidates for translation. The two lists are kept in
   // lock-step: position i of one corresponds to position i of the other.
   List<String> collectedUrls = new ArrayList<String>();
   List<String> collectedContents = new ArrayList<String>();
   for (Tweet tweet : tweets.getTweet()) {
     // A filter on the tweet language (tweet.getLang() equal to args[1]) used to guard this
     // step; it is disabled, so URLs are collected from every tweet.
     for (String url : Tokenizer.getURLs(tweet.getContent(), "http://", Tokenizer.REG_URLS)) {
       collectedUrls.add(url);
       collectedContents.add(URLUtils.getURLContent(url));
     }
   }

   // NOTE: the original author marked the next three statements for deletion; presumably they
   // are a stand-in until the translation step below is re-enabled (to be confirmed).
   URLUtils.setContents(collectedContents);
   URLUtils.setUrls(collectedUrls);
   URLUtils.saveToFile(args[3]);

   /*
   Disabled translation step, kept for reference (it uses the original local names transURL and
   transURLContent). At this point only the URLs in the language to translate remain; the content
   is split up and prepared for translation.

   		int files = TranslateURLs.creatTranslatedFiles(transURLContent, args[1], args[2]);
   		TranslateURLs.jointFiles("tempurl", files, "urljoint");
   		transURLContent = TranslateURLs.createTranslatedCorpus("urljoint");
   		URLUtils.setContents(transURLContent);
   		int topURL = transURL.size();
   		for (int indURL=0; indURL<topURL; indURL++)
   		{
   			transURL.set(indURL, args[2]+"_"+transURL.get(indURL));
   		}
   		URLUtils.setUrls(transURL);
   		URLUtils.saveToFile(args[3]);
   */
 }
Ejemplo n.º 2
0
  /**
   * Appends the given parameters to the query string of a URL.
   *
   * <p>The parameters are encoded with {@code URLUtils.formURLEncodeMap}. When that encoding is
   * empty the URL is returned unchanged. Otherwise the encoded parameters are joined to the URL
   * with {@code PARAM_SEPARATOR} if the URL already contains {@code QUERY_STRING_SEPARATOR}, or
   * with {@code QUERY_STRING_SEPARATOR} if it does not.
   *
   * @param url the url to append parameters to
   * @param params the parameters to encode and append
   * @return the url followed by the encoded parameters, or {@code url} itself when there is
   *     nothing to append
   */
  public static String appendParametersToQueryString(String url, Map<String, String> params) {
    String encodedParams = URLUtils.formURLEncodeMap(params);
    if (encodedParams.isEmpty()) {
      return url;
    }

    StringBuilder result = new StringBuilder(url);
    // A URL that already carries a query string is extended with the parameter separator;
    // otherwise the query string is started here.
    boolean alreadyHasQuery = url.indexOf(QUERY_STRING_SEPARATOR) != -1;
    if (alreadyHasQuery) {
      result.append(PARAM_SEPARATOR);
    } else {
      result.append(QUERY_STRING_SEPARATOR);
    }
    result.append(encodedParams);
    return result.toString();
  }