/** * @param args * <p>args[0] - URLsFile args[1] - Source language args[2] - Destination language args[3] - * File urls translated args[4...] - Corpus with tweets * @throws InterruptedException */ public static void main(String[] args) { if (args.length < 5) { System.out.println("Formato de invocación erróneo. El formato es:"); System.out.println( "java -jar TranslateURLs.jar Fichero_urls Idioma_origen Idioma_destino Fichero_urls_traducidas Resto_ficheros_tweets"); return; } List<String> transURL = new ArrayList<String>(); List<String> transURLContent = new ArrayList<String>(); Tweets tweets = new Tweets(); for (int i = 4; i < args.length; i++) { tweets.getTweet().addAll(TweetLoader.LoadFromXML(args[i], false).getTweet()); } URLUtils.loadFromFile(args[0]); int topTweets = tweets.getTweet().size(); // Genero el paquete de URLs susceptibles de ser traducidas for (int indTweet = 0; indTweet < topTweets; indTweet++) { Tweet tweet = tweets.getTweet().get(indTweet); // if (tweet.getLang().compareTo(args[1])==0) { List<String> urls = Tokenizer.getURLs(tweet.getContent(), "http://", Tokenizer.REG_URLS); int topURL = urls.size(); for (int indURL = 0; indURL < topURL; indURL++) { transURL.add(urls.get(indURL)); transURLContent.add(URLUtils.getURLContent(urls.get(indURL))); } } } // estas 3 lineas borrar URLUtils.setContents(transURLContent); URLUtils.setUrls(transURL); URLUtils.saveToFile(args[3]); /* //Me he quedado sólo con las URL del idioma que deseo traducir. Ahora troceo y preparo para traducir int files = TranslateURLs.creatTranslatedFiles(transURLContent, args[1], args[2]); TranslateURLs.jointFiles("tempurl", files, "urljoint"); transURLContent = TranslateURLs.createTranslatedCorpus("urljoint"); URLUtils.setContents(transURLContent); int topURL = transURL.size(); for (int indURL=0; indURL<topURL; indURL++) { transURL.set(indURL, args[2]+"_"+transURL.get(indURL)); } URLUtils.setUrls(transURL); URLUtils.saveToFile(args[3]); */ }
/** * Append given parameters to the query string of the url * * @param url the url to append parameters to * @param params any map * @return new url with parameters on query string */ public static String appendParametersToQueryString(String url, Map<String, String> params) { String queryString = URLUtils.formURLEncodeMap(params); if (queryString.length() == 0) return url; // Check if there are parameters in the url already and use '&' instead of '?' url += url.indexOf(QUERY_STRING_SEPARATOR) != -1 ? PARAM_SEPARATOR : QUERY_STRING_SEPARATOR; url += queryString; return url; }