public static void main(String[] args) { if (args.length < 1) { System.out.println("usage: C4_5TweetTopicCategorization <root_path>"); System.exit(-1); } String rootPath = args[0]; File dataFolder = new File(rootPath + "/data"); String resultFolderPath = rootPath + "/results/C4_5/"; CrisisMailer crisisMailer = CrisisMailer.getCrisisMailer(); Logger logger = Logger.getLogger(C4_5TweetTopicCategorization.class); PropertyConfigurator.configure(Constants.LOG4J_PROPERTIES_FILE_PATH); File resultFolder = new File(resultFolderPath); if (!resultFolder.exists()) resultFolder.mkdir(); CSVLoader csvLoader = new CSVLoader(); try { for (File dataSetName : dataFolder.listFiles()) { Instances data = null; try { csvLoader.setSource(dataSetName); csvLoader.setStringAttributes("2"); data = csvLoader.getDataSet(); } catch (IOException ioe) { logger.error(ioe); crisisMailer.sendEmailAlert(ioe); System.exit(-1); } data.setClassIndex(data.numAttributes() - 1); data.deleteWithMissingClass(); Instances vectorizedData = null; StringToWordVector stringToWordVectorFilter = new StringToWordVector(); try { stringToWordVectorFilter.setInputFormat(data); stringToWordVectorFilter.setAttributeIndices("2"); stringToWordVectorFilter.setIDFTransform(true); stringToWordVectorFilter.setLowerCaseTokens(true); stringToWordVectorFilter.setOutputWordCounts(false); stringToWordVectorFilter.setUseStoplist(true); vectorizedData = Filter.useFilter(data, stringToWordVectorFilter); vectorizedData.deleteAttributeAt(0); // System.out.println(vectorizedData); } catch (Exception exception) { logger.error(exception); crisisMailer.sendEmailAlert(exception); System.exit(-1); } J48 j48Classifier = new J48(); /* FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(stringToWordVectorFilter); filteredClassifier.setClassifier(j48Classifier); */ try { Evaluation eval = new Evaluation(vectorizedData); eval.crossValidateModel( j48Classifier, vectorizedData, 5, new 
Random(System.currentTimeMillis())); FileOutputStream resultOutputStream = new FileOutputStream(new File(resultFolderPath + dataSetName.getName())); resultOutputStream.write(eval.toSummaryString("=== Summary ===", false).getBytes()); resultOutputStream.write(eval.toMatrixString().getBytes()); resultOutputStream.write(eval.toClassDetailsString().getBytes()); resultOutputStream.close(); } catch (Exception exception) { logger.error(exception); crisisMailer.sendEmailAlert(exception); System.exit(-1); } } } catch (Exception exception) { logger.error(exception); crisisMailer.sendEmailAlert(exception); System.out.println(-1); } }
/**
 * Entry point of the category collector: wires the collector threads together through shared
 * queues, starts them, and then blocks until all of them have finished.
 *
 * <p>The threads are started, and later joined, in this order: suggested categories, users per
 * category, user timelines, tweet storage, queue measurement. Whatever the reason for leaving
 * the wait (normal completion, interruption, or an unexpected exception), the method finishes
 * by logging an error and sending an email alert saying that the driver has stopped.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    PropertyConfigurator.configure(Constants.LOG4J_PROPERTIES_FILE_PATH);

    try {
        // One factory, configured with the application's OAuth credentials, is shared by all
        // threads that talk to Twitter.
        TwitterFactory twitterFactory = new TwitterFactory(
                new ConfigurationBuilder()
                        .setDebugEnabled(true)
                        .setOAuthConsumerKey(Constants.OAUTH_CONSUMER_KEY)
                        .setOAuthConsumerSecret(Constants.OAUTH_CONSUMER_SECRET)
                        .setOAuthAccessToken(Constants.OAUTH_ACCESS_TOKEN)
                        .setOAuthAccessTokenSecret(Constants.OAUTH_ACCESS_TOKEN_SECRET)
                        .build());

        // Queues connecting the stages: categories -> users -> tweets.
        LinkedBlockingQueue<String> categories = new LinkedBlockingQueue<String>();
        PriorityBlockingQueue<UserCategoryMessage> users =
                new PriorityBlockingQueue<UserCategoryMessage>();
        LinkedBlockingQueue<TweetCategoryMessage> tweets =
                new LinkedBlockingQueue<TweetCategoryMessage>();

        // Array order is both the construction order and the start/join order.
        Thread[] workers = {
            new GetSuggestedUserCategoriesThread(twitterFactory, categories),
            new GetUserSuggestionsForSlugThread(
                    twitterFactory,
                    categories,
                    users,
                    GlobalConstants.USER_LAST_TWEET_ID_TABLE_NAME),
            new GetUserTimelineThread(
                    twitterFactory,
                    users,
                    tweets,
                    GlobalConstants.USER_LAST_TWEET_ID_TABLE_NAME),
            new TweetStorageThread(tweets, GlobalConstants.CATEGORY_TWEET_STORAGE_TABLE_NAME),
            new QueueMeasurementThread(categories, users, tweets)
        };

        for (Thread worker : workers) {
            worker.start();
        }
        for (Thread worker : workers) {
            worker.join();
        }
    } catch (InterruptedException interruptedException) {
        logger.warn("Exception while collector threads are joining", interruptedException);
    } catch (Exception unknownException) {
        logger.error(
                "Unknown Exception while starting collector_categories", unknownException);
        crisisMailer.sendEmailAlert(unknownException);
    }

    // Reached on every path out of the try block above.
    logger.error("CategoryCollectorDriver has stopped of own free will");
    crisisMailer.sendEmailAlert(
            "collector_categories: CategoryCollectorDriver has stopped of own free will");
}