/**
   * Returns the candidate languages for a text together with their probabilities.
   *
   * @param text text whose language is to be detected
   * @return the list of matching languages as reported by the detector
   * @throws LangDetectException propagated unchanged from the underlying detector calls
   */
  public ArrayList<Language> detectLangs(String text) throws LangDetectException {
    final Detector langDetector = DetectorFactory.create();
    langDetector.append(text);
    final ArrayList<Language> candidates = langDetector.getProbabilities();
    return candidates;
  }
 /**
  * Detects the language of a document text.
  *
  * @param aDocumentText the text to analyse
  * @return the detected language code, or {@code "x-unspecified"} when detection failed with a
  *     "feature" error (see {@code isFeatureException})
  * @throws AnalysisEngineProcessException wrapping any other detection failure
  */
 private String detectLanguage(String aDocumentText) throws AnalysisEngineProcessException {
   try {
     Detector langDetector = DetectorFactory.create();
     langDetector.append(aDocumentText);
     return langDetector.detect();
   } catch (LangDetectException e) {
     // A "no features in text" error can occur when the message consists of, for instance, a
     // single numeric value. That particular error is tolerated and mapped to the fallback
     // code; every other detection error is rethrown to the caller.
     if (isFeatureException(e)) {
       return "x-unspecified";
     }
     throw new AnalysisEngineProcessException(e);
   }
 }
  /**
   * Detects the language of the given text.
   *
   * @param text text whose language is to be detected; may be {@code null}
   * @return the language code reported by the detector (e.g. "en" for English), or an empty
   *     string when {@code text} is {@code null} or the detection step fails
   * @throws LangDetectException if raised while creating the detector or appending the text;
   *     failures of the detection step itself are absorbed and yield an empty string
   */
  public static String detect(String text) throws LangDetectException {
    if (text == null) {
      return "";
    }

    final Detector langDetector = DetectorFactory.create();
    langDetector.append(text);

    try {
      return langDetector.detect();
    } catch (Exception e) {
      // Any failure during detection is reported as "no language".
      return "";
    }
  }
// Example #4
0
  /**
   * Fetches a user's timeline and returns the raw JSON of every original (non-retweet) status
   * whose text is detected as Spanish ("es").
   *
   * <p>The method first polls the rate-limit status of {@code /statuses/user_timeline} and sleeps
   * until enough calls remain to page through {@code NUM_TWEETS} statuses. It then pages through
   * the timeline 200 statuses at a time and stops once {@code NUM_TWEETS} non-retweet statuses
   * have been examined or the timeline is exhausted.
   *
   * <p>Failures while downloading or processing the timeline are reported on standard output and
   * whatever has been collected so far is returned.
   *
   * @param user user whose timeline is requested; passed as-is to {@code getUserTimeline}
   * @return raw JSON of the matching statuses; never {@code null}, possibly empty
   * @throws InterruptedException if the thread is interrupted while waiting for the rate limit
   * @throws TwitterException declared for callers; timeline download failures are caught and
   *     reported inside the method
   */
  private static ArrayList<String> ProcessTimeLine(String user)
      throws InterruptedException, TwitterException {
    ArrayList<String> tweets = new ArrayList<String>();

    ConfigurationBuilder cb = new ConfigurationBuilder();
    cb.setDebugEnabled(true)
        .setOAuthConsumerKey(KEY)
        .setOAuthConsumerSecret(SECRET)
        .setOAuthAccessToken(ACCESSTOKEN)
        .setOAuthAccessTokenSecret(ACCESSSECRET);
    cb.setJSONStoreEnabled(true);

    // Twitter instance configured with the explicit OAuth credentials set above.
    Twitter twitter = new TwitterFactory(cb.build()).getInstance();

    // Block until enough timeline calls are left to page through NUM_TWEETS statuses.
    boolean waiting = true;
    do {
      try {
        Map<String, RateLimitStatus> limits = twitter.getRateLimitStatus();
        RateLimitStatus rateLimit = limits.get("/statuses/user_timeline");
        int remaining = rateLimit.getRemaining();
        System.out.print("(Remaining API calls: " + remaining + ")");
        int remainingTime = rateLimit.getSecondsUntilReset();

        if (remaining <= NUM_TWEETS / 200 + 1) {
          System.out.println("Waiting " + remainingTime + " seconds");
          Thread.sleep(remainingTime * 1000L);
        } else {
          waiting = false;
        }
      } catch (InterruptedException ie) {
        // An interrupt is a request to stop; it must not be mistaken for an API failure.
        throw ie;
      } catch (Exception te) {
        String description = te.toString().toLowerCase();
        if (description.contains("rate limit")
            && !description.contains("bad authentication data")) {
          System.out.println("Waiting 60s");
          Thread.sleep(60 * 1000);
        } else {
          // Any other failure: stop waiting and let the download attempt report the problem.
          waiting = false;
        }
      }
    } while (waiting);

    try {
      // The page size must stay constant: the page index is an offset in units of the page
      // size, so shrinking the size on a later page would re-fetch statuses already seen.
      final int pageSize = 200;
      int page = 1;
      int examined = 0;
      boolean more = true;

      while (more && examined < NUM_TWEETS) {
        List<Status> statuses = twitter.getUserTimeline(user, new Paging(page, pageSize));
        more = !statuses.isEmpty();

        for (Status status : statuses) {
          if (examined >= NUM_TWEETS) {
            break;
          }

          String statusId = "-1";
          try {
            statusId = String.valueOf(status.getId());

            // Retweets are skipped and do not count towards NUM_TWEETS.
            Status retweeted = status.getRetweetedStatus();
            if (retweeted != null && retweeted.getUser() != null) {
              continue;
            }

            try {
              // A detector is single-use: appended text accumulates, so a shared instance
              // would not judge each tweet on its own text.
              Detector detector = DetectorFactory.create();
              detector.append(Simplify(status.getText()));
              if (detector.detect().equalsIgnoreCase("es")) {
                tweets.add(DataObjectFactory.getRawJSON(status));
              }
            } catch (Exception ignored) {
              // Best effort: a tweet whose language cannot be determined is simply left out.
            }
          } catch (Exception ex) {
            System.out.println("ERROR in status id " + statusId);
          }

          examined++;
        }
        page++;
      }

    } catch (TwitterException te) {
      te.printStackTrace();
      System.out.println("Failed to get timeline: " + te.getMessage());
    } catch (Exception ex) {
      // Keep the best-effort contract (return what was collected) but do not hide the cause.
      System.out.println("Failed to process timeline: " + ex);
    }

    System.out.println("..." + tweets.size() + " tweets.");

    return tweets;
  }
// Example #5
0
  /**
   * Searches Twitter for {@code text} and stores the matching Latvian ("lv") and Russian ("ru")
   * tweets that are newer than {@code getDate}.
   *
   * <p>Up to 15 result pages of 100 tweets each are requested; paging stops early as soon as a
   * page is not completely full. For every qualifying tweet one row is inserted into
   * {@code tweet} and one into {@code tweet_brand}. A failed insert is logged and skipped; the
   * tweet is still counted in the return value.
   *
   * @param id value stored next to the tweet id in {@code tweet_brand}
   * @param text search query; also echoed to standard output when the search completes
   * @param conn open database connection used for the inserts; not closed by this method
   * @param getDate lower time bound in {@code yyyy-MM-dd HH:mm:ss} format; only tweets created
   *     strictly after it are considered
   * @return number of tweets newer than {@code getDate} detected as "lv" or "ru"
   * @throws TwitterException if a search request fails
   * @throws SQLException if the insert statements cannot be prepared or closed
   * @throws LangDetectException if a language detector cannot be created
   * @throws ParseException if {@code getDate} does not match the expected format
   */
  public static int searchForTwits(int id, String text, Connection conn, String getDate)
      throws TwitterException, SQLException, LangDetectException, ParseException {
    final int pageSize = 100;
    final int lastPage = 15;

    Twitter twitter = new TwitterFactory().getInstance();

    // One formatter serves both directions; the cut-off date is the same for every tweet.
    DateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    java.util.Date oldDate = timestampFormat.parse(getDate);

    int n = 0;
    java.sql.PreparedStatement insertTweet = null;
    java.sql.PreparedStatement insertTweetBrand = null;
    try {
      // Bound parameters keep tweet text and user names from being interpreted as SQL.
      insertTweet =
          conn.prepareStatement("INSERT INTO tweet VALUES (null, ?, ?, ?, ?, null, null, null)");
      insertTweetBrand = conn.prepareStatement("INSERT INTO tweet_brand VALUES (null, ?, ?)");

      for (int pageNumber = 1; pageNumber <= lastPage; pageNumber++) {
        Query query = new Query(text).rpp(pageSize).page(pageNumber);
        QueryResult result = twitter.search(query);

        int countTweets = 0;
        for (Tweet tweet : result.getTweets()) {
          countTweets++;

          java.util.Date date = tweet.getCreatedAt();
          if (!date.after(oldDate)) {
            continue;
          }

          Detector detector = DetectorFactory.create();
          detector.append(tweet.getText());
          String lang;
          try {
            lang = detector.detect();
          } catch (LangDetectException ex) {
            // The language of this tweet cannot be determined; skip it.
            continue;
          }
          if (!lang.equals("lv") && !lang.equals("ru")) {
            continue;
          }

          try {
            // Values are bound as strings, mirroring the quoted literals used previously.
            String tweetId = String.valueOf(tweet.getId());
            insertTweet.setString(1, tweetId);
            insertTweet.setString(2, tweet.getFromUser());
            // The apostrophe substitution is kept so that stored text stays consistent with
            // rows written earlier; it is no longer what protects the statement.
            insertTweet.setString(3, tweet.getText().replace("'", "&rsquo;"));
            insertTweet.setString(4, timestampFormat.format(date));
            insertTweet.executeUpdate();

            insertTweetBrand.setString(1, tweetId);
            insertTweetBrand.setString(2, String.valueOf(id));
            insertTweetBrand.executeUpdate();
          } catch (SQLException ex) {
            // Best effort: a row that cannot be stored must not abort the whole search.
            System.err.println("Could not store tweet " + tweet.getId() + ": " + ex.getMessage());
          }
          n++;
        }

        // A page that is not completely full is the last one available.
        if (countTweets != pageSize) {
          break;
        }
      }
    } finally {
      try {
        if (insertTweetBrand != null) {
          insertTweetBrand.close();
        }
      } finally {
        if (insertTweet != null) {
          insertTweet.close();
        }
      }
    }

    System.out.print(text + " ");
    return n;
  }