/**
 * Reads tokens via {@code nextToken()} until the {@code PERIOD} token and records each one in
 * {@code analysis} together with the words that preceded it.
 *
 * <p>The first token is stored with an empty context at order 0. Each later token is stored
 * once per context length {@code j}, keyed by {@code concat(previous, j)}. After the loop the
 * {@code PERIOD} token itself is stored against the trailing contexts.
 */
private static void analyzeNext() {
  ArrayList<String> previous = new ArrayList<String>();
  String token = nextToken();
  analysis.add("", token, 0); // Insert 1st token directly: it has no preceding words.
  previous.add(token);

  while (true) {
    int nWords = Math.min(previous.size(), MAX);
    token = nextToken();
    // Compare by value. Reference equality (==) only matches when nextToken() returns the
    // very same String instance as PERIOD, which would otherwise keep this loop running.
    if (PERIOD.equals(token)) {
      break;
    }

    for (int j = 1; j <= nWords; j++) {
      String s1 = concat(previous, j); // ({"a", "b", "c"}, 2) -> "b c"
      analysis.add(s1, token, j);
    }

    previous.add(token);
    // Drop the oldest word once the list reaches MAX entries, so at most MAX - 1 are kept.
    if (previous.size() == MAX) {
      previous.remove(0);
    }
  }

  // NOTE(review): this loop stops at j < nWords while the loop above runs to j <= nWords, so
  // the longest context is never recorded for PERIOD. Left unchanged; confirm whether intended.
  int nWords = Math.min(previous.size(), MAX);
  for (int j = 1; j < nWords; j++) {
    String s1 = concat(previous, j); // ({"a", "b", "c"}, 2) -> "b c"
    analysis.add(s1, PERIOD, j);
  }
}
 /**
  * Bumps the frequency counters of {@code analysis} that match this object's {@code word}.
  *
  * <p>Counters used: "re", "pre", "pro", "de-ed" and "markerWord". A word starting with "pro"
  * is additionally wrapped in asterisks. The "de-ed" and "markerWord" checks only apply to
  * words longer than four characters.
  *
  * @param analysis receiver of the {@code inc} and {@code getMarkerWord} calls
  */
 public void calculateStatistics(Analysis analysis) {
   // The three prefixes below cannot match the same word, so an else-if chain is equivalent.
   if (word.startsWith("re")) {
     analysis.inc("re");
   } else if (word.startsWith("pre")) {
     analysis.inc("pre");
   } else if (word.startsWith("pro")) {
     analysis.inc("pro");
     word = "*" + word + "*"; // Mark special words
     // The marked word now begins with "*", so none of the remaining prefix checks can match.
     return;
   }

   if (word.length() <= 4) {
     return;
   }

   boolean wrappedInDeEd = word.startsWith("de") && word.endsWith("ed");
   if (wrappedInDeEd) {
     analysis.inc("de-ed");
   }

   // Kept as two separate checks: getMarkerWord receives this object between them.
   if (word.startsWith("Vine")) {
     analysis.inc("markerWord");
     analysis.getMarkerWord(this);
   }
   if (word.startsWith("marker")) {
     analysis.inc("markerWord");
     analysis.getMarkerWord(this);
   }
 }
  /**
   * Entry point: analyzes the input texts, computes statistics, generates sentences and prints
   * the elapsed wall-clock time in milliseconds.
   *
   * @param args optional file names; when present they replace {@code fileNames}. When
   *     {@code fileNames} is empty, {@code RewriteManuscript.manuscriptString} is analyzed.
   */
  public static void main(String[] args) {
    if (args.length > 0) {
      fileNames = args;
    }

    final long startedAt = System.currentTimeMillis();

    if (fileNames.length == 0) {
      String manuscript = RewriteManuscript.manuscriptString;
      System.out.println("******** Reading and analyzing manuscript string ********");
      analyze(manuscript);
    }

    for (String fileName : fileNames) {
      // The file is read before the banner is printed, as in the original ordering.
      String contents = readFile(fileName);
      System.out.println("******** Reading and analyzing file: " + fileName + "********");
      analyze(contents);
    }

    analysis.calculateStatistics();
    System.out.println("******** Creating a new masterpiece of literature... *******\n");
    generateSentences();

    System.out.println((System.currentTimeMillis() - startedAt) + "ms");
  }
  /**
   * Creates the factory, reading the character type table, the word-delimiter flags and the
   * protected-word set from {@code settings}.
   *
   * @param index passed through to the superclass constructor
   * @param indexSettings passed through to the superclass constructor
   * @param env environment handed to the {@code Analysis} word-list lookups
   * @param name passed through to the superclass constructor
   * @param settings source of "type_table", "protected_words" and the individual flag settings
   */
  @Inject
  public WordDelimiterTokenFilterFactory(
      Index index,
      @IndexSettings Settings indexSettings,
      Environment env,
      @Assisted String name,
      @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    // Sample format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> typeTableLines = Analysis.getWordList(env, settings, "type_table");
    this.charTypeTable =
        typeTableLines == null
            ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE
            : parseTypes(typeTableLines);

    int delimiterFlags =
        // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
        getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true)
            // If set, causes number subwords to be generated: "500-42" => "500" "42"
            | getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true)
            // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
            | getFlag(CATENATE_WORDS, settings, "catenate_words", false)
            // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
            | getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false)
            // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
            | getFlag(CATENATE_ALL, settings, "catenate_all", false)
            // If set, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts
            // regardless)
            | getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true)
            // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
            | getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false)
            // If set, causes "j2se" to be three tokens: "j" "2" "se"
            | getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true)
            // If set, causes trailing "'s" to be removed for each subword:
            // "O'Neil's" => "O", "Neil"
            | getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);

    // When not null, this is the set of tokens to protect from being delimited.
    Set<?> wordsToProtect = Analysis.getWordSet(env, settings, "protected_words", version);
    if (wordsToProtect == null) {
      this.protoWords = null;
    } else {
      this.protoWords = CharArraySet.copy(Lucene.VERSION, wordsToProtect);
    }
    this.flags = delimiterFlags;
  }
 /**
  * Hands this node to the given switch by invoking its {@code caseAAddressUnaryExpression}
  * callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAAddressUnaryExpression(this);
 }
 /**
  * Hands this node to the given switch by invoking its
  * {@code caseAArrayDirectFunctionDeclarator} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAArrayDirectFunctionDeclarator(this);
 }
 /**
  * Hands this node to the given switch by invoking its
  * {@code caseATypedefDeclarationDeclarationOrDefinition} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseATypedefDeclarationDeclarationOrDefinition(this);
 }
 /**
  * Hands this node to the given switch by invoking its
  * {@code caseASignedIntSignedIntSpecifier} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseASignedIntSignedIntSpecifier(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAOutputpinPin} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAOutputpinPin(this);
 }
Beispiel #10
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAProd} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAProd(this);
 }
Beispiel #11
0
 /**
  * Hands this node to the given switch by invoking its {@code caseADefCommand} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseADefCommand(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAMultiParameterList}
  * callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAMultiParameterList(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAInterfaceFileType}
  * callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAInterfaceFileType(this);
 }
Beispiel #14
0
 /**
  * Hands this node to the given switch by invoking its {@code caseANewSetExpr} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseANewSetExpr(this);
 }
Beispiel #15
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAStarQueryStar} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAStarQueryStar(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAMarchDate} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAMarchDate(this);
 }
Beispiel #17
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAInputs} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 @Override
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAInputs(this);
 }
Beispiel #18
0
 /**
  * Visitor hook: passes this node to {@code a.caseASTSwitchNode}.
  *
  * <p>Nomair A. Naeem, 7-FEB-05. Part of the Visitor design implementation for the AST; see
  * soot.dava.toolkits.base.AST.analysis for details.
  *
  * @param a the analysis whose {@code caseASTSwitchNode} callback receives this node
  */
 public void apply(Analysis a) {
   a.caseASTSwitchNode(this);
 }
Beispiel #19
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAUsageIs} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAUsageIs(this);
 }
 /**
  * Hands this node to the given switch by invoking its
  * {@code caseAClassInitializerArrayCreationExpression} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 @Override
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAClassInitializerArrayCreationExpression(this);
 }
Beispiel #21
0
 /**
  * Hands this node to the given switch by invoking its {@code caseANullBaseType} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseANullBaseType(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAAbstractModifier}
  * callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAAbstractModifier(this);
 }
Beispiel #23
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAParaComandos} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 @Override
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAParaComandos(this);
 }
 /**
  * Hands this node to the given switch by invoking its
  * {@code caseABothIntersectMultiplicativeExpr} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseABothIntersectMultiplicativeExpr(this);
 }
 /**
  * Hands this node to the given switch by invoking its {@code caseAModuleName} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAModuleName(this);
 }
Beispiel #26
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAIgnTokens} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 @Override
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAIgnTokens(this);
 }
Beispiel #27
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAIfStatement} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAIfStatement(this);
 }
Beispiel #28
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAExtendsClassDecl}
  * callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAExtendsClassDecl(this);
 }
Beispiel #29
0
 /**
  * Hands this node to the given switch by invoking its {@code caseAOnelocal} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseAOnelocal(this);
 }
Beispiel #30
0
 /**
  * Hands this node to the given switch by invoking its {@code caseATextualExp} callback.
  *
  * @param sw the switch to dispatch to; it is downcast to {@code Analysis} before the call
  */
 public void apply(Switch sw) {
   final Analysis analysis = (Analysis) sw;
   analysis.caseATextualExp(this);
 }