Пример #1
0
  /**
   * Capitalizes the first non-punctuation token that follows an opening double quote, when that
   * quote token comes directly after a comma token.
   *
   * <p>The line is tokenized with {@code so.split(RegexEnum.SPACE, line)} and the tokens are
   * re-joined with a single space. Double-quote tokens are treated as alternating between opening
   * and closing quotes, starting with an opening one.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject} used to tokenize the line
   * @return the line with the affected tokens capitalized
   */
  private static String fixQuoteCapitalization(String line, SubtitleObject so) {
    final String[] tokens = so.split(RegexEnum.SPACE, line);

    boolean nextQuoteOpens = true; // whether the next quote token counts as an opening quote
    boolean capitalizePending = false; // set once an opening quote was seen right after a comma
    String previous = null;

    for (int idx = 0; idx < tokens.length; idx++) {
      final String token = tokens[idx];
      final boolean afterComma = ",".equals(previous);
      final boolean isQuote = token.equals("\"");
      previous = token;

      if (isQuote) {
        // Only an opening quote that directly follows a comma requests capitalization.
        if (afterComma && nextQuoteOpens) {
          capitalizePending = true;
        }
        nextQuoteOpens = !nextQuoteOpens;
        continue;
      }

      if (afterComma) {
        // A non-quote token right after a comma is left untouched, even if a
        // capitalization is still pending.
        continue;
      }

      if (capitalizePending && !StringUtil.isPunctuation(token)) {
        tokens[idx] = StringUtil.capitalize(token);
        capitalizePending = false;
      }
    }

    return String.join(" ", tokens);
  }
Пример #2
0
  /**
   * Capitalizes the token that follows any token contained in
   * {@code PRECEDING_SHOULD_CAPITALIZE}.
   *
   * <p>Special cases handled while walking the tokens:
   *
   * <ul>
   *   <li>two consecutive {@code "."} tokens are treated as an ellipsis and cancel the
   *       capitalization;
   *   <li>a double-quote token, or a token matching {@code RegexEnum.MATCH_DIGIT_AND_S}, is left
   *       as is and the capitalization stays pending for a later non-punctuation token.
   * </ul>
   *
   * <p>The line is tokenized with {@code so.split(RegexEnum.SPACE, line)} and re-joined with a
   * single space.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject} used to tokenize the line
   * @return the line with the affected tokens capitalized
   */
  private static String fixPrecedingShouldCapitalize(String line, SubtitleObject so) {
    final String[] tokens = so.split(RegexEnum.SPACE, line);

    String twoBack = null; // token at idx - 2 (as it was before any change)
    String oneBack = null; // token at idx - 1 (as it was before any change)
    boolean capitalizePending = false;

    for (int idx = 0; idx < tokens.length; idx++) {
      final String token = tokens[idx];
      final boolean afterTrigger =
          oneBack != null && PRECEDING_SHOULD_CAPITALIZE.contains(oneBack);

      if (afterTrigger) {
        final boolean ellipsis = oneBack.equals(".") && ".".equals(twoBack);
        if (ellipsis) {
          capitalizePending = false;
        } else if (token.equals("\"")
            || RegexUtil.matches(RegexEnum.MATCH_DIGIT_AND_S, token)) {
          // Not a word to capitalize; keep the request open for a later token.
          capitalizePending = true;
        } else {
          if (!StringUtil.startsUpperCase(token)) {
            tokens[idx] = StringUtil.capitalize(token);
          }
          capitalizePending = false;
        }
      } else if (capitalizePending && !StringUtil.isPunctuation(token)) {
        tokens[idx] = StringUtil.capitalize(token);
        capitalizePending = false;
      }

      twoBack = oneBack;
      oneBack = token;
    }

    return String.join(" ", tokens);
  }
Пример #3
0
  /**
   * Entry point of the {@link FixCapitalization} fix.
   *
   * <p>Unless {@code Fixer.exclude} rejects the line, the following steps run in order: proper
   * nouns, all-uppercase words, capitalization after trigger tokens, re-capitalization of the line
   * (only if the incoming line was already equal to its capitalized form), quote capitalization
   * (only if the line contains a double quote) and finally titles. After every step
   * {@code SubtitleObject.clearMapIfChanged} is called with the previous and the new text.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject}
   * @return fixed line, or the unchanged input when the line is excluded
   */
  public static String fix(String line, SubtitleObject so) {
    if (Fixer.exclude(line, so, MIN_TOKENS, ' ')) {
      return line;
    }

    final String originalLine = line;

    // True when the incoming line already equals its capitalized form; in that case the
    // capitalization is re-applied after the steps below have run.
    final boolean restoreLeadingCapital =
        originalLine.equals(StringUtil.capitalize(originalLine));

    String result = fixProperNouns(originalLine, so);
    String lastSeen = SubtitleObject.clearMapIfChanged(so, originalLine, result);

    result = fixAllUppercase(result, originalLine, so);
    lastSeen = SubtitleObject.clearMapIfChanged(so, lastSeen, result);

    result = fixPrecedingShouldCapitalize(result, so);
    lastSeen = SubtitleObject.clearMapIfChanged(so, lastSeen, result);

    if (restoreLeadingCapital) {
      result = StringUtil.capitalize(result);
      lastSeen = SubtitleObject.clearMapIfChanged(so, lastSeen, result);
    }

    if (result.indexOf('"') >= 0) {
      result = fixQuoteCapitalization(result, so);
      lastSeen = SubtitleObject.clearMapIfChanged(so, lastSeen, result);
    }

    result = fixTitle(result, originalLine, so);
    // The returned value is not needed after the last step; the call is kept for its effect on so.
    SubtitleObject.clearMapIfChanged(so, lastSeen, result);

    return result;
  }
Пример #4
0
  /**
   * Uppercases every token that should be all uppercase and lowercases tokens that are currently
   * all uppercase but should not be.
   *
   * <p>For each token produced by {@code so.split(RegexEnum.SPACE, line)}:
   *
   * <ul>
   *   <li>a token contained in {@code ALL_UPPERCASE} is uppercased;
   *   <li>a token contained in {@code POTENTIAL_ALL_UPPERCASE} is uppercased only if the first
   *       occurrence of its uppercase form in {@code originalLine} is at the same index as the
   *       first occurrence of the token in {@code line};
   *   <li>any other token longer than one character that is entirely uppercase and for which
   *       {@code StringUtil.containsLettersAndNumbers} is false is lowercased.
   * </ul>
   *
   * <p>Case conversion uses {@link java.util.Locale#ROOT} so the result does not depend on the
   * default locale of the JVM (for example the Turkish dotted/dotless i mappings).
   *
   * @param line line to fix
   * @param originalLine original line, before any capitalization fix was applied
   * @param so {@link SubtitleObject} used to tokenize the line
   * @return fixed line, tokens re-joined with a single space
   */
  private static String fixAllUppercase(String line, String originalLine, SubtitleObject so) {
    String[] split = so.split(RegexEnum.SPACE, line);
    for (int i = 0; i < split.length; ++i) {
      String current = split[i];
      // Computed once per token with a fixed locale; reused by every branch below.
      String upper = current.toUpperCase(java.util.Locale.ROOT);
      if (ALL_UPPERCASE.contains(current)) {
        split[i] = upper;
      } else if (POTENTIAL_ALL_UPPERCASE.contains(current)) {
        // Only restore the uppercase form when the original line had it at the same position.
        if (originalLine.indexOf(upper) == line.indexOf(current)) {
          split[i] = upper;
        }
      } else if (current.length() > 1
          && current.equals(upper)
          && !StringUtil.containsLettersAndNumbers(current)) {
        split[i] = current.toLowerCase(java.util.Locale.ROOT);
      }
    }

    return String.join(" ", split);
  }
Пример #5
0
  /**
   * Fixes the capitalization of proper nouns.
   *
   * <p>Tokens that are punctuation, or for which {@code StringUtil.containsLettersAndNumbers} is
   * true, are skipped. For the remaining tokens:
   *
   * <ul>
   *   <li>a token longer than one character that starts with an uppercase letter and contains
   *       exactly one uppercase letter is lowercased, unless it is contained in
   *       {@code PROPER_NOUNS} or {@code POTENTIAL_PROPER_NOUNS};
   *   <li>any other token contained in {@code PROPER_NOUNS} is capitalized.
   * </ul>
   *
   * <p>Lowercasing uses {@link java.util.Locale#ROOT} so the result does not depend on the default
   * locale of the JVM (for example the Turkish dotted/dotless i mappings).
   *
   * @param line line to fix
   * @param so {@link SubtitleObject} used to tokenize the line
   * @return fixed line, tokens re-joined with a single space
   */
  private static String fixProperNouns(String line, SubtitleObject so) {
    String[] split = so.split(RegexEnum.SPACE, line);
    for (int i = 0; i < split.length; ++i) {
      String current = split[i];
      if (StringUtil.isPunctuation(current) || StringUtil.containsLettersAndNumbers(current)) {
        continue;
      }

      boolean capitalizedWord =
          StringUtil.startsUpperCase(current)
              && current.length() > 1
              && StringUtil.countUppercase(current) == 1;

      if (capitalizedWord) {
        // A capitalized word that is not a known (or potential) proper noun gets lowercased.
        if (!PROPER_NOUNS.contains(current) && !POTENTIAL_PROPER_NOUNS.contains(current)) {
          split[i] = current.toLowerCase(java.util.Locale.ROOT);
        }
      } else if (PROPER_NOUNS.contains(current)) {
        split[i] = StringUtil.capitalize(current);
      }
    }

    return String.join(" ", split);
  }