예제 #1
0
  /**
   * Runs every capitalization sub-fix over a single line, in a fixed order: proper nouns,
   * all-uppercase words, words following a capitalization trigger, the first word (only when the
   * incoming line was already capitalized), quote openings (only when the line contains a double
   * quote), and finally titles.
   *
   * <p>After each sub-fix, {@link SubtitleObject#clearMapIfChanged} is invoked with the previous
   * and the new text. If {@code Fixer.exclude} rejects the line, it is returned unchanged.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject}
   * @return fixed line
   */
  public static String fix(String line, SubtitleObject so) {
    if (Fixer.exclude(line, so, MIN_TOKENS, ' ')) {
      return line;
    }

    // Remember the untouched input: some sub-fixes compare against it.
    final String untouched = line;
    final boolean startedCapitalized = line.equals(StringUtil.capitalize(line));

    String fixed = fixProperNouns(line, so);
    String checkpoint = SubtitleObject.clearMapIfChanged(so, line, fixed);

    fixed = fixAllUppercase(fixed, untouched, so);
    checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

    fixed = fixPrecedingShouldCapitalize(fixed, so);
    checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

    // Restore the leading capital only if the line had one to begin with.
    if (startedCapitalized) {
      fixed = StringUtil.capitalize(fixed);
      checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);
    }

    if (fixed.contains("\"")) {
      fixed = fixQuoteCapitalization(fixed, so);
      checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);
    }

    fixed = fixTitle(fixed, untouched, so);
    // The returned checkpoint is no longer needed, but the call itself must still happen.
    SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

    return fixed;
  }
예제 #2
0
  /**
   * Capitalizes the first non-punctuation token that follows an opening double quote when that
   * quote token comes directly after a comma token.
   *
   * <p>Quote tokens are tracked as alternating opening/closing marks. A token that directly
   * follows a comma token is never capitalized by this method.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject}
   * @return fixed line, with tokens re-joined by single spaces
   */
  private static String fixQuoteCapitalization(String line, SubtitleObject so) {
    String[] tokens = so.split(RegexEnum.SPACE, line);
    String previous = null;
    String token = null;
    boolean nextQuoteOpens = true;
    boolean capitalizeNext = false;

    for (int idx = 0; idx < tokens.length; idx++) {
      previous = token;
      token = tokens[idx];

      boolean isQuote = token.equals("\"");
      boolean afterComma = previous != null && previous.equals(",");

      if (isQuote) {
        // Only an opening quote right after a comma arms capitalization.
        if (afterComma && nextQuoteOpens) {
          capitalizeNext = true;
        }
        // Every quote token flips between "opening" and "closing".
        nextQuoteOpens = !nextQuoteOpens;
      } else if (!afterComma && capitalizeNext && !StringUtil.isPunctuation(token)) {
        tokens[idx] = StringUtil.capitalize(token);
        capitalizeNext = false;
      }
    }

    return String.join(" ", tokens);
  }
예제 #3
0
  /**
   * Capitalizes the token that comes after any token listed in {@code
   * PRECEDING_SHOULD_CAPITALIZE}.
   *
   * <p>Special cases:
   *
   * <ul>
   *   <li>If the trigger is a "." that itself follows another "." (ellipsis), nothing is
   *       capitalized.
   *   <li>If the token after the trigger is a double quote or matches {@code
   *       RegexEnum.MATCH_DIGIT_AND_S}, it is left alone and the capitalization is deferred to the
   *       next non-punctuation token.
   * </ul>
   *
   * @param line line to fix
   * @param so {@link SubtitleObject}
   * @return fixed line, with tokens re-joined by single spaces
   */
  private static String fixPrecedingShouldCapitalize(String line, SubtitleObject so) {
    String[] tokens = so.split(RegexEnum.SPACE, line);
    // Rolling window over the tokens as they were read (before any rewrite of the array).
    String twoBack = null;
    String oneBack = null;
    String token = null;
    boolean pending = false;

    for (int idx = 0; idx < tokens.length; idx++) {
      twoBack = oneBack;
      oneBack = token;
      token = tokens[idx];

      boolean afterTrigger = oneBack != null && PRECEDING_SHOULD_CAPITALIZE.contains(oneBack);
      if (!afterTrigger) {
        // Not directly after a trigger: only act on a deferred capitalization.
        if (pending && !StringUtil.isPunctuation(token)) {
          tokens[idx] = StringUtil.capitalize(token);
          pending = false;
        }
        continue;
      }

      // Ellipsis: two consecutive "." tokens cancel the capitalization.
      if (oneBack.equals(".") && twoBack != null && twoBack.equals(".")) {
        pending = false;
        continue;
      }

      // Quote or digit-and-s token: skip it and capitalize a later token instead.
      if (token.equals("\"") || RegexUtil.matches(RegexEnum.MATCH_DIGIT_AND_S, token)) {
        pending = true;
        continue;
      }

      if (!StringUtil.startsUpperCase(token)) {
        tokens[idx] = StringUtil.capitalize(token);
      }
      pending = false;
    }

    return String.join(" ", tokens);
  }
예제 #4
0
  /**
   * Normalizes the capitalization of individual tokens based on the proper-noun lists.
   *
   * <p>Tokens that are punctuation, or for which {@code StringUtil.containsLettersAndNumbers} is
   * true, are skipped. A token that starts with an uppercase character, is longer than one
   * character and contains exactly one uppercase character is lowercased unless it appears in
   * {@code PROPER_NOUNS} or {@code POTENTIAL_PROPER_NOUNS}. Any other token is capitalized when it
   * appears in {@code PROPER_NOUNS}.
   *
   * @param line line to fix
   * @param so {@link SubtitleObject}
   * @return fixed line, with tokens re-joined by single spaces
   */
  private static String fixProperNouns(String line, SubtitleObject so) {
    String[] tokens = so.split(RegexEnum.SPACE, line);

    for (int idx = 0; idx < tokens.length; idx++) {
      String token = tokens[idx];
      if (StringUtil.isPunctuation(token) || StringUtil.containsLettersAndNumbers(token)) {
        continue;
      }

      boolean onlyLeadingCapital =
          StringUtil.startsUpperCase(token)
              && token.length() > 1
              && StringUtil.countUppercase(token) == 1;

      if (!onlyLeadingCapital) {
        // Not in "Capitalized" shape: capitalize it if it is a known proper noun.
        if (PROPER_NOUNS.contains(token)) {
          tokens[idx] = StringUtil.capitalize(token);
        }
        continue;
      }

      // "Capitalized" shape: keep only if it is (potentially) a proper noun.
      boolean recognized = PROPER_NOUNS.contains(token) || POTENTIAL_PROPER_NOUNS.contains(token);
      if (!recognized) {
        tokens[idx] = token.toLowerCase();
      }
    }

    return String.join(" ", tokens);
  }
예제 #5
0
  /**
   * Uppercases tokens that belong in all caps and lowercases all-caps tokens that do not.
   *
   * <p>Per token, the first matching rule applies:
   *
   * <ol>
   *   <li>Token is in {@code ALL_UPPERCASE}: it is uppercased.
   *   <li>Token is in {@code POTENTIAL_ALL_UPPERCASE}: it is uppercased only when the first index
   *       of its uppercase form in {@code originalLine} equals the first index of the token in
   *       {@code line}.
   *   <li>Token is longer than one character, already equals its uppercase form, and {@code
   *       StringUtil.containsLettersAndNumbers} is false for it: it is lowercased.
   * </ol>
   *
   * @param line line to fix
   * @param originalLine original line
   * @param so {@link SubtitleObject}
   * @return fixed line, with tokens re-joined by single spaces
   */
  private static String fixAllUppercase(String line, String originalLine, SubtitleObject so) {
    String[] tokens = so.split(RegexEnum.SPACE, line);

    for (int idx = 0; idx < tokens.length; idx++) {
      String token = tokens[idx];
      String upper = token.toUpperCase();
      boolean alreadyUpper = token.equals(upper);

      if (ALL_UPPERCASE.contains(token)) {
        if (!alreadyUpper) {
          tokens[idx] = upper;
        }
        continue;
      }

      if (POTENTIAL_ALL_UPPERCASE.contains(token)) {
        // Only uppercase when the original line had the uppercase form at the same position.
        if (originalLine.indexOf(upper) == line.indexOf(token)) {
          tokens[idx] = upper;
        }
        continue;
      }

      if (token.length() > 1 && alreadyUpper && !StringUtil.containsLettersAndNumbers(token)) {
        tokens[idx] = token.toLowerCase();
      }
    }

    return String.join(" ", tokens);
  }
예제 #6
0
  /**
   * Applies the full chain of fixes to {@code text} and stores the result back into {@code text}.
   *
   * <p>The method works in three phases: whole-text fixes, per-line fixes (the text is split on
   * {@code RegexEnum.NEWLINE} and each line is run through the individual fixers in a fixed
   * order), and finally whole-result fixes on the re-assembled lines.
   *
   * <p>It returns early when {@code text} is null or empty, when {@code RemoveEmpty} leaves
   * nothing, when a manual replacement exists in {@code ManuelFix.MANUEL_FIX} (the replacement is
   * stored in {@code text}), or when the text is listed in {@code DoNotFix.DO_NOT_FIX}. Note that
   * by the time of the later early returns, {@code text} has already been trimmed and passed
   * through the first fixers.
   */
  public void fix() {
    if (text == null || text.isEmpty()) {
      return;
    }
    text = text.trim();
    text = RemoveEmpty.fix(text, this);
    if (text.isEmpty()) {
      return;
    }

    // TODO: comment this

    // Bracket-style fixes run before the manual-fix / do-not-fix lookups, so those lookups see
    // the text after these three calls.
    text = RemoveEndingCharacter.fix(text, '<', '>', this);
    text = RemoveEndingCharacter.fix(text, '{', '}', this);
    text = RemoveEndingCharacter.fix(text, '[', ']', this);
    if (ManuelFix.MANUEL_FIX.containsKey(text)) {
      // A hand-written replacement exists: use it verbatim and skip every other fix.
      text = ManuelFix.MANUEL_FIX.get(text);
      return;
    } else if (DoNotFix.DO_NOT_FIX.contains(text)) {
      return;
    }

    // Text that opens with '#' but does not close with one gets a closing " #" appended.
    if (text.startsWith("#") && !text.endsWith("#")) {
      text += " #";
    }
    text = RemoveEndingCharacter.fix(text, '(', ')', this);
    text = FixNonTraditionalStrings.fix(text, this);

    // TODO: messy (see "...with the .22?")
    // Ensure a leading dash is followed by a space.
    if (text.startsWith("-") && !text.startsWith("- ")) {
      text = "- " + text.substring(1);
      splitMap.clear(); // TODO: clean up
    }
    text = FixEllipses.fix(text, this);
    text = FixSingleDoubleQuotes.fix(text, this);

    String result = "";
    // Number of lines in which RemoveCharacterName changed something.
    int removedNames = 0;
    // Snapshot taken after the fixes above but before FixUnbalancedDashes; used both to detect
    // whether dashes were rebalanced and as the fallback value at the end of this method.
    String originalText = text;

    text = FixUnbalancedDashes.fix(text);
    boolean fixedUnbalancedDashes = !originalText.equals(text);

    // NOTE(review): this block is currently a no-op; its only statement is commented out.
    if (!text.startsWith("-")) {
      // text = text.replace("\n", " "); // TODO: explain this
    }

    String temp;
    for (String line : RegexUtil.split(RegexEnum.NEWLINE, text)) { // TODO: if contains punctuation
      line = line.trim();
      splitMap.clear();
      line = PrepareLine.fix(line, this);
      // Return value is discarded; presumably this call primes splitMap for the fixers below
      // (TODO confirm against split's implementation).
      split(RegexEnum.SPACE, line);
      temp = line;
      line = RemoveCharacterName.fix(line, this);
      if (!temp.equals(line)) {
        ++removedNames;
      }
      line = FixSpelling.fix(line, this);
      // Exactly one of FixToUppercase / FixCapitalization runs, selected by the config toggle.
      if (!SrtFixerConfig.isToggleCorrectCapitalization()) {
        line =
            FixToUppercase.fix(
                line, this); // TODO: remove this once all bugs in FixCapitalization is fixed
      }
      line = ChangeLsToIs.fix(line, this);
      line = FixDashes.fix(line, this);
      line = RemoveEmpty.fix(line, this);
      if (SrtFixerConfig.isToggleCorrectCapitalization()) {
        line = FixCapitalization.fix(line, this);
      }
      line = FixAmpersand.fix(line, this);
      line = FixHeight.fix(line, this);
      line = FixWebsites.fix(line, this);
      line = FixTime.fix(line, this);
      line = FixNumbers.fix(line, this);
      line = FixAbbreviations.fix(line, this);
      line = FixContractions.fix(line, this);
      line = FixAcronym.fix(line, this);
      line = FixLetterS.fix(line, this);
      line = FixMisplacedQuotes.fix(line, this);
      line = FixSpacing.fix(line, this);
      line = FixCommonErrors.fix(line, this);
      line = FixEnding.fix(line, this);

      // Lines that ended up empty are dropped from the result entirely.
      if (!line.isEmpty()) {
        line = FixMultilineDashes.fix(result, line); // TODO: why doesn't this work???
        if (!result.isEmpty()) {
          result = FixMultilineDashes.fix(line, result);
        }
        StringBuilder builder = new StringBuilder();
        if (removedNames == 2) {
          // Names have been removed from two lines so far: prefix both the accumulated result
          // and the current line with "- ". No newline is appended after the line on this path.
          builder.append("- ");
          builder.append(result);
          builder.append("- ");
          builder.append(line);
          result = builder.toString();
          continue;
        }
        // Default path: append the line followed by a newline.
        builder.append(result);
        builder.append(line);
        builder.append('\n');
        result = builder.toString();
      }
    }

    // Whole-result fixes on the re-assembled text.
    result = FixEllipses.fix(result, this);
    result = result.trim();

    result = FixMultilineQuotes.fix(result, this);

    result = FixThreeLines.fix(result);
    result = FixTwoLines.fix(result);
    result = FixOneLine.fix(result);

    // If the snapshot had at most one newline, the result is approximately equal to it, and no
    // dashes were rebalanced, fall back to the snapshot instead of the processed result.
    if (StringUtil.count(originalText, '\n') <= 1
        && SubtitleUtil.isApproximatelyEqual(result, originalText)
        && !fixedUnbalancedDashes) {
      result = originalText;
    }

    text = result;
  }