/**
 * Runs every capitalization pass of {@link FixCapitalization} over a single line.
 *
 * <p>The passes run in a fixed order: proper nouns, all-uppercase words, words that follow a
 * capitalizing token, the leading word (only when the incoming line already equalled its own
 * capitalized form), quote openings (only when the line contains a double quote) and finally
 * titles. After each pass, {@code SubtitleObject.clearMapIfChanged} is handed the text from
 * before and after that pass.
 *
 * @param line line to fix
 * @param so {@link SubtitleObject}
 * @return fixed line, or {@code line} itself when {@code Fixer.exclude} rejects it
 */
public static String fix(String line, SubtitleObject so) {
  if (Fixer.exclude(line, so, MIN_TOKENS, ' ')) {
    return line;
  }

  // Remember whether the untouched input was already capitalized; later passes may
  // lowercase the first word, and this flag decides whether it is restored.
  final boolean restoreLeadingCapital = line.equals(StringUtil.capitalize(line));

  String checkpoint = line;

  String fixed = fixProperNouns(line, so);
  checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

  fixed = fixAllUppercase(fixed, line, so);
  checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

  fixed = fixPrecedingShouldCapitalize(fixed, so);
  checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

  if (restoreLeadingCapital) {
    fixed = StringUtil.capitalize(fixed);
    checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);
  }

  if (fixed.contains("\"")) {
    fixed = fixQuoteCapitalization(fixed, so);
    checkpoint = SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);
  }

  fixed = fixTitle(fixed, line, so);
  // Last pass: the returned checkpoint is not needed any more, only the call itself.
  SubtitleObject.clearMapIfChanged(so, checkpoint, fixed);

  return fixed;
}
/**
 * Capitalizes the first non-punctuation token after an opening double quote that directly
 * follows a comma token.
 *
 * <p>Double-quote tokens are tracked as alternating opening/closing quotes. Only an opening
 * quote whose previous token is {@code ","} arms the capitalization; the next eligible token is
 * then capitalized via {@code StringUtil.capitalize}.
 *
 * @param line line to fix
 * @param so {@link SubtitleObject}
 * @return the tokens re-joined with single spaces
 */
private static String fixQuoteCapitalization(String line, SubtitleObject so) {
  final String[] tokens = so.split(RegexEnum.SPACE, line);

  String previousToken = null;
  boolean nextQuoteOpens = true;
  boolean capitalizePending = false;

  for (int idx = 0; idx < tokens.length; idx++) {
    final String token = tokens[idx];
    final boolean afterComma = ",".equals(previousToken);
    final boolean isQuote = token.equals("\"");
    // Look-behind always tracks the token as it was read, not a capitalized replacement.
    previousToken = token;

    if (afterComma) {
      // A token right after a comma is never capitalized here; it can only be a quote
      // that flips the open/close state (and arms capitalization when it opens).
      if (isQuote) {
        if (nextQuoteOpens) {
          capitalizePending = true;
        }
        nextQuoteOpens = !nextQuoteOpens;
      }
      continue;
    }

    if (isQuote) {
      nextQuoteOpens = !nextQuoteOpens;
      continue;
    }

    if (capitalizePending && !StringUtil.isPunctuation(token)) {
      tokens[idx] = StringUtil.capitalize(token);
      capitalizePending = false;
    }
  }

  return String.join(" ", tokens);
}
/** * Fix all words that should be capitalized upon preceding a certain string. * * @param line line to fix * @param so {@link SubtitleObject} * @return fixed line */ private static String fixPrecedingShouldCapitalize(String line, SubtitleObject so) { String[] split = so.split(RegexEnum.SPACE, line); String prevPrev = null, prev = null, current = null; boolean shouldCapitalize = false; for (int i = 0; i < split.length; ++i) { prevPrev = prev; prev = current; current = split[i]; if (prev != null && PRECEDING_SHOULD_CAPITALIZE.contains(prev)) { shouldCapitalize = true; if (prev.equals(".") && prevPrev != null && prevPrev.equals(".")) { // ellipses shouldCapitalize = false; } else if (!current.equals("\"") && !RegexUtil.matches(RegexEnum.MATCH_DIGIT_AND_S, current)) { if (!StringUtil.startsUpperCase(current)) { split[i] = StringUtil.capitalize(current); } shouldCapitalize = false; } } else if (shouldCapitalize) { if (!StringUtil.isPunctuation(current)) { split[i] = StringUtil.capitalize(current); shouldCapitalize = false; } } } return String.join(" ", split); }
/**
 * Fix all proper nouns.
 *
 * <p>Tokens that are punctuation or that mix letters and numbers are left alone. Of the rest:
 *
 * <ul>
 *   <li>a token longer than one character that starts with an uppercase letter and contains
 *       exactly one uppercase letter is lowercased unless it is listed in {@code PROPER_NOUNS}
 *       or {@code POTENTIAL_PROPER_NOUNS};
 *   <li>any other token is capitalized when it is listed in {@code PROPER_NOUNS}.
 * </ul>
 *
 * <p>Lowercasing uses {@link java.util.Locale#ROOT} so the result does not depend on the JVM's
 * default locale (for example, "I" must not become a dotless "ı" under a Turkish locale).
 *
 * @param line line to fix
 * @param so {@link SubtitleObject}
 * @return fixed line, with tokens re-joined by single spaces
 */
private static String fixProperNouns(String line, SubtitleObject so) {
  String[] split = so.split(RegexEnum.SPACE, line);
  for (int i = 0; i < split.length; ++i) {
    String current = split[i];
    if (StringUtil.isPunctuation(current) || StringUtil.containsLettersAndNumbers(current)) {
      continue;
    }

    boolean looksCapitalized =
        StringUtil.startsUpperCase(current)
            && current.length() > 1
            && StringUtil.countUppercase(current) == 1;

    if (looksCapitalized) {
      // Capitalized but not a known (or potential) proper noun: demote to lowercase.
      if (!PROPER_NOUNS.contains(current) && !POTENTIAL_PROPER_NOUNS.contains(current)) {
        split[i] = current.toLowerCase(java.util.Locale.ROOT);
      }
    } else if (PROPER_NOUNS.contains(current)) {
      split[i] = StringUtil.capitalize(current);
    }
  }
  return String.join(" ", split);
}
/**
 * Fix all words that should be all uppercase and lowercases all words that are currently all
 * uppercase (but shouldn't be).
 *
 * <p>Per token, the first matching rule applies:
 *
 * <ol>
 *   <li>listed in {@code ALL_UPPERCASE}: replaced by its uppercase form;
 *   <li>listed in {@code POTENTIAL_ALL_UPPERCASE}: uppercased only when the first index of its
 *       uppercase form in {@code originalLine} equals the first index of the token in
 *       {@code line};
 *   <li>longer than one character, already equal to its uppercase form and not a mix of letters
 *       and numbers: lowercased.
 * </ol>
 *
 * <p>All case conversions use {@link java.util.Locale#ROOT} so results do not vary with the
 * JVM's default locale (for example, "i" must not become "İ" under a Turkish locale).
 *
 * @param line line to fix
 * @param originalLine original line
 * @param so {@link SubtitleObject}
 * @return fixed line, with tokens re-joined by single spaces
 */
private static String fixAllUppercase(String line, String originalLine, SubtitleObject so) {
  String[] split = so.split(RegexEnum.SPACE, line);
  for (int i = 0; i < split.length; ++i) {
    String current = split[i];
    // Computed once per token; every branch below needs the uppercase form.
    String upper = current.toUpperCase(java.util.Locale.ROOT);

    if (ALL_UPPERCASE.contains(current)) {
      split[i] = upper;
    } else if (POTENTIAL_ALL_UPPERCASE.contains(current)) {
      // Only restore uppercase when the original line had it uppercase at the same position.
      if (originalLine.indexOf(upper) == line.indexOf(current)) {
        split[i] = upper;
      }
    } else if (current.length() > 1
        && current.equals(upper)
        && !StringUtil.containsLettersAndNumbers(current)) {
      split[i] = current.toLowerCase(java.util.Locale.ROOT);
    }
  }
  return String.join(" ", split);
}
/** Apply fixes to text. */ public void fix() { if (text == null || text.isEmpty()) { return; } text = text.trim(); text = RemoveEmpty.fix(text, this); if (text.isEmpty()) { return; } // TODO: comment this text = RemoveEndingCharacter.fix(text, '<', '>', this); text = RemoveEndingCharacter.fix(text, '{', '}', this); text = RemoveEndingCharacter.fix(text, '[', ']', this); if (ManuelFix.MANUEL_FIX.containsKey(text)) { text = ManuelFix.MANUEL_FIX.get(text); return; } else if (DoNotFix.DO_NOT_FIX.contains(text)) { return; } if (text.startsWith("#") && !text.endsWith("#")) { text += " #"; } text = RemoveEndingCharacter.fix(text, '(', ')', this); text = FixNonTraditionalStrings.fix(text, this); // TODO: messy (see "...with the .22?") if (text.startsWith("-") && !text.startsWith("- ")) { text = "- " + text.substring(1); splitMap.clear(); // TODO: clean up } text = FixEllipses.fix(text, this); text = FixSingleDoubleQuotes.fix(text, this); String result = ""; int removedNames = 0; String originalText = text; text = FixUnbalancedDashes.fix(text); boolean fixedUnbalancedDashes = !originalText.equals(text); if (!text.startsWith("-")) { // text = text.replace("\n", " "); // TODO: explain this } String temp; for (String line : RegexUtil.split(RegexEnum.NEWLINE, text)) { // TODO: if contains punctuation line = line.trim(); splitMap.clear(); line = PrepareLine.fix(line, this); split(RegexEnum.SPACE, line); temp = line; line = RemoveCharacterName.fix(line, this); if (!temp.equals(line)) { ++removedNames; } line = FixSpelling.fix(line, this); if (!SrtFixerConfig.isToggleCorrectCapitalization()) { line = FixToUppercase.fix( line, this); // TODO: remove this once all bugs in FixCapitalization is fixed } line = ChangeLsToIs.fix(line, this); line = FixDashes.fix(line, this); line = RemoveEmpty.fix(line, this); if (SrtFixerConfig.isToggleCorrectCapitalization()) { line = FixCapitalization.fix(line, this); } line = FixAmpersand.fix(line, this); line = FixHeight.fix(line, this); line = 
FixWebsites.fix(line, this); line = FixTime.fix(line, this); line = FixNumbers.fix(line, this); line = FixAbbreviations.fix(line, this); line = FixContractions.fix(line, this); line = FixAcronym.fix(line, this); line = FixLetterS.fix(line, this); line = FixMisplacedQuotes.fix(line, this); line = FixSpacing.fix(line, this); line = FixCommonErrors.fix(line, this); line = FixEnding.fix(line, this); if (!line.isEmpty()) { line = FixMultilineDashes.fix(result, line); // TODO: why doesn't this work??? if (!result.isEmpty()) { result = FixMultilineDashes.fix(line, result); } StringBuilder builder = new StringBuilder(); if (removedNames == 2) { builder.append("- "); builder.append(result); builder.append("- "); builder.append(line); result = builder.toString(); continue; } builder.append(result); builder.append(line); builder.append('\n'); result = builder.toString(); } } result = FixEllipses.fix(result, this); result = result.trim(); result = FixMultilineQuotes.fix(result, this); result = FixThreeLines.fix(result); result = FixTwoLines.fix(result); result = FixOneLine.fix(result); if (StringUtil.count(originalText, '\n') <= 1 && SubtitleUtil.isApproximatelyEqual(result, originalText) && !fixedUnbalancedDashes) { result = originalText; } text = result; }