@Nullable( "null means no luck, otherwise it's tuple(guessed encoding, hint about content if was unable to guess, BOM)") public static Trinity<Charset, CharsetToolkit.GuessedEncoding, byte[]> guessFromContent( @NotNull VirtualFile virtualFile, @NotNull byte[] content, int length) { Charset defaultCharset = ObjectUtils.notNull( EncodingManager.getInstance().getEncoding(virtualFile, true), CharsetToolkit.getDefaultSystemCharset()); CharsetToolkit toolkit = GUESS_UTF ? new CharsetToolkit(content, defaultCharset) : null; String detectedFromBytes = null; try { if (GUESS_UTF) { toolkit.setEnforce8Bit(true); Charset charset = toolkit.guessFromBOM(); if (charset != null) { detectedFromBytes = AUTO_DETECTED_FROM_BOM; byte[] bom = ObjectUtils.notNull(CharsetToolkit.getMandatoryBom(charset), CharsetToolkit.UTF8_BOM); return Trinity.create(charset, null, bom); } CharsetToolkit.GuessedEncoding guessed = toolkit.guessFromContent(length); if (guessed == CharsetToolkit.GuessedEncoding.VALID_UTF8) { detectedFromBytes = "auto-detected from bytes"; return Trinity.create( CharsetToolkit.UTF8_CHARSET, guessed, null); // UTF detected, ignore all directives } if (guessed == CharsetToolkit.GuessedEncoding.SEVEN_BIT) { return Trinity.create(null, guessed, null); } } return null; } finally { setCharsetWasDetectedFromBytes(virtualFile, detectedFromBytes); } }
public static void setDetectedFromBytesFlagBack( @NotNull VirtualFile virtualFile, @NotNull byte[] content) { if (virtualFile.getBOM() == null) { guessFromContent(virtualFile, content, content.length); } else { // prevent file to be reloaded in other encoding after save with BOM setCharsetWasDetectedFromBytes(virtualFile, AUTO_DETECTED_FROM_BOM); } }
/**
 * Resolves the charset and BOM for {@code content} and optionally stores the BOM on the file.
 *
 * <p>The charset is taken from {@code virtualFile} when one is already set there; otherwise it is
 * obtained from {@code detectCharset}. The charset/BOM pair is then computed by
 * {@code getCharsetAndBOM}. When {@code saveBOM} is {@code true} and the resulting BOM is not
 * empty, the BOM is written to the file and the file is marked as
 * {@code AUTO_DETECTED_FROM_BOM}.
 *
 * @param virtualFile file being loaded; may be updated with the BOM and the detection marker
 * @param content     raw bytes of the file
 * @param saveBOM     whether a non-empty BOM should be stored on {@code virtualFile}
 * @param fileType    file type passed to {@code detectCharset} when the file has no charset set
 * @return the pair produced by {@code getCharsetAndBOM}
 */
@NotNull
private static Pair.NonNull<Charset, byte[]> doDetectCharsetAndSetBOM(
    @NotNull VirtualFile virtualFile,
    @NotNull byte[] content,
    boolean saveBOM,
    @NotNull FileType fileType) {
  @NotNull Charset effectiveCharset;
  if (virtualFile.isCharsetSet()) {
    effectiveCharset = virtualFile.getCharset();
  } else {
    effectiveCharset = detectCharset(virtualFile, content, fileType);
  }

  Pair.NonNull<Charset, byte[]> charsetWithBom = getCharsetAndBOM(content, effectiveCharset);
  if (!saveBOM) {
    return charsetWithBom;
  }

  byte[] detectedBom = charsetWithBom.second;
  if (detectedBom.length > 0) {
    virtualFile.setBOM(detectedBom);
    setCharsetWasDetectedFromBytes(virtualFile, AUTO_DETECTED_FROM_BOM);
  }
  return charsetWithBom;
}