/**
 * Parses caption data, supplied as an array of text lines, into a {@link TimedTextObject}.
 *
 * <p>Each caption is expected to consist of:
 * <ol>
 *   <li>a sequence number, starting at 1 and increasing by one per caption;</li>
 *   <li>a timing line whose first 12 and last 12 characters are the start and end times,
 *       each handed to {@code new Time("hh:mm:ss,ms", ...)};</li>
 *   <li>one or more text lines, terminated by a blank line or by the end of the input.</li>
 * </ol>
 * Text lines are concatenated, each followed by {@code <br />}. Problems are reported by
 * appending to {@code tto.warnings}; they do not abort the parse.
 *
 * @param fileName name stored in the returned object's {@code fileName} field
 * @param inputString the lines to parse, one array element per line
 * @return the populated object, with {@code built} set to {@code true}
 * @throws IOException declared by the signature; nothing in this method throws it explicitly
 */
public TimedTextObject parseFile(String fileName, String[] inputString) throws IOException {
    TimedTextObject tto = new TimedTextObject();
    Caption caption = new Caption();
    int captionNumber = 1;

    // the file name is saved
    tto.fileName = fileName;

    int lineCounter = 0;
    int stringIndex = 0;
    String line;
    try {
        line = getLine(inputString, stringIndex++);
        while (line != null && stringIndex < inputString.length) {
            line = line.trim();
            lineCounter++;
            // if its a blank line, ignore it, otherwise...
            if (!line.isEmpty()) {
                // the first thing should be an increasing number
                boolean allGood;
                try {
                    allGood = Integer.parseInt(line) == captionNumber;
                } catch (NumberFormatException ignored) {
                    // not a number at all: reported below exactly like a wrong number
                    allGood = false;
                }
                if (allGood) {
                    captionNumber++;
                } else {
                    tto.warnings += captionNumber + " expected at line " + lineCounter;
                    tto.warnings += "\n skipping to next line\n\n";
                }

                if (allGood) {
                    // we go to next line, here the begin and end time should be found
                    try {
                        lineCounter++;
                        line = getLine(inputString, stringIndex++).trim();
                        String start = line.substring(0, 12);
                        String end = line.substring(line.length() - 12, line.length());
                        caption.start = new Time("hh:mm:ss,ms", start);
                        caption.end = new Time("hh:mm:ss,ms", end);
                    } catch (Exception e) {
                        // Broad on purpose: a missing line, a too-short line and whatever
                        // Time's constructor rejects are all reported the same way.
                        // The trailing blank line keeps later warnings from running into this one.
                        tto.warnings += "incorrect time format at line " + lineCounter + "\n\n";
                        allGood = false;
                    }
                }

                if (allGood) {
                    // we go to next line where the caption text starts
                    lineCounter++;
                    line = getLine(inputString, stringIndex++).trim();
                    StringBuilder text = new StringBuilder();
                    while (!line.isEmpty()) {
                        // Append first, then check for end of input: checking the index
                        // before appending would drop the final text line of an input
                        // that does not end with a blank line.
                        text.append(line).append("<br />");
                        if (stringIndex >= inputString.length) {
                            // input exhausted; treat it like the terminating blank line
                            line = "";
                            break;
                        }
                        line = getLine(inputString, stringIndex++).trim();
                        lineCounter++;
                    }
                    caption.content = text.toString();

                    int key = caption.start.mseconds;
                    // in case the key is already there, we increase it by a millisecond,
                    // since no duplicates are allowed
                    while (tto.captions.containsKey(key)) {
                        key++;
                    }
                    if (key != caption.start.mseconds) {
                        tto.warnings += "caption with same start time found...\n\n";
                    }
                    // we add the caption.
                    tto.captions.put(key, caption);
                }

                // we go to next blank
                while (!line.isEmpty() && stringIndex < inputString.length) {
                    line = getLine(inputString, stringIndex++).trim();
                    lineCounter++;
                }
                caption = new Caption();
            }
            if (stringIndex < inputString.length) {
                line = getLine(inputString, stringIndex++);
            }
        }
    } catch (NullPointerException e) {
        // Reached when a line that was expected to exist is null (e.g. trim() on a null line).
        tto.warnings += "unexpected end of file, maybe last caption is not complete.\n\n";
    }

    tto.built = true;
    return tto;
}
public TimedTextObject parseFile(String fileName, InputStream is) throws IOException, FatalParsingException { TimedTextObject tto = new TimedTextObject(); tto.fileName = fileName; DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder dBuilder; try { dBuilder = dbFactory.newDocumentBuilder(); Document doc = dBuilder.parse(is); doc.getDocumentElement().normalize(); // we recover the metadata Node node = doc.getElementsByTagName("ttm:title").item(0); if (node != null) tto.title = node.getTextContent(); node = doc.getElementsByTagName("ttm:copyright").item(0); if (node != null) tto.copyrigth = node.getTextContent(); node = doc.getElementsByTagName("ttm:desc").item(0); if (node != null) tto.description = node.getTextContent(); // we recover the styles NodeList styleN = doc.getElementsByTagName("style"); // we recover the timed text elements NodeList captionsN = doc.getElementsByTagName("p"); // regions of the layout could also be recovered this way tto.warnings += "Styling attributes are only recognized inside a style definition, to be referenced later in the captions.\n\n"; // we parse the styles for (int i = 0; i < styleN.getLength(); i++) { Style style = new Style(Style.defaultID()); node = styleN.item(i); NamedNodeMap attr = node.getAttributes(); // we get the id Node currentAtr = attr.getNamedItem("id"); if (currentAtr != null) style.iD = currentAtr.getNodeValue(); currentAtr = attr.getNamedItem("xml:id"); if (currentAtr != null) style.iD = currentAtr.getNodeValue(); // we get the style it may be based upon currentAtr = attr.getNamedItem("style"); if (currentAtr != null) if (tto.styling.containsKey(currentAtr.getNodeValue())) style = new Style(style.iD, tto.styling.get(currentAtr.getNodeValue())); // we check for background color currentAtr = attr.getNamedItem("tts:backgroundColor"); if (currentAtr != null) style.backgroundColor = parseColor(currentAtr.getNodeValue(), tto); // we check for color currentAtr = 
attr.getNamedItem("tts:color"); if (currentAtr != null) style.color = parseColor(currentAtr.getNodeValue(), tto); // we check for font family currentAtr = attr.getNamedItem("tts:fontFamily"); if (currentAtr != null) style.font = currentAtr.getNodeValue(); // we check for font size currentAtr = attr.getNamedItem("tts:fontSize"); if (currentAtr != null) style.fontSize = currentAtr.getNodeValue(); // we check for italics currentAtr = attr.getNamedItem("tts:fontStyle"); if (currentAtr != null) if (currentAtr.getNodeValue().equalsIgnoreCase("italic") || currentAtr.getNodeValue().equalsIgnoreCase("oblique")) style.italic = true; else if (currentAtr.getNodeValue().equalsIgnoreCase("normal")) style.italic = false; // we check for bold currentAtr = attr.getNamedItem("tts:fontWeight"); if (currentAtr != null) if (currentAtr.getNodeValue().equalsIgnoreCase("bold")) style.bold = true; else if (currentAtr.getNodeValue().equalsIgnoreCase("normal")) style.bold = false; // we check opacity (to set the alpha) currentAtr = attr.getNamedItem("tts:opacity"); if (currentAtr != null) { try { // a number between 1.0 and 0 float alpha = Float.parseFloat(currentAtr.getNodeValue()); if (alpha > 1) alpha = 1; else if (alpha < 0) alpha = 0; String aa = Integer.toHexString((int) (alpha * 255)); if (aa.length() < 2) aa = "0" + aa; style.color = style.color.substring(0, 6) + aa; style.backgroundColor = style.backgroundColor.substring(0, 6) + aa; } catch (NumberFormatException e) { // ignore the alpha } } // we check for text align currentAtr = attr.getNamedItem("tts:textAlign"); if (currentAtr != null) if (currentAtr.getNodeValue().equalsIgnoreCase("left") || currentAtr.getNodeValue().equalsIgnoreCase("start")) style.textAlign = "bottom-left"; else if (currentAtr.getNodeValue().equalsIgnoreCase("right") || currentAtr.getNodeValue().equalsIgnoreCase("end")) style.textAlign = "bottom-right"; // we check for underline currentAtr = attr.getNamedItem("tts:textDecoration"); if (currentAtr != null) if 
(currentAtr.getNodeValue().equalsIgnoreCase("underline")) style.underline = true; else if (currentAtr.getNodeValue().equalsIgnoreCase("noUnderline")) style.underline = false; // we add the style tto.styling.put(style.iD, style); } // we parse the captions for (int i = 0; i < captionsN.getLength(); i++) { Caption caption = new Caption(); caption.content = ""; boolean validCaption = true; node = captionsN.item(i); NamedNodeMap attr = node.getAttributes(); // we get the begin time Node currentAtr = attr.getNamedItem("begin"); // if no begin is present, 0 is assumed caption.start = new Time("", ""); caption.end = new Time("", ""); if (currentAtr != null) caption.start.mseconds = parseTimeExpression(currentAtr.getNodeValue(), tto, doc); // we get the end time, if present, duration is ignored, otherwise end is calculated from // duration currentAtr = attr.getNamedItem("end"); if (currentAtr != null) caption.end.mseconds = parseTimeExpression(currentAtr.getNodeValue(), tto, doc); else { currentAtr = attr.getNamedItem("dur"); if (currentAtr != null) caption.end.mseconds = caption.start.mseconds + parseTimeExpression(currentAtr.getNodeValue(), tto, doc); else // no end or duration, invalid format, caption is discarded validCaption = false; } // we get the style currentAtr = attr.getNamedItem("style"); if (currentAtr != null) { Style style = tto.styling.get(currentAtr.getNodeValue()); if (style != null) caption.style = style; else // unrecognized style tto.warnings += "unrecoginzed style referenced: " + currentAtr.getNodeValue() + "\n\n"; } // we save the text NodeList textN = node.getChildNodes(); for (int j = 0; j < textN.getLength(); j++) { if (textN.item(j).getNodeName().equals("#text")) caption.content += textN.item(j).getTextContent().trim(); else if (textN.item(j).getNodeName().equals("br")) caption.content += "<br />"; } // is this check worth it? 
if (caption.content.replaceAll("<br />", "").trim().isEmpty()) validCaption = false; // and save the caption if (validCaption) { int key = caption.start.mseconds; // in case the key is already there, we increase it by a millisecond, since no duplicates // are allowed while (tto.captions.containsKey(key)) key++; tto.captions.put(key, caption); } } } catch (Exception e) { e.printStackTrace(); // this could be a fatal error... throw new FatalParsingException("Error during parsing: " + e.getMessage()); } tto.built = true; return tto; }