/**
 * Downloads the book manifest ({@code book.xml}) and returns the {@code path} attribute of every
 * {@code <page>} element in document order.
 *
 * <p>As a side effect the number of {@code <page>} elements is stored in {@code total}.
 *
 * @return the page paths listed in the manifest (empty if the manifest has no pages)
 * @throws Exception if the handshake, the download, or the XML parsing fails
 */
public ArrayList<String> parseXML() throws Exception {
  ArrayList<String> ret = new ArrayList<String>();
  handshake();

  // rid is a random number from 0-9999 appended to every request
  URL url =
      new URL(
          "http://mangaonweb.com/page.do?cdn="
              + cdn
              + "&cpn=book.xml&crcod="
              + crcod
              + "&rid="
              + (int) (Math.random() * 10000));
  String page = DownloaderUtils.getPage(url.toString(), "UTF-8", cookies);

  DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
  // The manifest is remote, untrusted input: make sure the parser never resolves external
  // entities or external DTDs (XXE). Feature names are passed as strings so that a parser
  // that does not know one of them can simply be skipped.
  String[] externalAccessFeatures = {
    "http://xml.org/sax/features/external-general-entities",
    "http://xml.org/sax/features/external-parameter-entities",
    "http://apache.org/xml/features/nonvalidating/load-external-dtd"
  };
  for (String feature : externalAccessFeatures) {
    try {
      factory.setFeature(feature, false);
    } catch (javax.xml.parsers.ParserConfigurationException ignored) {
      // This parser implementation does not support the feature; the other settings still apply.
    }
  }
  factory.setExpandEntityReferences(false);

  DocumentBuilder builder = factory.newDocumentBuilder();
  Document d = builder.parse(new InputSource(new StringReader(page)));
  NodeList pages = d.getDocumentElement().getElementsByTagName("page");
  total = pages.getLength();
  for (int i = 0; i < pages.getLength(); i++) {
    Element e = (Element) pages.item(i);
    ret.add(e.getAttribute("path"));
  }
  return ret;
}
private QueryResult gatherResultInfoForSelectQuery( String queryString, int queryNr, boolean sorted, Document doc, String[] rows) { Element root = doc.getRootElement(); // Get head information Element child = root.getChild("head", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); // Get result rows (<head>) List headChildren = child.getChildren( "variable", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); Iterator it = headChildren.iterator(); ArrayList<String> headList = new ArrayList<String>(); while (it.hasNext()) { headList.add(((Element) it.next()).getAttributeValue("name")); } List resultChildren = root.getChild("results", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")) .getChildren( "result", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); int nrResults = resultChildren.size(); QueryResult queryResult = new QueryResult(queryNr, queryString, nrResults, sorted, headList); it = resultChildren.iterator(); while (it.hasNext()) { Element resultElement = (Element) it.next(); String result = ""; // get the row values and paste it together to one String for (int i = 0; i < rows.length; i++) { List bindings = resultElement.getChildren( "binding", Namespace.getNamespace("http://www.w3.org/2005/sparql-results#")); String rowName = rows[i]; for (int j = 0; j < bindings.size(); j++) { Element binding = (Element) bindings.get(j); if (binding.getAttributeValue("name").equals(rowName)) if (result.equals("")) result += rowName + ": " + ((Element) binding.getChildren().get(0)).getTextNormalize(); else result += "\n" + rowName + ": " + ((Element) binding.getChildren().get(0)).getTextNormalize(); } } queryResult.addResult(result); } return queryResult; }
/**
 * Parses the characters supplied by {@code reader} as an XML fragment.
 *
 * <p>The input is surrounded by a synthetic {@code <root>} element before it is handed to a plain
 * SAX parser whose content handler is an {@link XMLFragmentReceiver} wrapping
 * {@code xmlReceiver}.
 *
 * @param reader source of the fragment text
 * @param xmlReceiver receiver wrapped by the fragment content handler
 * @throws SAXException if the parser reports an error
 * @throws OXFException if reading the input fails with an {@link IOException}
 */
public static void parseDocumentFragment(Reader reader, XMLReceiver xmlReceiver)
    throws SAXException {
  try {
    final XMLReader fragmentParser =
        newSAXParser(XMLUtils.ParserConfiguration.PLAIN).getXMLReader();
    fragmentParser.setContentHandler(new XMLFragmentReceiver(xmlReceiver));

    // opening tag + caller's content + closing tag, read back to back as one stream
    final ArrayList<Reader> wrappedParts = new ArrayList<Reader>(3);
    wrappedParts.add(new StringReader("<root>"));
    wrappedParts.add(reader);
    wrappedParts.add(new StringReader("</root>"));

    final SequenceReader wrappedInput = new SequenceReader(wrappedParts.iterator());
    final InputSource source = new InputSource(wrappedInput);
    fragmentParser.parse(source);
  } catch (IOException e) {
    throw new OXFException(e);
  }
}
/**
 * Applies the values of an imported record to the matching stored record.
 *
 * <p>The stored record is looked up by the key of {@code r} (for versionized data: the version
 * valid at {@code validAt}). Field values are copied from {@code r}, and depending on the update
 * mode and validity information the stored record is updated, a new version is added, or only
 * its validity is changed. Any exception is reported through {@code logImportFailed}.
 *
 * @param r the imported record
 * @param fieldsInInport names of the fields present in the import; a field listed here is
 *     copied even when its imported value is {@code null}
 */
private void updateRecord(DataRecord r, ArrayList<String> fieldsInInport) {
  try {
    // Versionized storage: fetch the version valid at validAt; otherwise fetch by key only.
    DataRecord rorig =
        (versionized ? dataAccess.getValidAt(r.getKey(), validAt) : dataAccess.get(r.getKey()));
    if (rorig == null) {
      logImportFailed(
          r, International.getString("Keine gültige Version des Datensatzes gefunden."), null);
      return;
    }

    // has the import record an InvalidFrom field?
    long invalidFrom = (versionized ? getInvalidFrom(r) : -1);
    // An InvalidFrom that is not after the stored record's ValidFrom is treated as "not set".
    if (invalidFrom <= rorig.getValidFrom()) {
      invalidFrom = -1;
    }

    boolean changed = false;
    for (int i = 0; i < fields.length; i++) {
      Object o = r.get(fields[i]);
      // Copy a field when it has a value or was explicitly part of the import, but never key
      // fields or the LASTMODIFIED / VALIDFROM / INVALIDFROM / INVISIBLE / DELETED fields.
      if ((o != null || fieldsInInport.contains(fields[i]))
          && !r.isKeyField(fields[i])
          && !fields[i].equals(DataRecord.LASTMODIFIED)
          && !fields[i].equals(DataRecord.VALIDFROM)
          && !fields[i].equals(DataRecord.INVALIDFROM)
          && !fields[i].equals(DataRecord.INVISIBLE)
          && !fields[i].equals(DataRecord.DELETED)) {
        Object obefore = rorig.get(fields[i]);
        rorig.set(fields[i], o);
        // Remember whether at least one field actually differs from its previous value.
        if ((o != null && !o.equals(obefore)) || (o == null && obefore != null)) {
          changed = true;
        }
      }
    }

    if (invalidFrom <= 0) {
      // No usable InvalidFrom: update the existing version or create a new one.
      long myValidAt = getValidFrom(r);
      if (!versionized
          || updMode.equals(UPDMODE_UPDATEVALIDVERSION)
          || rorig.getValidFrom() == myValidAt) {
        // Write only when something changed; the record is counted as processed either way.
        if (changed) {
          dataAccess.update(rorig);
        }
        setCurrentWorkDone(++importCount);
      }
      if (versionized
          && updMode.equals(UPPMODE_CREATENEWVERSION)
          && rorig.getValidFrom() != myValidAt) {
        if (changed) {
          dataAccess.addValidAt(rorig, myValidAt);
        }
        setCurrentWorkDone(++importCount);
      }
    } else {
      // InvalidFrom given: only the validity range of the stored record is changed.
      dataAccess.changeValidity(rorig, rorig.getValidFrom(), invalidFrom);
      setCurrentWorkDone(++importCount);
    }
  } catch (Exception e) {
    logImportFailed(r, e.toString(), e);
  }
}
// Add/Insert public int add(Script script) { // Find the correct spot to add it alphabetically int i, limit; for (i = 0, limit = scripts.size(); i < limit; i++) { Script scriptTemp = (Script) scripts.get(i); if (scriptTemp.getName().compareTo(script.getName()) >= 0) { break; } } scripts.add(i, script); // Update the table fireTableRowsInserted(i, i); return i; }
/**
 * Tells whether no stored script is called {@code name}.
 *
 * @param name the name to look for
 * @return {@code false} as soon as a script with an equal name is found, {@code true} otherwise
 */
public boolean isNameUnique(String name) {
  final int count = scripts.size();
  int index = 0;
  while (index < count) {
    Script candidate = get(index);
    if (name.equals(candidate.getName())) {
      return false;
    }
    index++;
  }
  return true;
}
/** Generates array of XContour from local contours and modules. Used for TTF building. */ private XContour[] toContours() { XContour[] retval; ArrayList<XContour> list = new ArrayList<>(); XContour[] contours = m_glyph.getBody().getContour(); for (int i = 0; i < contours.length; i++) { EContour contour = (EContour) contours[i]; list.add(contour.toQuadratic()); } // for i XModule[] modules = m_glyph.getBody().getModule(); for (int i = 0; i < modules.length; i++) { EModuleInvoke module = (EModuleInvoke) modules[i]; // push and pop happens inside toContour list.add(module.toContour(new AffineTransform())); } // for i if (list.size() == 0) return null; retval = new XContour[list.size()]; for (int i = 0; i < list.size(); i++) { retval[i] = list.get(i); } // for i return retval; }
public TTGlyph toSimpleGlyph() { // convert the file into array of contours XContour[] contours = toContours(); if ((contours == null) && (!isRequiredGlyph())) { return null; } // if TTGlyph retval = new TTGlyph(); retval.setSimple(true); retval.setAdvanceWidth(getAdvanceWidth()); if (contours == null) { return retval; } // if ArrayList<EContourPoint> points = new ArrayList<>(); for (int i = 0; i < contours.length; i++) { XContour contour = contours[i]; XContourPoint[] contourPoints = contour.getContourPoint(); for (int j = 0; j < contourPoints.length; j++) { points.add((EContourPoint) contourPoints[j]); } // for j retval.addEndPoint(points.size() - 1); } // for i for (EContourPoint point : points) { loadContourPoint(retval, point); } // for point boolean hasGridfit = false; // I need int i here. for (int i = 0; i < points.size(); i++) { EContourPoint point = points.get(i); if (!point.isRounded()) { continue; } // if hasGridfit = true; loadGridfit(retval, point, i); } // for i if (hasGridfit) { retval.addInstruction(TTGlyph.IUP1); retval.addInstruction(TTGlyph.IUP0); } // if // I need int i here. for (int i = 0; i < points.size(); i++) { EContourPoint point = points.get(i); if (point.getHint().length == 0) { continue; } // if loadHint(retval, point, i); } // for i return retval; }
/**
 * Finds the first stored script whose name equals {@code name}.
 *
 * @param name the script name to search for
 * @return the index of the first match, or {@code -1} when no script has that name
 */
public int indexOf(String name) {
  final int count = scripts.size();
  int position = 0;
  while (position < count) {
    Script candidate = get(position);
    if (candidate.getName().equals(name)) {
      return position;
    }
    position++;
  }
  return -1;
}
public boolean download(DownloadListener dl) throws Exception { // 1) get crcod, cdn, and cookies handshake(); // 2) get XML ArrayList<String> paths = parseXML(); dl.setTotal(getTotal()); // 3) get pages byte[] key = { 99, 49, 51, 53, 100, 54, 56, 56, 57, 57, 99, 56, 50, 54, 99, 101, 100, 55, 99, 52, 57, 98, 99, 55, 54, 97, 97, 57, 52, 56, 57, 48 }; BlowFishKey bfkey = new BlowFishKey(key); for (int i = 0; i < paths.size(); i++) { if (dl.isDownloadAborted()) return (true); // rid is just a random number from 0-9999 URL url = new URL( "http://mangaonweb.com/page.do?cdn=" + cdn + "&cpn=" + paths.get(i) + "&crcod=" + crcod + "&rid=" + (int) (Math.random() * 10000)); byte[] encrypted = downloadByteArray(url); bfkey.decrypt(encrypted, 0); RandomAccessFile output = new RandomAccessFile(dl.downloadPath(this, i), "rw"); output.write(encrypted); output.close(); dl.downloadIncrement(this); } dl.downloadFinished(this); return (true); }
public void endElement(String uri, String sName, String qName) { if (qName.equals("PROPSTABLE")) { } else if (qName.equals("PROPSROW")) { if (PROPSROW_DESC == null || (curPROPSROW_DESC != null && curPROPSROW_DESC.equals(PROPSROW_DESC))) propsAl.add(curProps); } else { // LogUtil.fine("/"+qName); textFlag = false; } }
/**
 * Populates LOCALES list with contents of xml.
 *
 * <p>Each node is expected to carry the attributes {@code label}, {@code isoCode} and
 * {@code dictionaryUrl}. Nodes without attributes (anything that is not an element) are skipped
 * silently; entries with a missing attribute or a malformed dictionary URL are skipped with a
 * warning so that one bad entry does not abort loading of the remaining locales.
 *
 * @param list the configuration list
 */
private static void parseLocales(NodeList list) {
  for (int i = 0; i < list.getLength(); ++i) {
    Node node = list.item(i);
    NamedNodeMap attributes = node.getAttributes();
    if (attributes == null) {
      // getAttributes() is null for non-element nodes such as whitespace text or comments.
      continue;
    }

    Attr labelAttr = (Attr) attributes.getNamedItem("label");
    Attr codeAttr = (Attr) attributes.getNamedItem("isoCode");
    Attr dictAttr = (Attr) attributes.getNamedItem("dictionaryUrl");
    if (labelAttr == null || codeAttr == null || dictAttr == null) {
      logger.warn(
          "Skipping locale entry <"
              + node.getNodeName()
              + ">: label, isoCode and dictionaryUrl are all required");
      continue;
    }

    String label = labelAttr.getValue();
    String code = codeAttr.getValue();
    String dictLocation = dictAttr.getValue();
    try {
      LOCALES.add(new Locale(label, code, new URL(dictLocation)));
    } catch (MalformedURLException exc) {
      logger.warn(
          "Unable to parse dictionary location of " + label + " (" + dictLocation + ")", exc);
    }
  }
}
public void endElement(String uri, String localName, String qname) { super.endElement(uri, localName, qname); if (record != null && localName.equals(DataRecord.ENCODING_RECORD)) { // end of record if (dataImport.importRecord(record, fieldsInImport)) { count++; } record = null; fieldsInImport = null; } String fieldValue = getFieldValue(); if (record != null && fieldValue != null) { // end of field try { if (textImport) { if (!record.setFromText(fieldName, fieldValue.trim())) { dataImport.logImportWarning( record, "Value '" + fieldValue + "' for Field '" + fieldName + "' corrected to '" + record.getAsText(fieldName) + "'"); } } else { record.set(fieldName, fieldValue.trim()); } String[] equivFields = record.getEquivalentFields(fieldName); for (String f : equivFields) { fieldsInImport.add(f); } } catch (Exception esetvalue) { dataImport.logImportWarning( record, "Cannot set value '" + fieldValue + "' for Field '" + fieldName + "': " + esetvalue.toString()); } } }
/** * this is fired when a tag start event is found on an xml document * * @param uri namespace for tag being processed * @param localName tag name * @param qName fully qualified name for tag * @param attributes tag attributes * @throws SAXException if parsing fails */ @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { // if a class is being parsed if (qName.equalsIgnoreCase(getTagName())) { // set class name from xml attributes mClassName = attributes.getValue(TAG_ATTRIBUTES.name.toString()); // if an attribute is being parsed } else if (qName.equalsIgnoreCase(AttributeDescriptor.TAG_NAME)) { // create an attribute descriptor AttributeDescriptor attribute = new AttributeDescriptor(); // add an attribute to this class mAttributes.add(attribute); // let the attribute parse itself mCtx.pushHandler(attribute); // forward event to new handler attribute.startElement(uri, localName, qName, attributes); } }
/**
 * Returns the accumulated text, with known citation contexts wrapped in {@code <ref>} markup
 * when {@code counting} is enabled.
 *
 * <p>When counting, the running {@code offset} is advanced by the length of the accumulated text
 * on every call that returns non-blank text.
 *
 * @return the empty string when the accumulated text is blank; otherwise the (possibly
 *     annotated) text
 */
public String getText() {
  String text = accumulator.toString();
  if (text.trim().length() == 0) {
    return "";
  }
  /*text = text.replace("\n", " "); text = text.replace(" ", " ");*/
  if (counting) {
    // An earlier version counted tokens with a StringTokenizer; offsets are now measured in
    // characters of the accumulated text.
    int i = currentPatentIndex;
    int count = text.length();

    // Patent citations: the scan starts at the last patent that matched.
    while (i < patents.size()) {
      PatentItem currentPatent = patents.get(i);
      if (currentPatent != null) {
        int startOffset = currentPatent.getOffsetBegin();
        int endOffset = currentPatent.getOffsetEnd();
        // Only citations lying completely inside the current text window are marked.
        if ((startOffset >= offset) && (endOffset <= offset + count)) {
          String context = currentPatent.getContext();
          String target = "";
          // Keep a leading blank of the context outside of the <ref> element.
          if (context.charAt(0) == ' ') {
            target =
                " <ref type=\"patent\">" + context.substring(1, context.length()) + "</ref>";
          } else {
            target = "<ref type=\"patent\">" + context + "</ref>";
          }
          // NOTE(review): replace() substitutes every occurrence of the context in the text,
          // not only the one at startOffset - confirm this is intended.
          text = text.replace(context, target);
          currentPatentIndex = i;
        }
      }
      i++;
    }

    // Non-patent (article) citations: always scanned from the beginning.
    // i = currentArticleIndex;
    i = 0;
    while (i < articles.size()) {
      BibDataSet currentArticle = articles.get(i);
      if (currentArticle != null) {
        List<Integer> offsets = currentArticle.getOffsets();
        int startOffset = -1;
        int endOffset = -1;
        String context = currentArticle.getRawBib().trim();
        if (offsets.size() > 0) {
          if (offsets.get(0) != null) {
            startOffset = offsets.get(0).intValue();
            // endOffset = offsets.get(1).intValue();
            endOffset = startOffset + context.length();
          }
        }
        // Previously: if ( (startOffset >= offset) && (endOffset <= offset+count) ) {
        // The upper bound is no longer checked, so endOffset is computed but not used here.
        if ((startOffset >= offset)) {
          String target = " <ref type=\"npl\">" + context + "</ref> ";
          text = text.replace(context, target);
          currentArticleIndex = i;
        }
      }
      i++;
    }

    // The next call continues after the text consumed by this one.
    offset += count;
  }
  return text;
}
// Getters public Script get(int i) { return (Script) scripts.get(i); }
/**
 * SAX end-of-element callback; dispatches on {@code qName}.
 *
 * <ul>
 *   <li>{@code ref} / {@code bibl}: records the reference text and, once a reference has been
 *       found, appends one labelled line per token to {@code accumulatedText}.
 *   <li>{@code description}: if a reference was found, appends the tokens of the description
 *       text labelled {@code <other>} (or {@code <ignore>} beyond the window of {@code N}
 *       tokens) and clears {@code refFound}.
 *   <li>{@code patcit}: registers {@code cited_number} as a citation.
 *   <li>{@code patent-document} / {@code fulltext-document}: runs the lexicon matchers over all
 *       collected content.
 *   <li>{@code heading}, {@code row}, {@code p}: append a separator to the accumulator.
 *   <li>the remaining listed elements only reset the accumulator.
 * </ul>
 *
 * @param uri namespace URI (unused)
 * @param localName local name (unused)
 * @param qName qualified element name used for dispatch
 * @throws SAXException declared by the SAX contract
 */
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName)
    throws SAXException {
  if (qName.equals("date")) {
    accumulator.setLength(0);
  } else if (qName.equals("ref") || qName.equals("bibl")) {
    // Flatten the reference text onto a single line.
    String refString = getRefText();
    refString = refString.replace("\n", " ");
    refString = refString.replace("\t", " ");
    refString = refString.replace(" ", " ");
    if (npl && ref) {
      // Non-patent-literature reference: one flat list.
      if (referencesNPL == null) referencesNPL = new ArrayList<String>();
      referencesNPL.add(refString);
      refFound = true;
      if (nplReferences) nbNPLRef++;
    } else if (ref) {
      // Patent reference: grouped by the file currently being processed.
      if (referencesPatent == null) {
        referencesPatent = new HashMap<String, ArrayList<String>>();
      }
      ArrayList<String> refss = referencesPatent.get(currentFileName);
      if (refss == null) {
        refss = new ArrayList<String>();
      }
      refss.add(refString);
      referencesPatent.put(currentFileName, refss);
      refFound = true;
      if (patentReferences) {
        nbPatentRef++;
      }
    }
    // NOTE(review): refFound is not reset in this branch, so it may still be true from an
    // earlier reference - confirm that tokenizing in that case is intended.
    if (refFound) {
      // we tokenize the text
      List<String> tokenizations = new ArrayList<String>();
      try {
        // TBD: pass a language object to the tokenize method call
        tokenizations = analyzer.tokenize(refString);
      } catch (Exception e) {
        // On failure the empty list is kept and nothing is emitted for this reference.
        LOGGER.debug("Tokenization for XML patent document has failed.");
      }
      // i counts the emitted (non-whitespace) tokens; the first one gets the "I-" label.
      int i = 0;
      for (String token : tokenizations) {
        // Skip whitespace-only tokens.
        if ((token.trim().length() == 0)
            || (token.equals(" "))
            || (token.equals("\t"))
            || (token.equals("\n"))
            || (token.equals("\r"))) {
          continue;
        }
        try {
          // One output line per token: token, tab, label.
          accumulatedText.append(token + "\t");
          allContent.append(token + " ");
          if (npl) {
            if (nplReferences) {
              if (i == 0) {
                accumulatedText.append("I-<refNPL>\n");
              } else if (token == null) {
                // NOTE(review): unreachable - token.trim() above would already have thrown
                // for a null token, so the "E-" label is never written.
                accumulatedText.append("E-<refNPL>\n");
              } else {
                accumulatedText.append("<refNPL>\n");
              }
            } else accumulatedText.append("<other>\n");
          } else {
            if (patentReferences) {
              if (i == 0) accumulatedText.append("I-<refPatent>\n");
              // NOTE(review): unreachable for the same reason as the NPL case above.
              else if (token == null) accumulatedText.append("E-<refPatent>\n");
              else accumulatedText.append("<refPatent>\n");
            } else accumulatedText.append("<other>\n");
          }
        } catch (Exception e) {
          throw new GrobidException("An exception occured while running Grobid.", e);
        }
        i++;
      }
    }
    ref = false;
  } else if (qName.equals("classification-ipcr")) {
    accumulator.setLength(0);
  } else if (qName.equals("classification-symbol")) {
    accumulator.setLength(0);
  } else if (qName.equals("abstract")) {
    accumulator.setLength(0);
  } else if (qName.equals("heading")) {
    accumulator.append(" ");
  } else if (qName.equals("description")) {
    // The description text is only emitted when a reference was seen before.
    if (refFound) {
      String content = getText();
      // we tokenize the text
      List<String> tokenizations = new ArrayList<String>();
      try {
        // TBD: pass a language object to the tokenize method call
        tokenizations = analyzer.tokenize(content);
      } catch (Exception e) {
        LOGGER.debug("Tokenization for XML patent document has failed.");
      }
      int i = 0;
      for (String token : tokenizations) {
        // Skip whitespace-only tokens.
        if ((token.trim().length() == 0)
            || (token.equals(" "))
            || (token.equals("\t"))
            || (token.equals("\n"))
            || (token.equals("\r"))) {
          continue;
        }
        // we print only a window of N words; tokens beyond it are still written but labelled
        // <ignore>. N == -1 disables the window.
        if ((i > N) && (N != -1)) {
          token = token.trim();
          if (token.length() > 0) {
            accumulatedText.append(token + "\t" + "<ignore>\n");
            allContent.append(token + " ");
          }
        } else {
          try {
            token = token.trim();
            if (token.length() > 0) {
              accumulatedText.append(token + "\t" + "<other>\n");
              allContent.append(token + " ");
            }
          } catch (Exception e) {
            throw new GrobidException("An exception occured while running Grobid.", e);
          }
        }
        i++;
      }
      accumulator.setLength(0);
      refFound = false;
    }
  } else if (qName.equals("patcit")) {
    // we register the citation, the citation context will be marked in a later stage
    if (citations == null) citations = new ArrayList<String>();
    citations.add(cited_number);
    accumulator.setLength(0);
  } else if (qName.equals("invention-title")) {
    accumulator.setLength(0);
  } else if (qName.equals("applicants")) {
    accumulator.setLength(0);
  } else if (qName.equals("inventors")) {
    accumulator.setLength(0);
  } else if (qName.equals("document-id")) {
    accumulator.setLength(0);
  } else if (qName.equals("legal-status")) {
    accumulator.setLength(0);
  } else if (qName.equals("bibliographic-data")) {
    accumulator.setLength(0);
  } else if (qName.equals("doc-number")) {
    accumulator.setLength(0);
  } else if (qName.equals("country")) {
    accumulator.setLength(0);
  } else if (qName.equals("kind")) {
    accumulator.setLength(0);
  } else if (qName.equals("classification-symbol")) {
    // NOTE(review): never reached - "classification-symbol" is already handled further up.
    accumulator.setLength(0);
  } else if (qName.equals("classification-ecla")) {
    accumulator.setLength(0);
  } else if (qName.equals("patent-document") || qName.equals("fulltext-document")) {
    // End of the document: match the lexicons against everything collected so far.
    String allString = allContent.toString();
    journalsPositions = lexicon.inJournalNames(allString);
    abbrevJournalsPositions = lexicon.inAbbrevJournalNames(allString);
    conferencesPositions = lexicon.inConferenceNames(allString);
    publishersPositions = lexicon.inPublisherNames(allString);
    // NOTE(review): allContent is null from here on; a later append would fail unless it is
    // re-created elsewhere - confirm.
    allContent = null;
    allString = null;
  } else if (qName.equals("row")) {
    accumulator.append(" ");
  } else if (qName.equals("p")) {
    accumulator.append("\n");
  }
}
// Misc Accessors public int getSize() { return scripts.size(); }
public static PetriNet convert(ConfigurableEPC baseEPC) { HashMap<EPCFunction, Transition> functionActivityMapping; HashMap<EPCConnector, Place> xorconnectorChoiceMapping; // HV: Initialize the mappings. functionActivityMapping = new HashMap<EPCFunction, Transition>(); xorconnectorChoiceMapping = new HashMap<EPCConnector, Place>(); // Check to use the weights if necessary // HV: Add both mappings. On completion, these will be filledd. PetriNet petrinet = EPCToPetriNetConverter.convert( baseEPC, new HashMap(), functionActivityMapping, xorconnectorChoiceMapping); HashSet visible = new HashSet(); // HV: The next block is taken care of by the functionActivityMapping // below. /* * Iterator it = petrinet.getTransitions().iterator(); while * (it.hasNext()) { Transition t = (Transition) it.next(); if (t.object * instanceof EPCFunction) { // if (t.getLogEvent() != null) { // Add * transitions with LogEvent (i.e. referring to functions) * visible.add(t); } } */ // HV: Prevent the places mapped onto from being reduced. visible.addAll(functionActivityMapping.values()); visible.addAll(xorconnectorChoiceMapping.values()); Message.add(visible.toString(), Message.DEBUG); Iterator it = petrinet.getPlaces().iterator(); while (it.hasNext()) { Place p = (Place) it.next(); if (p.inDegree() * p.outDegree() == 0) { // Add Initial and final places to visible, i.e. places that // refer to in and output events visible.add(p); } } // Reduce the PetriNet with Murata rules, while keeping the visible ones PetriNetReduction pnred = new PetriNetReduction(); pnred.setNonReducableNodes(visible); HashMap pnMap = new HashMap(); // Used to map pre-reduction nodes to // post-reduction nodes. PetriNet reduced = pnred.reduce(petrinet, pnMap); if (reduced != petrinet) { // Update both mappings from pre-reduction nodes to post-reduction // nodes. 
HashMap<EPCFunction, Transition> newFunctionActivityMapping = new HashMap<EPCFunction, Transition>(); for (EPCFunction function : functionActivityMapping.keySet()) { Transition transition = (Transition) functionActivityMapping.get(function); if (pnMap.keySet().contains(transition)) { newFunctionActivityMapping.put(function, (Transition) pnMap.get(transition)); } } functionActivityMapping = newFunctionActivityMapping; HashMap<EPCConnector, Place> newXorconnectorChoiceMapping = new HashMap<EPCConnector, Place>(); for (EPCConnector connector : xorconnectorChoiceMapping.keySet()) { Place place = (Place) xorconnectorChoiceMapping.get(connector); if (pnMap.keySet().contains(place)) { newXorconnectorChoiceMapping.put(connector, (Place) pnMap.get(place)); } } xorconnectorChoiceMapping = newXorconnectorChoiceMapping; } reduced.makeClusters(); // filter the \nunknown:normal ArrayList<Transition> alTrans = reduced.getVisibleTasks(); for (int i = 0; i < alTrans.size(); i++) { Transition t = alTrans.get(i); String id = t.getIdentifier(); int idx = id.indexOf("\\nunknown:normal"); if (idx > 0) { id = id.substring(0, idx); } // �˴������ֵ��ѯ�滻���е�label String mappedId = htDict.get(id); if (mappedId != null) { t.setIdentifier(mappedId); } else { t.setIdentifier(id); } } return reduced; }
public int runCsvImport() { int count = 0; try { int linecnt = 0; String[] header = null; ArrayList<String> fieldsInImport = new ArrayList<String>(); BufferedReader f = new BufferedReader(new InputStreamReader(new FileInputStream(filename), encoding)); String s; DataRecord dummyRecord = storageObject.createNewRecord(); while ((s = f.readLine()) != null) { s = s.trim(); if (s.length() == 0) { continue; } Vector<String> fields = splitFields(s); if (fields.size() > 0) { if (linecnt == 0) { // header header = new String[fields.size()]; for (int i = 0; i < fields.size(); i++) { header[i] = fields.get(i); if (header[i].startsWith("#") && header[i].endsWith("#") && header.length > 2) { header[i] = header[i].substring(1, header[i].length() - 1).trim(); overrideKeyField = header[i]; } String[] equivFields = dummyRecord.getEquivalentFields(header[i]); for (String ef : equivFields) { fieldsInImport.add(ef); } } } else { // fields DataRecord r = storageObject.createNewRecord(); for (int i = 0; i < header.length; i++) { String value = (fields.size() > i ? fields.get(i) : null); if (value != null && value.length() > 0) { try { if (!r.setFromText(header[i], value.trim())) { logImportWarning( r, "Value '" + value + "' for Field '" + header[i] + "' corrected to '" + r.getAsText(header[i]) + "'"); } } catch (Exception esetvalue) { logImportWarning( r, "Cannot set value '" + value + "' for Field '" + header[i] + "': " + esetvalue.toString()); } } } if (importRecord(r, fieldsInImport)) { count++; } } } linecnt++; } f.close(); } catch (Exception e) { logInfo(e.toString()); errorCount++; Logger.log(e); if (Daten.isGuiAppl()) { Dialog.error(e.toString()); } } return count; }
/**
 * SAX start-of-element callback; updates the parsing state from the element's attributes.
 *
 * <ul>
 *   <li>{@code PAGE}: increments the page counter (its attributes are read but not used).
 *   <li>{@code BLOCK}: starts a new block on the current page and resets the block text buffer
 *       and token counter.
 *   <li>{@code IMAGE}: records the {@code href} and updates the current position and size.
 *   <li>{@code TEXT}: attributes are read but not used.
 *   <li>{@code TOKEN}: updates the current font, style, colour, rotation, position and size.
 *   <li>{@code xi:include}: records the {@code href} in the image list.
 * </ul>
 *
 * @param namespaceURI namespace URI (unused)
 * @param localName local name (unused)
 * @param qName qualified element name used for dispatch
 * @param atts attributes of the element
 * @throws SAXException declared by the SAX contract
 */
public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
    throws SAXException {
  if (qName.equals("PAGE")) {
    int length = atts.getLength();
    currentPage++;
    // Process each attribute
    for (int i = 0; i < length; i++) {
      // Get names and values for each attribute
      String name = atts.getQName(i);
      String value = atts.getValue(i);
      if ((name != null) && (value != null)) {
        // The page attributes are recognised but currently not stored anywhere.
        if (name.equals("id")) {;
        } else if (name.equals("number")) {
        } else if (name.equals("width")) {
        } else if (name.equals("height")) {
        }
      }
    }
    // An earlier version closed the open block here and emitted an "@PAGE" marker block.
  } else if (qName.equals("BLOCK")) {
    block = new Block();
    blabla = new StringBuffer();
    nbTokens = 0;
    block.setPage(currentPage);
    // blabla.append("\n@block\n");
  } else if (qName.equals("IMAGE")) {
    int length = atts.getLength();
    // Process each attribute
    for (int i = 0; i < length; i++) {
      // Get names and values for each attribute
      String name = atts.getQName(i);
      String value = atts.getValue(i);
      if ((name != null) && (value != null)) {
        if (name.equals("href")) {
          // NOTE(review): images is used without a null check (the lazy initialisation is
          // commented out) - presumably it is created elsewhere; confirm.
          // if (images == null)
          // images = new ArrayList<String>();
          images.add(value);
        } else if (name.equals("x")) {
          double x = Double.parseDouble(value);
          if (x != currentX) {
            currentX = x;
          }
        } else if (name.equals("y")) {
          double y = Double.parseDouble(value);
          if (y != currentY) {
            currentY = y;
          }
        } else if (name.equals("width")) {
          double width = Double.parseDouble(value);
          if (width != currentWidth) {
            currentWidth = width;
          }
        } else if (name.equals("height")) {
          double height = Double.parseDouble(value);
          if (height != currentHeight) {
            currentHeight = height;
          }
        }
      }
    }
  } else if (qName.equals("TEXT")) {
    int length = atts.getLength();
    // Process each attribute
    for (int i = 0; i < length; i++) {
      // Get names and values for each attribute
      String name = atts.getQName(i);
      String value = atts.getValue(i);
      if ((name != null) && (value != null)) {
        // The text-line attributes are recognised but currently not stored anywhere.
        if (name.equals("id")) {
        } else if (name.equals("x")) {
        } else if (name.equals("y")) {
        } else if (name.equals("width")) {
        } else if (name.equals("height")) {
        }
      }
    }
  } else if (qName.equals("TOKEN")) {
    int length = atts.getLength();
    // Process each attribute
    for (int i = 0; i < length; i++) {
      // Get names and values for each attribute
      String name = atts.getQName(i);
      String value = atts.getValue(i);
      if ((name != null) && (value != null)) {
        if (name.equals("id")) {;
        } else if (name.equals("font-name")) {
          // A change of font also appends a string to the block text buffer.
          if (!value.equals(currentFont)) {
            currentFont = value;
            blabla.append(" ");
          }
        } else if (name.equals("font-size")) {
          // A change of font size appends to the block text buffer as well.
          double fontSize = Double.parseDouble(value);
          if (fontSize != currentFontSize) {
            currentFontSize = fontSize;
            blabla.append(" ");
          }
        } else if (name.equals("bold")) {
          if (value.equals("yes")) {
            currentBold = true;
          } else {
            currentBold = false;
          }
        } else if (name.equals("italic")) {
          if (value.equals("yes")) {
            currentItalic = true;
          } else {
            currentItalic = false;
          }
        } else if (name.equals("font-color")) {
          if (!value.equals(colorFont)) {
            colorFont = value;
          }
        } else if (name.equals("rotation")) {
          // Any value other than "0" counts as rotated.
          if (value.equals("0")) currentRotation = false;
          else currentRotation = true;
        } else if (name.equals("x")) {
          double x = Double.parseDouble(value);
          if (x != currentX) {
            currentX = x;
          }
        } else if (name.equals("y")) {
          double y = Double.parseDouble(value);
          if (y != currentY) {
            currentY = y;
          }
        } else if (name.equals("base")) {
          // The baseline is parsed (a malformed number still throws) but not stored.
          double base = Double.parseDouble(value);
        } else if (name.equals("width")) {
          double width = Double.parseDouble(value);
          if (width != currentWidth) {
            currentWidth = width;
          }
        } else if (name.equals("height")) {
          double height = Double.parseDouble(value);
          if (height != currentHeight) {
            currentHeight = height;
          }
        }
      }
    }
  } else if (qName.equals("xi:include")) {
    int length = atts.getLength();
    // Process each attribute
    for (int i = 0; i < length; i++) {
      // Get names and values for each attribute
      String name = atts.getQName(i);
      String value = atts.getValue(i);
      if ((name != null) && (value != null)) {
        if (name.equals("href")) {
          // Included resources are collected in the same list as IMAGE hrefs.
          // if (images == null)
          // images = new ArrayList<String>();
          images.add(value);
        }
      }
    }
  }
  // accumulator.setLength(0);
}
// Remove public void remove(int i) { scripts.remove(i); fireTableRowsDeleted(i, i); }
public void endElement(java.lang.String uri, java.lang.String localName, java.lang.String qName) throws SAXException { // if (!qName.equals("TOKEN") && !qName.equals("BLOCK") && // !qName.equals("TEXT")) // System.out.println(qName); if (qName.equals("TEXT")) { blabla.append("\n"); LayoutToken token = new LayoutToken(); token.setText("\n"); block.addToken(token); accumulator.setLength(0); tokenizations.add("\n"); } else if (qName.equals("METADATA")) { accumulator.setLength(0); } else if (qName.equals("TOKEN")) { String tok0 = TextUtilities.clean(getText()); if (block.getStartToken() == -1) { block.setStartToken(tokenizations.size()); } if (tok0.length() > 0) { StringTokenizer st = new StringTokenizer(tok0, TextUtilities.fullPunctuations, true); boolean diaresis = false; boolean accent = false; boolean keepLast = false; while (st.hasMoreTokens()) { diaresis = false; accent = false; keepLast = false; String tok = st.nextToken(); if (tok.length() > 0) { LayoutToken token = new LayoutToken(); if ((previousToken != null) && (tok != null) && (previousToken.length() > 0) && (tok.length() > 0) && blabla.length() > 0) { Character leftChar = previousTok.getText().charAt(previousTok.getText().length() - 1); Character rightChar = tok.charAt(0); ModifierClass leftClass = classifyChar(leftChar); ModifierClass rightClass = classifyChar(rightChar); ModifierClass modifierClass = ModifierClass.NOT_A_MODIFIER; if (leftClass != ModifierClass.NOT_A_MODIFIER || rightClass != ModifierClass.NOT_A_MODIFIER) { Character baseChar = null; Character modifierChar = null; if (leftClass != ModifierClass.NOT_A_MODIFIER) { if (rightClass != ModifierClass.NOT_A_MODIFIER) { // assert false; // keeping characters, but setting class // to not a modifier baseChar = leftChar; modifierChar = rightChar; modifierClass = ModifierClass.NOT_A_MODIFIER; } else { baseChar = rightChar; modifierChar = leftChar; modifierClass = leftClass; } } else { baseChar = leftChar; modifierChar = rightChar; modifierClass = 
rightClass; } String updatedChar = modifyCharacter(baseChar, modifierChar); tokenizations.remove(tokenizations.size() - 1); if (tokenizations.size() > 0) { tokenizations.remove(tokenizations.size() - 1); } blabla.deleteCharAt(blabla.length() - 1); if (blabla.length() > 0) { blabla.deleteCharAt(blabla.length() - 1); } removeLastCharacterIfPresent(previousTok); if (updatedChar != null) { blabla.append(updatedChar); previousTok.setText(previousTok.getText() + updatedChar); } blabla.append(tok.substring(1, tok.length())); previousTok.setText(previousTok.getText() + tok.substring(1, tok.length())); tokenizations.add(previousTok.getText()); diaresis = (modifierClass == ModifierClass.DIAERESIS || modifierClass == ModifierClass.NORDIC_RING || modifierClass == ModifierClass.CZECH_CARON || modifierClass == ModifierClass.TILDE || modifierClass == ModifierClass.CEDILLA); accent = (modifierClass == ModifierClass.ACUTE_ACCENT || modifierClass == ModifierClass.CIRCUMFLEX || modifierClass == ModifierClass.GRAVE_ACCENT); if (rightClass != ModifierClass.NOT_A_MODIFIER) { tok = ""; // resetting current token as it // is a single-item } } } if (tok != null) { // actually in certain cases, the extracted string under token can be a chunk of text // with separators that need to be preserved // tok = tok.replace(" ", ""); } if ((!diaresis) && (!accent)) { // blabla.append(" "); blabla.append(tok); token.setText(tok); tokenizations.add(tok); } else { tok = ""; keepLast = true; } /* * StringTokenizer st0 = new StringTokenizer(tok0, * TextUtilities.fullPunctuations, true); * while(st0.hasMoreTokens()) { String tok = * st0.nextToken(); tokenizations.add(tok); } * tokenizations.add(" "); */ /* * boolean punct1 = false; boolean punct2 = false; * boolean punct3 = false; String content = null; int i * = 0; for(; i<TextUtilities.punctuations.length(); * i++) { if (tok.length() > 0) { if * (tok.charAt(tok.length()-1) == * TextUtilities.punctuations.charAt(i)) { punct1 = * true; content = 
tok.substring(0, tok.length()-1); if * (tok.length() > 1) { int j = 0; for(; * j<TextUtilities.punctuations.length(); j++) { if * (tok.charAt(tok.length()-2) == * TextUtilities.punctuations.charAt(j)) { punct3 = * true; content = tok.substring(0, tok.length()-2); } } * } break; } } } if (tok.length() > 0) { if ( * (tok.startsWith("(")) && (tok.length() > 1) ) { if * ((punct3) && (tok.length() > 2)) content = * tok.substring(1, tok.length()-2); else if (punct1) * content = tok.substring(1, tok.length()-1); else * content = tok.substring(1, tok.length()); punct2 = * true; token.setText("("); } else if ( * (tok.startsWith("[")) && (tok.length() > 1) ) { if * ((punct3) && (tok.length() > 2)) content = * tok.substring(1, tok.length()-2); else if (punct1) * content = tok.substring(1, tok.length()-1); else * content = tok.substring(1, tok.length()); punct2 = * true; token.setText("["); } else if ( * (tok.startsWith("\"")) && (tok.length() > 1) ) { if * ((punct3) && (tok.length() > 2)) content = * tok.substring(1, tok.length()-2); else if (punct1) * content = tok.substring(1, tok.length()-1); else * content = tok.substring(1, tok.length()); punct2 = * true; token.setText("\""); } } */ if (currentRotation) currentFontSize = currentFontSize / 2; /* * if (punct2) { if (currentFont != null) * token.setFont(currentFont.toLowerCase()); else * token.setFont("default"); * token.setItalic(currentItalic); * token.setBold(currentBold); * token.setRotation(currentRotation); * token.setColorFont(colorFont); token.setX(currentX); * token.setY(currentY); token.setWidth(currentWidth); * token.setHeight(currentHeight); * token.setFontSize(currentFontSize); * block.addToken(token); * * token = new LayoutToken(); token.setText(content); } * if (punct1) { token.setText(content); if (currentFont * != null) token.setFont(currentFont.toLowerCase()); * else token.setFont("default"); * token.setItalic(currentItalic); * token.setBold(currentBold); * token.setRotation(currentRotation); * 
token.setColorFont(colorFont); token.setX(currentX); * token.setY(currentY); token.setWidth(currentWidth); * token.setHeight(currentHeight); * token.setFontSize(currentFontSize); * block.addToken(token); * * if (punct3) { token = new LayoutToken(); * token.setText(""+tok.charAt(tok.length()-2)); if * (currentFont != null) * token.setFont(currentFont.toLowerCase()); else * token.setFont("default"); * token.setItalic(currentItalic); * token.setBold(currentBold); * token.setRotation(currentRotation); * token.setColorFont(colorFont); token.setX(currentX); * token.setY(currentY); token.setWidth(currentWidth); * token.setHeight(currentHeight); * token.setFontSize(currentFontSize); * block.addToken(token); } * * token = new LayoutToken(); * token.setText(""+tok.charAt(tok.length()-1)); } */ if (currentFont != null) token.setFont(currentFont.toLowerCase()); else token.setFont("default"); token.setItalic(currentItalic); token.setBold(currentBold); token.setRotation(currentRotation); token.setColorFont(colorFont); token.setX(currentX); token.setY(currentY); token.setWidth(currentWidth); token.setHeight(currentHeight); token.setFontSize(currentFontSize); if (!diaresis && !accent) { block.addToken(token); } if (block.getFont() == null) { if (currentFont != null) block.setFont(currentFont.toLowerCase()); else token.setFont("default"); } if (nbTokens == 0) { block.setItalic(currentItalic); block.setBold(currentBold); } if (block.getColorFont() == null) block.setColorFont(colorFont); if (block.getX() == 0.0) block.setX(currentX); if (block.getY() == 0.0) block.setY(currentY); if (block.getWidth() == 0.0) block.setWidth(currentWidth); if (block.getHeight() == 0.0) block.setHeight(currentHeight); if (block.getFontSize() == 0.0) block.setFontSize(currentFontSize); if (!diaresis && !accent) { previousToken = tok; previousTok = token; } else { previousToken = previousTok.getText(); } nbTokens++; accumulator.setLength(0); } } if (tokenizations.size() > 0) { String justBefore = 
tokenizations.get(tokenizations.size() - 1); if (!justBefore.endsWith("-")) { tokenizations.add(" "); blabla.append(" "); } } } block.setEndToken(tokenizations.size()); } else if (qName.equals("PAGE")) { // page marker are usefull to detect headers (same first line(s) // appearing on each page) if (block != null) { blabla.append("\n"); tokenizations.add("\n"); block.setText(blabla.toString()); block.setNbTokens(nbTokens); doc.addBlock(block); } Block block0 = new Block(); block0.setText("@PAGE\n"); block0.setNbTokens(0); block0.setPage(currentPage); doc.addBlock(block0); block = new Block(); block.setPage(currentPage); blabla = new StringBuffer(); nbTokens = 0; // blabla.append("\n@block\n"); tokenizations.add("\n"); } else if (qName.equals("IMAGE")) { if (block != null) { blabla.append("\n"); block.setText(blabla.toString()); block.setNbTokens(nbTokens); doc.addBlock(block); } block = new Block(); block.setPage(currentPage); blabla = new StringBuffer(); if (images.size() > 0) { blabla.append("@IMAGE " + images.get(images.size() - 1) + "\n"); } block.setText(blabla.toString()); block.setNbTokens(nbTokens); if (block.getX() == 0.0) block.setX(currentX); if (block.getY() == 0.0) block.setY(currentY); if (block.getWidth() == 0.0) block.setWidth(currentWidth); if (block.getHeight() == 0.0) block.setHeight(currentHeight); doc.addBlock(block); blabla = new StringBuffer(); nbTokens = 0; block = new Block(); block.setPage(currentPage); } /* * else if (qName.equals("VECTORIALIMAGES")) { if (block != null) { * blabla.append("\n"); block.setText(blabla.toString()); * block.setNbTokens(nbTokens); doc.addBlock(block); } block = new * Block(); block.setPage(currentPage); blabla = new StringBuffer(); * blabla.append("@IMAGE " + "vectorial \n"); * block.setText(blabla.toString()); block.setNbTokens(nbTokens); if * (block.getX() == 0.0) block.setX(currentX); if (block.getY() == 0.0) * block.setY(currentY); if (block.getWidth() == 0.0) * block.setWidth(currentWidth); if 
(block.getHeight() == 0.0) * block.setHeight(currentHeight); doc.addBlock(block); blabla = new * StringBuffer(); nbTokens = 0; block = new Block(); * block.setPage(currentPage); } */ else if (qName.equals("BLOCK")) { blabla.append("\n"); tokenizations.add("\n"); block.setText(blabla.toString()); block.setNbTokens(nbTokens); block.setWidth(currentX - block.getX() + currentWidth); block.setHeight(currentY - block.getY() + currentHeight); doc.addBlock(block); // blabla = new StringBuffer(); nbTokens = 0; block = null; } else if (qName.equals("xi:include")) { if (block != null) { blabla.append("\n"); block.setText(blabla.toString()); block.setNbTokens(nbTokens); doc.addBlock(block); } block = new Block(); block.setPage(currentPage); blabla = new StringBuffer(); blabla.append("@IMAGE " + images.get(images.size() - 1) + "\n"); block.setText(blabla.toString()); block.setNbTokens(nbTokens); doc.addBlock(block); blabla = new StringBuffer(); nbTokens = 0; block = new Block(); block.setPage(currentPage); } /* * else if (qName.equals("DOCUMENT")) { * System.out.println(blabla.toString()); } */ }