/**
 * Queues a request on this subindex's waiting list, first advancing it to the
 * stage appropriate for the subindex's fetch state.
 *
 * @param request the find request to enqueue on this subindex
 */
void addRequest(FindRequest request) {
    // Data already fetched -> the request can go straight to parsing;
    // otherwise it must first wait for the subindex fetch to complete.
    FindRequest.Stages nextStage =
            (fetchStatus == FetchStatus.FETCHED)
                    ? FindRequest.Stages.PARSE
                    : FindRequest.Stages.FETCHSUBINDEX;
    request.setStage(nextStage);
    synchronized (waitingOnSubindex) {
        waitingOnSubindex.add(request);
    }
}
/** * Puts request into the dependency List of either the main index or the subindex depending on * whether the main index is availiable * * @param request * @throws freenet.client.FetchException * @throws java.net.MalformedURLException */ private synchronized void setdependencies(FindRequest request) throws FetchException, MalformedURLException { // Logger.normal(this, "setting dependencies for "+request+" on "+this.toString()); if (fetchStatus != FetchStatus.FETCHED) { waitingOnMainIndex.add(request); request.setStage(FindRequest.Stages.FETCHROOT); startFetch(false); } else { request.setStage(FindRequest.Stages.FETCHSUBINDEX); SubIndex subindex = getSubIndex(request.getSubject()); subindex.addRequest(request); // Logger.normal(this, "STarting "+getSubIndex(request.getSubject())+" to look for // "+request.getSubject()); if (executor != null) executor.execute(subindex, "Subindex:" + subindex.getFileName()); else (new Thread(subindex, "Subindex:" + subindex.getFileName())).start(); } }
public void parseSubIndex() throws TaskAbortException { synchronized (parsingSubindex) { // Transfer all requests waiting on this subindex to the parsing list synchronized (waitingOnSubindex) { parsingSubindex.addAll(waitingOnSubindex); waitingOnSubindex.removeAll(parsingSubindex); } // Set status of all those about to be parsed to PARSE for (FindRequest r : parsingSubindex) r.setStage(FindRequest.Stages.PARSE); // Multi-stage parse to minimise memory usage. // Stage 1: Extract the declaration (first tag), copy everything before "<files " to one // bucket, plus everything after "</files>". // Copy the declaration, plus everything between the two (inclusive) to another bucket. Bucket mainBucket, filesBucket; try { InputStream is = bucket.getInputStream(); mainBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1); filesBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1); OutputStream mainOS = new BufferedOutputStream(mainBucket.getOutputStream()); OutputStream filesOS = new BufferedOutputStream(filesBucket.getOutputStream()); // OutputStream mainOS = new BufferedOutputStream(new FileOutputStream("main.tmp")); // OutputStream filesOS = new BufferedOutputStream(new FileOutputStream("files.tmp")); BufferedInputStream bis = new BufferedInputStream(is); byte greaterThan = ">".getBytes("UTF-8")[0]; byte[] filesPrefix = "<files ".getBytes("UTF-8"); byte[] filesPrefixAlt = "<files>".getBytes("UTF-8"); assert (filesPrefix.length == filesPrefixAlt.length); byte[] filesEnd = "</files>".getBytes("UTF-8"); final int MODE_SEEKING_DECLARATION = 1; final int MODE_SEEKING_FILES = 2; final int MODE_COPYING_FILES = 3; final int MODE_COPYING_REST = 4; int mode = MODE_SEEKING_DECLARATION; int b; byte[] declarationBuf = new byte[100]; int declarationPtr = 0; byte[] prefixBuffer = new byte[filesPrefix.length]; int prefixPtr = 0; byte[] endBuffer = new byte[filesEnd.length]; int endPtr = 0; while ((b = bis.read()) != -1) { if (mode == MODE_SEEKING_DECLARATION) { if 
(declarationPtr == declarationBuf.length) throw new TaskAbortException("Could not split up XML: declaration too long", null); declarationBuf[declarationPtr++] = (byte) b; mainOS.write(b); filesOS.write(b); if (b == greaterThan) { mode = MODE_SEEKING_FILES; } } else if (mode == MODE_SEEKING_FILES) { if (prefixPtr != prefixBuffer.length) { prefixBuffer[prefixPtr++] = (byte) b; } else { if (Fields.byteArrayEqual(filesPrefix, prefixBuffer) || Fields.byteArrayEqual(filesPrefixAlt, prefixBuffer)) { mode = MODE_COPYING_FILES; filesOS.write(prefixBuffer); filesOS.write(b); } else { mainOS.write(prefixBuffer[0]); System.arraycopy(prefixBuffer, 1, prefixBuffer, 0, prefixBuffer.length - 1); prefixBuffer[prefixBuffer.length - 1] = (byte) b; } } } else if (mode == MODE_COPYING_FILES) { if (endPtr != endBuffer.length) { endBuffer[endPtr++] = (byte) b; } else { if (Fields.byteArrayEqual(filesEnd, endBuffer)) { mode = MODE_COPYING_REST; filesOS.write(endBuffer); mainOS.write(b); } else { filesOS.write(endBuffer[0]); System.arraycopy(endBuffer, 1, endBuffer, 0, endBuffer.length - 1); endBuffer[endBuffer.length - 1] = (byte) b; } } } else if (mode == MODE_COPYING_REST) { mainOS.write(b); } } if (mode != MODE_COPYING_REST) throw new TaskAbortException("Could not split up XML: Last mode was " + mode, null); mainOS.close(); filesOS.close(); } catch (IOException e) { throw new TaskAbortException("Could not split XML: ", e); } if (logMINOR) Logger.minor(this, "Finished splitting XML"); try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); SAXParser saxParser = factory.newSAXParser(); // Stage 2: Parse the first bucket, find the keyword we want, find the file id's. 
InputStream is = mainBucket.getInputStream(); StageTwoHandler stageTwoHandler = new StageTwoHandler(); saxParser.parse(is, stageTwoHandler); if (logMINOR) Logger.minor(this, "Finished stage two XML parse"); is.close(); // Stage 3: Parse the second bucket, extract the <file>'s for the specific ID's. is = filesBucket.getInputStream(); StageThreeHandler stageThreeHandler = new StageThreeHandler(); saxParser.parse(is, stageThreeHandler); if (logMINOR) Logger.minor(this, "Finished stage three XML parse"); is.close(); Logger.minor(this, "parsing finished " + parsingSubindex.toString()); for (FindRequest findRequest : parsingSubindex) { findRequest.setFinished(); } parsingSubindex.clear(); } catch (Exception err) { Logger.error(this, "Error parsing " + filename, err); throw new TaskAbortException("Could not parse XML: ", err); } } }