/**
 * Drives this subindex to completion: fetches the subindex file if needed, then parses it,
 * looping until no requests remain in {@code waitingOnSubindex}.
 *
 * <p>Synchronized on this SubIndex, so only one fetch/parse cycle runs at a time. On any
 * {@link TaskAbortException} the status is set to FAILED and the error is propagated to every
 * request in both the parsing and waiting lists.
 */
public synchronized void run() {
    try {
        // Keep working while any request is still waiting on this subindex.
        while (waitingOnSubindex.size() > 0) {
            if (fetchStatus == FetchStatus.UNFETCHED || fetchStatus == FetchStatus.FAILED) {
                try {
                    fetchStatus = FetchStatus.FETCHING;
                    // TODO tidy the fetch stuff
                    // Blocking fetch of the subindex file; result lands in this.bucket.
                    bucket = Util.fetchBucket(indexuri + filename, hlsc);
                    fetchStatus = FetchStatus.FETCHED;
                } catch (Exception e) {
                    // TODO tidy the exceptions
                    // java.net.MalformedURLException
                    // freenet.client.FetchException
                    // Any fetch failure aborts the whole run; FAILED is set in the
                    // TaskAbortException handler below.
                    String msg = indexuri + filename + " could not be opened: " + e.toString();
                    Logger.error(this, msg, e);
                    throw new TaskAbortException(msg, e);
                }
            } else if (fetchStatus == FetchStatus.FETCHED) {
                parseSubIndex();
            } else {
                // FETCHING in progress elsewhere — nothing to do on this thread.
                break;
            }
        }
    } catch (TaskAbortException e) {
        fetchStatus = FetchStatus.FAILED;
        this.error = e;
        Logger.error(this, "Dropping from subindex run loop", e);
        // Fail every request that was depending on this subindex, whether it was
        // mid-parse or still waiting.
        for (FindRequest r : parsingSubindex) r.setError(e);
        for (FindRequest r : waitingOnSubindex) r.setError(e);
    }
}
/**
 * Registers a request with this subindex, tagging it with the stage it is about to enter:
 * PARSE when the subindex file is already fetched, FETCHSUBINDEX otherwise.
 *
 * @param request the search request to queue on this subindex
 */
void addRequest(FindRequest request) {
    FindRequest.Stages nextStage =
            (fetchStatus == FetchStatus.FETCHED)
                    ? FindRequest.Stages.PARSE
                    : FindRequest.Stages.FETCHSUBINDEX;
    request.setStage(nextStage);
    synchronized (waitingOnSubindex) {
        waitingOnSubindex.add(request);
    }
}
/**
 * Called on failed/canceled fetch.
 *
 * <p>If the failure carries a redirect URI and we have retried fewer than 20 times, follow the
 * redirect and restart the fetch. Otherwise (or if the retry itself fails) mark the index as
 * FAILED and propagate a TaskAbortException to every request waiting on the main index.
 */
public void onFailure(FetchException e, ClientGetter state, ObjectContainer container) {
    fetchFailures++;
    // Retry via the redirect URI, but give up after 20 failures to avoid looping forever.
    if (fetchFailures < 20 && e.newURI != null) {
        try {
            if (logMINOR) Logger.minor(this, "Trying new URI: " + e.newURI);
            // Strip any meta strings from the redirect target before storing it.
            indexuri = e.newURI.setMetaString(new String[] {""}).toString();
            if (origEdition != -1 && e.newURI.getEdition() < origEdition) {
                // A redirect should never point at an OLDER edition; log and fall through
                // to the failure path below.
                Logger.error(
                        this,
                        "Redirect to earlier edition?!?!?!?: "
                                + e.newURI.getEdition()
                                + " from "
                                + origEdition);
            } else {
                if (logMINOR) Logger.minor(this, "Trying new URI: " + e.newURI + " : " + indexuri);
                startFetch(true);
                if (updateHook != null) updateHook.update(updateContext, indexuri);
                // Retry started successfully — do not fall through to the failure handling.
                return;
            }
        } catch (FetchException ex) {
            // Report the retry's failure rather than the original one.
            e = ex;
        } catch (MalformedURLException ex) {
            Logger.error(this, "what?", ex);
        }
    }
    fetchStatus = FetchStatus.FAILED;
    // Fail every request that was blocked on the root index fetch.
    for (FindRequest findRequest : waitingOnMainIndex) {
        findRequest.setError(
                new TaskAbortException("Failure fetching rootindex of " + toString(), e));
    }
    Logger.error(this, "Fetch failed on " + toString() + " -- state = " + state, e);
}
/**
 * Stage-two SAX callback: scans {@code <word>} elements for subjects matching outstanding
 * requests, and opens a character buffer for each {@code <file>} element belonging to a
 * matched word.
 *
 * <p>Fixes relative to the previous version: removed the dead {@code requests != null} guard
 * (a null {@code requests} would already have thrown NPE at {@code requests.size()} above),
 * replaced {@code processingWord == true} with the idiomatic boolean test, and the
 * {@code fileCount} attribute is now read once instead of twice.
 *
 * @throws SAXException wrapping any failure while matching a word entry
 */
@Override
public void startElement(
        String nameSpaceURI, String localName, String rawName, Attributes attrs)
        throws SAXException {
    // Nothing left to match and no matches to refine: skip the rest of the document.
    if (requests.size() == 0 && (wordMatches == null || wordMatches.size() == 0)) return;
    if (rawName == null) {
        rawName = localName;
    }
    String elt_name = rawName;
    if (elt_name.equals("keywords")) processingWord = true;
    /*
     * looks for the word in the given subindex file if the word is found then the parser
     * fetches the corresponding fileElements
     */
    if (elt_name.equals("word")) {
        try {
            fileMatches.clear();
            wordMatches = null;
            match = attrs.getValue("v");
            // Move every request whose subject equals this word out of the pending list
            // and into wordMatches.
            for (Iterator<FindRequest> it = requests.iterator(); it.hasNext(); ) {
                FindRequest r = it.next();
                if (match.equals(r.getSubject())) {
                    if (wordMatches == null) wordMatches = new ArrayList<FindRequest>();
                    wordMatches.add(r);
                    it.remove();
                    Logger.minor(this, "found word match " + wordMatches);
                }
            }
            if (wordMatches != null) {
                // Read the attribute once; parse only when present.
                String fileCount = attrs.getValue("fileCount");
                if (fileCount != null) inWordFileCount = Integer.parseInt(fileCount);
                thisWordMatch =
                        new WordMatch(new ArrayList<FindRequest>(wordMatches), inWordFileCount);
            }
        } catch (Exception e) {
            throw new SAXException(e);
        }
    }
    if (elt_name.equals("file")) {
        // Only collect file entries while inside <keywords> and for a matched word.
        if (processingWord && wordMatches != null) {
            try {
                id = attrs.getValue("id");
                characters = new StringBuilder();
            } catch (Exception e) {
                Logger.error(this, "Index format may be outdated " + e.toString(), e);
            }
        }
    }
}
/**
 * Puts request into the dependency List of either the main index or the subindex depending on
 * whether the main index is available.
 *
 * <p>If the root index is not yet fetched, the request waits on the root fetch; otherwise it
 * is handed to the matching subindex, which is then run on the executor (or a plain thread
 * when no executor is configured).
 *
 * @param request the request to route
 * @throws freenet.client.FetchException if starting the root fetch fails
 * @throws java.net.MalformedURLException if the index URI is malformed
 */
private synchronized void setdependencies(FindRequest request)
        throws FetchException, MalformedURLException {
    if (fetchStatus != FetchStatus.FETCHED) {
        // Root index not available yet: queue behind the root fetch and (re)start it.
        waitingOnMainIndex.add(request);
        request.setStage(FindRequest.Stages.FETCHROOT);
        startFetch(false);
    } else {
        // Root index known: delegate to the subindex responsible for this subject.
        request.setStage(FindRequest.Stages.FETCHSUBINDEX);
        SubIndex subindex = getSubIndex(request.getSubject());
        subindex.addRequest(request);
        if (executor != null) executor.execute(subindex, "Subindex:" + subindex.getFileName());
        else (new Thread(subindex, "Subindex:" + subindex.getFileName())).start();
    }
}
/**
 * Processes the bucket containing the main index file: parses it, marks the index FETCHED,
 * and routes every request that was waiting on the root index.
 *
 * <p>Fix: the input stream is now closed in a {@code finally} block, so it is no longer
 * leaked when {@code parse(is)} throws. The bucket itself is always freed.
 *
 * @param bucket the fetched root-index data; freed before this method returns
 */
private void processRequests(Bucket bucket) {
    try {
        InputStream is = bucket.getInputStream();
        try {
            parse(is);
        } finally {
            // Close even if parsing throws — previously the stream leaked on parse errors.
            is.close();
        }
        fetchStatus = FetchStatus.FETCHED;
        // Re-dispatch everything that was blocked on the root index.
        for (FindRequest req : waitingOnMainIndex) setdependencies(req);
        waitingOnMainIndex.clear();
    } catch (Exception e) {
        fetchStatus = FetchStatus.FAILED;
        for (FindRequest findRequest : waitingOnMainIndex) {
            findRequest.setError(new TaskAbortException("Failure parsing " + toString(), e));
        }
        Logger.error(this, indexuri, e);
    } finally {
        bucket.free();
    }
}
/**
 * Builds a mocked rest.li {@link Request} of the given class/method, sends it through a
 * {@link RestClient} backed by a mock transport, and returns the {@link RestRequest} that the
 * client actually generated (captured from the transport call).
 *
 * <p>The per-method branches set up EasyMock expectations matching exactly how RestClient
 * interrogates each request type — the {@code .times(n)}/{@code .once()} counts are part of
 * the contract and must mirror RestClient's call pattern.
 *
 * @param requestClass concrete Request subtype to mock
 * @param method resource method under test; selects which expectations are registered
 * @param entityBody DataMap returned by the mocked record template
 * @param contentType request content type, or null for the client default
 * @param acceptTypes accept types, or null for the client default
 * @return the RestRequest captured from the mocked transport client
 * @throws URISyntaxException if the service URI cannot be built
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private <T extends Request> RestRequest clientGeneratedRequest(
        Class<T> requestClass,
        ResourceMethod method,
        DataMap entityBody,
        RestClient.ContentType contentType,
        List<RestClient.AcceptType> acceptTypes)
        throws URISyntaxException {
    // massive setup...
    Client mockClient = EasyMock.createMock(Client.class);
    @SuppressWarnings({"rawtypes"})
    Request<?> mockRequest = EasyMock.createMock(requestClass);
    RecordTemplate mockRecordTemplate = EasyMock.createMock(RecordTemplate.class);
    @SuppressWarnings({"rawtypes"})
    RestResponseDecoder mockResponseDecoder = EasyMock.createMock(RestResponseDecoder.class);
    setCommonExpectations(mockRequest, method, mockRecordTemplate, mockResponseDecoder);
    // Batch updates build their input differently; everything else exposes a single record.
    if (method == ResourceMethod.BATCH_PARTIAL_UPDATE || method == ResourceMethod.BATCH_UPDATE) {
        buildInputForBatchPathAndUpdate(mockRequest);
    } else {
        EasyMock.expect(mockRequest.getInputRecord()).andReturn(mockRecordTemplate).times(2);
        EasyMock.expect(mockRequest.getResourceSpec()).andReturn(new ResourceSpecImpl()).once();
    }
    // Method-specific expectations: RestClient queries ids/specs/method names differently
    // for each resource method.
    if (method == ResourceMethod.GET) {
        EasyMock.expect(((GetRequest) mockRequest).getObjectId()).andReturn(null).once();
        EasyMock.expect(((GetRequest) mockRequest).getResourceSpec())
                .andReturn(new ResourceSpecImpl())
                .once();
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else if (method == ResourceMethod.BATCH_GET) {
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else if (method == ResourceMethod.ACTION) {
        EasyMock.expect(((ActionRequest) mockRequest).getId()).andReturn(null);
        EasyMock.expect(mockRequest.getMethodName()).andReturn("testAction");
    } else if (method == ResourceMethod.FINDER) {
        EasyMock.expect(((FindRequest) mockRequest).getAssocKey()).andReturn(new CompoundKey());
        EasyMock.expect(mockRequest.getMethodName()).andReturn("testFinder");
    } else if (method == ResourceMethod.GET_ALL) {
        EasyMock.expect(((GetAllRequest) mockRequest).getAssocKey()).andReturn(new CompoundKey());
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else if (method == ResourceMethod.UPDATE) {
        EasyMock.expect(((UpdateRequest) mockRequest).getResourceSpec())
                .andReturn(new ResourceSpecImpl())
                .once();
        EasyMock.expect(((UpdateRequest) mockRequest).getId()).andReturn(null);
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else if (method == ResourceMethod.PARTIAL_UPDATE) {
        EasyMock.expect(mockRequest.getResourceSpec()).andReturn(new ResourceSpecImpl()).times(2);
        EasyMock.expect(((PartialUpdateRequest) mockRequest).getId()).andReturn(null);
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else if (method == ResourceMethod.DELETE) {
        EasyMock.expect(((DeleteRequest) mockRequest).getResourceSpec())
                .andReturn(new ResourceSpecImpl())
                .once();
        EasyMock.expect(((DeleteRequest) mockRequest).getId()).andReturn(null);
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    } else {
        EasyMock.expect(mockRequest.getMethodName()).andReturn(null);
    }
    EasyMock.expect(mockRecordTemplate.data()).andReturn(entityBody).once();
    // Capture the RestRequest handed to the transport so we can return it for inspection.
    Capture<RestRequest> restRequestCapture = new Capture<RestRequest>();
    EasyMock.expect(mockClient.getMetadata(new URI(HOST + SERVICE_NAME)))
            .andReturn(Collections.<String, Object>emptyMap())
            .once();
    mockClient.restRequest(
            EasyMock.capture(restRequestCapture),
            (RequestContext) EasyMock.anyObject(),
            (Callback<RestResponse>) EasyMock.anyObject());
    EasyMock.expectLastCall().once();
    EasyMock.replay(mockClient, mockRequest, mockRecordTemplate);
    // do work!
    // Choose the RestClient constructor matching which options the caller supplied.
    RestClient restClient;
    if (acceptTypes == null) {
        restClient = new RestClient(mockClient, HOST);
    } else if (contentType == null) {
        restClient = new RestClient(mockClient, HOST, acceptTypes);
    } else {
        restClient = new RestClient(mockClient, HOST, contentType, acceptTypes);
    }
    restClient.sendRequest(mockRequest);
    return restRequestCapture.getValue();
}
/**
 * Stage-three SAX callback: for each {@code <file>} element whose id was matched in stage
 * two, computes a TF-IDF-style relevance score and adds a {@link TermPageEntry} to every
 * interested request's result set. Also picks up {@code totalFileCount} from the
 * {@code <files>} element for the IDF term.
 *
 * @throws SAXException declared by the SAX interface; failures here are logged instead
 */
@Override
public void startElement(
        String nameSpaceURI, String localName, String rawName, Attributes attrs)
        throws SAXException {
    // No word matched any file id in stage two — nothing to extract.
    if (idToFileMatches.isEmpty()) return;
    if (rawName == null) {
        rawName = localName;
    }
    String elt_name = rawName;
    if (elt_name.equals("files")) {
        String fileCount = attrs.getValue("", "totalFileCount");
        if (fileCount != null) totalFileCount = Integer.parseInt(fileCount);
        Logger.minor(this, "totalfilecount = " + totalFileCount);
    }
    if (elt_name.equals("file")) {
        try {
            String id = attrs.getValue("id");
            ArrayList<FileMatch> matches = idToFileMatches.get(id);
            if (matches != null) {
                for (FileMatch match : matches) {
                    String key = attrs.getValue("key");
                    int l = attrs.getLength();
                    String title = null;
                    int wordCount = -1;
                    // Older index formats have only 2 attributes; title/wordCount are
                    // optional extras.
                    if (l >= 3) {
                        try {
                            title = attrs.getValue("title");
                        } catch (Exception e) {
                            Logger.error(this, "Index Format not compatible " + e.toString(), e);
                        }
                        try {
                            String wordCountString = attrs.getValue("wordCount");
                            if (wordCountString != null) {
                                wordCount = Integer.parseInt(attrs.getValue("wordCount"));
                            }
                        } catch (Exception e) {
                            // wordCount is optional; silently fall back to -1.
                            // Logger.minor(this, "No wordcount found " + e.toString(), e);
                        }
                    }
                    // Every request searching for this word gets a result entry for this file.
                    for (FindRequest req : match.word.searches) {
                        Set<TermPageEntry> result = req.getUnfinishedResult();
                        float relevance = 0;
                        if (logDEBUG)
                            Logger.debug(
                                    this,
                                    "termcount "
                                            + (match.termpositions == null
                                                    ? 0
                                                    : match.termpositions.size())
                                            + " filewordcount = "
                                            + wordCount);
                        // Relevance = term frequency (occurrences / file word count),
                        // scaled by log(totalFiles / filesContainingWord) when both counts
                        // are known.
                        if (match.termpositions != null
                                && match.termpositions.size() > 0
                                && wordCount > 0) {
                            relevance = (float) (match.termpositions.size() / (float) wordCount);
                            if (totalFileCount > 0 && match.word.inWordFileCount > 0)
                                relevance *=
                                        Math.log(
                                                (float) totalFileCount
                                                        / (float) match.word.inWordFileCount);
                            if (logDEBUG)
                                Logger.debug(
                                        this,
                                        "Set relevance of "
                                                + title
                                                + " to "
                                                + relevance
                                                + " - "
                                                + key);
                        }
                        TermPageEntry pageEntry =
                                new TermPageEntry(
                                        req.getSubject(),
                                        relevance,
                                        new FreenetURI(key),
                                        title,
                                        match.termpositions);
                        result.add(pageEntry);
                        // Logger.minor(this, "added "+inFileURI+ " to "+ match);
                    }
                }
            }
        } catch (Exception e) {
            Logger.error(
                    this, "File id and key could not be retrieved. May be due to format clash", e);
        }
    }
}
/**
 * Snapshots the requests currently being parsed and gives each one a fresh, empty result set.
 *
 * <p>Fix: {@code new ArrayList(...)} was a raw type; it is now parameterized as
 * {@code ArrayList<FindRequest>}, matching the element type of {@code parsingSubindex}.
 */
StageTwoHandler() {
    // Copy so the handler can remove matched requests without mutating parsingSubindex.
    this.requests = new ArrayList<FindRequest>(parsingSubindex);
    for (FindRequest r : parsingSubindex) {
        r.setResult(new HashSet<TermPageEntry>());
    }
}
/**
 * Parses the fetched subindex XML and completes every request waiting on it.
 *
 * <p>To keep memory usage low the XML is first split byte-by-byte into two temp buckets:
 * "main" (everything outside the {@code <files>} element, plus the declaration) and "files"
 * (the declaration plus the {@code <files>...</files>} element). Stage two parses the main
 * bucket to resolve word matches; stage three parses the files bucket to build page entries.
 *
 * <p>NOTE(review): if splitting aborts (declaration too long / files element never closed),
 * {@code mainOS}/{@code filesOS} and the input stream are not closed on that path — confirm
 * whether the temp bucket factory reclaims them.
 *
 * @throws TaskAbortException if the XML cannot be split or parsed
 */
public void parseSubIndex() throws TaskAbortException {
    synchronized (parsingSubindex) {
        // Transfer all requests waiting on this subindex to the parsing list
        synchronized (waitingOnSubindex) {
            parsingSubindex.addAll(waitingOnSubindex);
            waitingOnSubindex.removeAll(parsingSubindex);
        }
        // Set status of all those about to be parsed to PARSE
        for (FindRequest r : parsingSubindex) r.setStage(FindRequest.Stages.PARSE);
        // Multi-stage parse to minimise memory usage.
        // Stage 1: Extract the declaration (first tag), copy everything before "<files " to one
        // bucket, plus everything after "</files>".
        // Copy the declaration, plus everything between the two (inclusive) to another bucket.
        Bucket mainBucket, filesBucket;
        try {
            InputStream is = bucket.getInputStream();
            mainBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1);
            filesBucket = pr.getNode().clientCore.tempBucketFactory.makeBucket(-1);
            OutputStream mainOS = new BufferedOutputStream(mainBucket.getOutputStream());
            OutputStream filesOS = new BufferedOutputStream(filesBucket.getOutputStream());
            // OutputStream mainOS = new BufferedOutputStream(new FileOutputStream("main.tmp"));
            // OutputStream filesOS = new BufferedOutputStream(new FileOutputStream("files.tmp"));
            BufferedInputStream bis = new BufferedInputStream(is);
            byte greaterThan = ">".getBytes("UTF-8")[0];
            byte[] filesPrefix = "<files ".getBytes("UTF-8");
            byte[] filesPrefixAlt = "<files>".getBytes("UTF-8");
            // The sliding-window match below relies on both prefixes having equal length.
            assert (filesPrefix.length == filesPrefixAlt.length);
            byte[] filesEnd = "</files>".getBytes("UTF-8");
            // Splitter state machine: declaration -> seeking <files> -> copying files ->
            // copying the remainder.
            final int MODE_SEEKING_DECLARATION = 1;
            final int MODE_SEEKING_FILES = 2;
            final int MODE_COPYING_FILES = 3;
            final int MODE_COPYING_REST = 4;
            int mode = MODE_SEEKING_DECLARATION;
            int b;
            byte[] declarationBuf = new byte[100];
            int declarationPtr = 0;
            byte[] prefixBuffer = new byte[filesPrefix.length];
            int prefixPtr = 0;
            byte[] endBuffer = new byte[filesEnd.length];
            int endPtr = 0;
            while ((b = bis.read()) != -1) {
                if (mode == MODE_SEEKING_DECLARATION) {
                    // The XML declaration (up to the first '>') is copied to BOTH buckets so
                    // each half is independently parseable.
                    if (declarationPtr == declarationBuf.length)
                        throw new TaskAbortException(
                                "Could not split up XML: declaration too long", null);
                    declarationBuf[declarationPtr++] = (byte) b;
                    mainOS.write(b);
                    filesOS.write(b);
                    if (b == greaterThan) {
                        mode = MODE_SEEKING_FILES;
                    }
                } else if (mode == MODE_SEEKING_FILES) {
                    // Slide a prefix-length window over the stream; on a "<files" match the
                    // window belongs to the files bucket, otherwise the oldest byte goes to
                    // the main bucket.
                    if (prefixPtr != prefixBuffer.length) {
                        prefixBuffer[prefixPtr++] = (byte) b;
                    } else {
                        if (Fields.byteArrayEqual(filesPrefix, prefixBuffer)
                                || Fields.byteArrayEqual(filesPrefixAlt, prefixBuffer)) {
                            mode = MODE_COPYING_FILES;
                            filesOS.write(prefixBuffer);
                            filesOS.write(b);
                        } else {
                            mainOS.write(prefixBuffer[0]);
                            System.arraycopy(
                                    prefixBuffer, 1, prefixBuffer, 0, prefixBuffer.length - 1);
                            prefixBuffer[prefixBuffer.length - 1] = (byte) b;
                        }
                    }
                } else if (mode == MODE_COPYING_FILES) {
                    // Same sliding-window technique, now watching for "</files>".
                    if (endPtr != endBuffer.length) {
                        endBuffer[endPtr++] = (byte) b;
                    } else {
                        if (Fields.byteArrayEqual(filesEnd, endBuffer)) {
                            mode = MODE_COPYING_REST;
                            filesOS.write(endBuffer);
                            mainOS.write(b);
                        } else {
                            filesOS.write(endBuffer[0]);
                            System.arraycopy(endBuffer, 1, endBuffer, 0, endBuffer.length - 1);
                            endBuffer[endBuffer.length - 1] = (byte) b;
                        }
                    }
                } else if (mode == MODE_COPYING_REST) {
                    mainOS.write(b);
                }
            }
            if (mode != MODE_COPYING_REST)
                throw new TaskAbortException(
                        "Could not split up XML: Last mode was " + mode, null);
            mainOS.close();
            filesOS.close();
        } catch (IOException e) {
            throw new TaskAbortException("Could not split XML: ", e);
        }
        if (logMINOR) Logger.minor(this, "Finished splitting XML");
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            // XXE hardening: refuse any DOCTYPE in the (untrusted) index data.
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            SAXParser saxParser = factory.newSAXParser();
            // Stage 2: Parse the first bucket, find the keyword we want, find the file id's.
            InputStream is = mainBucket.getInputStream();
            StageTwoHandler stageTwoHandler = new StageTwoHandler();
            saxParser.parse(is, stageTwoHandler);
            if (logMINOR) Logger.minor(this, "Finished stage two XML parse");
            is.close();
            // Stage 3: Parse the second bucket, extract the <file>'s for the specific ID's.
            is = filesBucket.getInputStream();
            StageThreeHandler stageThreeHandler = new StageThreeHandler();
            saxParser.parse(is, stageThreeHandler);
            if (logMINOR) Logger.minor(this, "Finished stage three XML parse");
            is.close();
            Logger.minor(this, "parsing finished " + parsingSubindex.toString());
            // All requests parsed successfully — mark them done and drop them.
            for (FindRequest findRequest : parsingSubindex) {
                findRequest.setFinished();
            }
            parsingSubindex.clear();
        } catch (Exception err) {
            Logger.error(this, "Error parsing " + filename, err);
            throw new TaskAbortException("Could not parse XML: ", err);
        }
    }
}
/** Hears an event and updates those Requests waiting on this subindex fetch */ public void receive( ClientEvent ce, ObjectContainer maybeContainer, ClientContext context) { FindRequest.updateWithEvent(waitingOnSubindex, ce); // Logger.normal(this, "Updated with event : "+ce.getDescription()); }