private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher) throws IOException { assertEquals("total results", expectedCount, hits.length); for (int i = 0; i < hits.length; i++) { if (i < 10 || (i > 94 && i < 105)) { Document d = searcher.doc(hits[i].doc); assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD)); } } }
private void printHits(PrintWriter out, ScoreDoc[] hits, IndexSearcher searcher) throws IOException { out.println(hits.length + " total results\n"); for (int i = 0; i < hits.length; i++) { if (i < 10 || (i > 94 && i < 105)) { Document d = searcher.doc(hits[i].doc); out.println(i + " " + d.get(ID_FIELD)); } } }
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS) throws Exception { Directory directory = newDirectory(); Analyzer analyzer = new MockAnalyzer(random); IndexWriterConfig conf = newIndexWriterConfig(analyzer); final MergePolicy mp = conf.getMergePolicy(); mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0); IndexWriter writer = new IndexWriter(directory, conf); if (VERBOSE) { System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS); } for (int j = 0; j < MAX_DOCS; j++) { Document d = new Document(); d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES)); d.add(newTextField(ID_FIELD, Integer.toString(j), Field.Store.YES)); writer.addDocument(d); } writer.close(); // try a search without OR IndexReader reader = DirectoryReader.open(directory); IndexSearcher searcher = newSearcher(reader); Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)); out.println("Query: " + query.toString(PRIORITY_FIELD)); if (VERBOSE) { System.out.println("TEST: search query=" + query); } final Sort sort = new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT)); ScoreDoc[] hits = searcher.search(query, null, MAX_DOCS, sort).scoreDocs; printHits(out, hits, searcher); checkHits(hits, MAX_DOCS, searcher); // try a new search with OR searcher = newSearcher(reader); hits = null; BooleanQuery booleanQuery = new BooleanQuery(); booleanQuery.add( new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD); booleanQuery.add( new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD); out.println("Query: " + booleanQuery.toString(PRIORITY_FIELD)); hits = searcher.search(booleanQuery, null, MAX_DOCS, sort).scoreDocs; printHits(out, hits, searcher); checkHits(hits, MAX_DOCS, searcher); reader.close(); directory.close(); }
// private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException { private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException { int readerMax = reader.maxDoc(); DocScore[] newToOld = new DocScore[readerMax]; // use site, an indexed, un-tokenized field to get boost // byte[] boosts = reader.norms("site"); TODO MC /* TODO MC */ Document docMeta; Pattern includes = Pattern.compile("\\|"); String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, ""); String includeExtensions[] = includes.split(value); Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>(); for (int i = 0; i < includeExtensions.length; i++) { validExtensions.put(includeExtensions[i], true); System.out.println("extension boosted " + includeExtensions[i]); } /* TODO MC */ for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) { float score; if (reader.isDeleted(oldDoc)) { // score = 0.0f; score = -1f; // TODO MC } else { // score = Similarity.decodeNorm(boosts[oldDoc]); TODO MC /* TODO MC */ docMeta = searcher.doc(oldDoc); if (validExtensions.get(docMeta.get("subType")) == null) { // searched extensions will have higher scores score = -0.5f; } else { score = Integer.parseInt(docMeta.get("inlinks")); /* if (score==0) { score=0.001f; // TODO MC - to not erase } */ } /* TODO MC */ // System.out.println("Score for old document "+oldDoc+" is "+score+" and type // "+docMeta.get("subType")); // TODO MC debug remove } DocScore docScore = new DocScore(); docScore.doc = oldDoc; docScore.score = score; newToOld[oldDoc] = docScore; } System.out.println("Sorting " + newToOld.length + " documents."); Arrays.sort(newToOld); // HeapSorter.sort(newToOld); // TODO MC - due to the lack of space /* TODO MC int[] oldToNew = new int[readerMax]; for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; //oldToNew[docScore.oldDoc] = docScore.score > 0.0f ? newDoc : -1; // TODO MC oldToNew[docScore.oldDoc] = newDoc; // TODO MC } */ /* TODO MC * for (int newDoc = 0; newDoc < readerMax; newDoc++) { DocScore docScore = newToOld[newDoc]; System.out.println("Score for new document "+newDoc+" is "+docScore.score); // TODO MC debug remove } * TODO MC */ // return oldToNew; TODO MC return newToOld; // TODO MC }