Example #1
0
  /**
   * Returns a number of random songs.
   *
   * <p>Runs a Lucene query restricted to music files in the requested folders (optionally
   * filtered by genre and year range), then draws hits at random, without replacement, until
   * {@code criteria.getCount()} songs have been collected or the hits are exhausted. Any failure
   * is logged and the songs collected so far (possibly none) are returned.
   *
   * @param criteria Search criteria.
   * @return List of random songs.
   */
  public List<MediaFile> getRandomSongs(RandomSearchCriteria criteria) {
    List<MediaFile> result = new ArrayList<MediaFile>();

    IndexReader reader = null;
    try {
      reader = createIndexReader(SONG);
      Searcher searcher = new IndexSearcher(reader);

      BooleanQuery query = new BooleanQuery();
      query.add(
          new TermQuery(new Term(FIELD_MEDIA_TYPE, MediaFile.MediaType.MUSIC.name().toLowerCase())),
          BooleanClause.Occur.MUST);
      if (criteria.getGenre() != null) {
        String genre = normalizeGenre(criteria.getGenre());
        query.add(new TermQuery(new Term(FIELD_GENRE, genre)), BooleanClause.Occur.MUST);
      }
      // Either bound may be null; it is passed straight through to the range query.
      if (criteria.getFromYear() != null || criteria.getToYear() != null) {
        NumericRangeQuery<Integer> rangeQuery =
            NumericRangeQuery.newIntRange(
                FIELD_YEAR, criteria.getFromYear(), criteria.getToYear(), true, true);
        query.add(rangeQuery, BooleanClause.Occur.MUST);
      }

      // A song must live in at least one of the requested music folders.
      List<SpanTermQuery> musicFolderQueries = new ArrayList<SpanTermQuery>();
      for (MusicFolder musicFolder : criteria.getMusicFolders()) {
        musicFolderQueries.add(
            new SpanTermQuery(new Term(FIELD_FOLDER, musicFolder.getPath().getPath())));
      }
      query.add(
          new SpanOrQuery(musicFolderQueries.toArray(new SpanQuery[musicFolderQueries.size()])),
          BooleanClause.Occur.MUST);

      TopDocs topDocs = searcher.search(query, null, Integer.MAX_VALUE);
      List<ScoreDoc> scoreDocs = Lists.newArrayList(topDocs.scoreDocs);
      Random random = new Random(System.currentTimeMillis());

      // Removing each picked hit from the candidate list guarantees no song is drawn twice.
      while (!scoreDocs.isEmpty() && result.size() < criteria.getCount()) {
        int index = random.nextInt(scoreDocs.size());
        Document doc = searcher.doc(scoreDocs.remove(index).doc);
        int id = Integer.valueOf(doc.get(FIELD_ID));
        try {
          addIfNotNull(mediaFileService.getMediaFile(id), result);
        } catch (Exception x) {
          // Keep going with the remaining candidates, but record why this one failed.
          LOG.warn("Failed to get media file " + id, x);
        }
      }

    } catch (Throwable x) {
      LOG.error("Failed to search for random songs.", x);
    } finally {
      FileUtil.closeQuietly(reader);
    }
    return result;
  }
 // Runs the sorted search (top 1000 hits) and checks that the "tracer" field values of the
 // returned documents, concatenated in hit order, equal the expected string.
 // Copied from TestSort.java
 private void assertMatches(Searcher searcher, Query query, Sort sort, String expectedResult)
     throws IOException {
   ScoreDoc[] hits = searcher.search(query, null, 1000, sort).scoreDocs;
   StringBuilder tracers = new StringBuilder(10);
   for (ScoreDoc hit : hits) {
     Document document = searcher.doc(hit.doc);
     for (String tracer : document.getValues("tracer")) {
       tracers.append(tracer);
     }
   }
   assertEquals(expectedResult, tracers.toString());
 }
Example #3
0
  /**
   * Loads the document identified by the hit's unique key and returns all of its fields as
   * parallel name/value arrays wrapped in a {@link HitDetails}.
   *
   * @param hit the hit whose unique key is the numeric document id
   * @return the field names and string values of the document, in field order
   * @throws IOException if the document cannot be read
   */
  public HitDetails getDetails(Hit hit) throws IOException {
    Integer docId = Integer.valueOf(hit.getUniqueKey());
    Document document = luceneSearcher.doc(docId);

    List storedFields = document.getFields();
    int fieldCount = storedFields.size();
    String[] names = new String[fieldCount];
    String[] contents = new String[fieldCount];

    int slot = 0;
    for (Object entry : storedFields) {
      Field current = (Field) entry;
      names[slot] = current.name();
      contents[slot] = current.stringValue();
      slot++;
    }

    return new HitDetails(names, contents);
  }
Example #4
0
  /**
   * Looks up the single document whose key field matches the given field's value.
   *
   * @param reader the index reader to search
   * @param keyField the field whose name and string value form the lookup query
   * @return the matching document, or {@code null} unless exactly one document matches
   * @throws ParseException if the generated query string cannot be parsed
   * @throws IOException if the index cannot be read
   */
  private Document findByKey(IndexReader reader, Field keyField)
      throws ParseException, IOException {
    Searcher indexSearcher = new IndexSearcher(reader);

    QueryParser parser = new QueryParser(luceneVersion, keyField.name(), queryAnalyzer);
    parser.setDefaultOperator(QueryParser.Operator.AND);
    Query keyQuery = parser.parse(keyField.name() + ":" + keyField.stringValue());

    ScoreDoc[] matches = indexSearcher.search(keyQuery, 10000).scoreDocs;
    // Zero matches or an ambiguous key are both reported as "not found".
    return matches.length == 1 ? indexSearcher.doc(matches[0].doc) : null;
  }
Example #5
0
  /**
   * Serves one search request on {@code searchSocket}.
   *
   * <p>Reads a single query line from the socket, parses it against the "data" field, runs it
   * sorted by the "date" field, and writes a hit-count line followed by the host, date and data
   * of up to 1000 hits back to the socket. Errors are printed to standard output. The searcher,
   * the reader and the socket are closed whether or not the request succeeds.
   */
  public void run() {
    IndexReader reader = null;
    Searcher searcher = null;
    PrintWriter searchReply = null;

    try {
      String requestLine =
          (new BufferedReader(new InputStreamReader(searchSocket.getInputStream()))).readLine();
      if (requestLine == null) {
        // The client closed the connection without sending a query line.
        System.out.print("Exception: no query received from client\n");
        return;
      }
      String searchQuery = requestLine.trim();

      reader = writer.getReader();
      searcher = new IndexSearcher(reader);

      QueryParser indexParser = new QueryParser(Version.LUCENE_30, "data", analyzer);

      SortField hitSortField = new SortField("date", SortField.LONG);
      Sort hitSort = new Sort(hitSortField);

      TopFieldDocs hits = searcher.search(indexParser.parse(searchQuery), null, 1000, hitSort);

      searchReply = new PrintWriter(searchSocket.getOutputStream(), true);

      searchReply.println(hits.totalHits + " Hits for " + searchQuery);

      // totalHits counts every match in the index, but scoreDocs only holds the (at most 1000)
      // hits that were requested, so the loop must be bounded by the array length.
      for (int i = 0; i < hits.scoreDocs.length; i++) {
        Document document = searcher.doc(hits.scoreDocs[i].doc);

        String host = document.get("hostname");
        String date = document.get("date");
        String data = document.get("data");

        searchReply.print("host: " + host + ", date: " + date + ", data: " + data + "\n\n");
      }
    } catch (Exception ex) {
      System.out.print("Exception: " + ex + "\n");
    } finally {
      // Release everything even when the request failed part-way; a failure to close one
      // resource must not prevent the others from being closed.
      if (searchReply != null) {
        searchReply.close();
      }
      try {
        if (searcher != null) {
          searcher.close();
        }
      } catch (Exception ex) {
        System.out.print("Exception: " + ex + "\n");
      }
      try {
        if (reader != null) {
          reader.close();
        }
      } catch (Exception ex) {
        System.out.print("Exception: " + ex + "\n");
      }
      try {
        if (searchSocket != null) {
          searchSocket.close();
        }
      } catch (Exception ex) {
        System.out.print("Exception: " + ex + "\n");
      }
    }
  }
Example #6
0
  /**
   * Picks albums at random from the ID3 album index.
   *
   * <p>All albums belonging to any of the given music folders are fetched from the index, then
   * hits are drawn at random, without replacement, until enough albums have been collected or no
   * hits remain. Failures are logged; whatever was collected so far is returned.
   *
   * @param count Number of albums to return.
   * @param musicFolders Only return albums from these folders.
   * @return List of random albums.
   */
  public List<Album> getRandomAlbumsId3(int count, List<MusicFolder> musicFolders) {
    List<Album> albums = new ArrayList<Album>();
    IndexReader indexReader = null;

    try {
      indexReader = createIndexReader(ALBUM_ID3);
      Searcher indexSearcher = new IndexSearcher(indexReader);

      // One clause per folder, OR-ed together.
      List<SpanTermQuery> folderClauses = new ArrayList<SpanTermQuery>();
      for (MusicFolder folder : musicFolders) {
        Term folderTerm = new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(folder.getId()));
        folderClauses.add(new SpanTermQuery(folderTerm));
      }
      SpanQuery[] clauseArray = folderClauses.toArray(new SpanQuery[folderClauses.size()]);
      Query folderQuery = new SpanOrQuery(clauseArray);

      TopDocs allHits = indexSearcher.search(folderQuery, null, Integer.MAX_VALUE);
      List<ScoreDoc> candidates = Lists.newArrayList(allHits.scoreDocs);
      Random rng = new Random(System.currentTimeMillis());

      while (result(albums) < count && !candidates.isEmpty()) {
        ScoreDoc picked = candidates.remove(rng.nextInt(candidates.size()));
        Document pickedDoc = indexSearcher.doc(picked.doc);
        int albumId = Integer.valueOf(pickedDoc.get(FIELD_ID));
        try {
          addIfNotNull(albumDao.getAlbum(albumId), albums);
        } catch (Exception x) {
          LOG.warn("Failed to get album file " + albumId, x);
        }
      }

    } catch (Throwable x) {
      LOG.error("Failed to search for random albums.", x);
    } finally {
      FileUtil.closeQuietly(indexReader);
    }

    return albums;
  }

  /** Returns the number of albums collected so far. */
  private static int result(List<Album> albums) {
    return albums.size();
  }
 /**
  * Do the search.
  *
  * <p>Returns an empty result set when the connection URL starts with
  * "jdbc:columnlist:" or when the query text is null or blank. Otherwise the
  * text is parsed as a Lucene query against the data field and one row is
  * added per hit, starting at {@code offset} and stopping after {@code limit}
  * rows.
  *
  * @param conn the database connection
  * @param text the query
  * @param limit the limit (0 means no explicit limit; see the note on
  *        maxResults in the body)
  * @param offset the offset
  * @param data whether the raw data should be returned
  * @return the result set
  * @throws SQLException if metadata access fails, or if the search fails
  *         (any exception raised while searching is passed through
  *         convertException)
  */
 protected static ResultSet search(
     Connection conn, String text, int limit, int offset, boolean data) throws SQLException {
   SimpleResultSet result = createResultSet(data);
   if (conn.getMetaData().getURL().startsWith("jdbc:columnlist:")) {
     // this is just to query the result set columns
     return result;
   }
   if (text == null || text.trim().length() == 0) {
     return result;
   }
   try {
     IndexAccess access = getIndexAccess(conn);
     // NOTE(review): the "## LUCENE2 ##" / "## LUCENE3 ##" markers below look
     // like build-time switches that select one of two implementations -
     // keep the comment delimiters exactly as they are.
     /*## LUCENE2 ##
     access.modifier.flush();
     String path = getIndexPath(conn);
     IndexReader reader = IndexReader.open(path);
     Analyzer analyzer = new StandardAnalyzer();
     Searcher searcher = new IndexSearcher(reader);
     QueryParser parser = new QueryParser(LUCENE_FIELD_DATA, analyzer);
     Query query = parser.parse(text);
     Hits hits = searcher.search(query);
     int max = hits.length();
     if (limit == 0) {
         limit = max;
     }
     for (int i = 0; i < limit && i + offset < max; i++) {
         Document doc = hits.doc(i + offset);
         float score = hits.score(i + offset);
     //*/
     // ## LUCENE3 ##
     // take a reference as the searcher may change
     Searcher searcher = access.searcher;
     // reuse the same analyzer; it's thread-safe;
     // also allows subclasses to control the analyzer used.
     Analyzer analyzer = access.writer.getAnalyzer();
     QueryParser parser = new QueryParser(Version.LUCENE_30, LUCENE_FIELD_DATA, analyzer);
     Query query = parser.parse(text);
     // Lucene 3 insists on a hard limit and will not provide
     // a total hits value. Take at least 100 which is
     // an optimal limit for Lucene as any more
     // will trigger writing results to disk.
     int maxResults = (limit == 0 ? 100 : limit) + offset;
     TopDocs docs = searcher.search(query, maxResults);
     if (limit == 0) {
       limit = docs.totalHits;
     }
     // the third condition keeps the index inside scoreDocs, which holds at
     // most maxResults entries even when totalHits is larger
     for (int i = 0, len = docs.scoreDocs.length;
         i < limit && i + offset < docs.totalHits && i + offset < len;
         i++) {
       ScoreDoc sd = docs.scoreDocs[i + offset];
       Document doc = searcher.doc(sd.doc);
       float score = sd.score;
       // */
       String q = doc.get(LUCENE_FIELD_QUERY);
       if (data) {
         // split the stored query at " WHERE ": the part before it is parsed
         // as a column expression to obtain the schema and table names, the
         // part after it is handed to parseKey
         int idx = q.indexOf(" WHERE ");
         JdbcConnection c = (JdbcConnection) conn;
         Session session = (Session) c.getSession();
         Parser p = new Parser(session);
         String tab = q.substring(0, idx);
         ExpressionColumn expr = (ExpressionColumn) p.parseExpression(tab);
         String schemaName = expr.getOriginalTableAliasName();
         String tableName = expr.getColumnName();
         q = q.substring(idx + " WHERE ".length());
         Object[][] columnData = parseKey(conn, q);
         result.addRow(schemaName, tableName, columnData[0], columnData[1], score);
       } else {
         result.addRow(q, score);
       }
     }
     /*## LUCENE2 ##
     // TODO keep it open if possible
     reader.close();
     //*/
   } catch (Exception e) {
     throw convertException(e);
   }
   return result;
 }
Example #8
0
  /**
   * Runs a paged full-text search against the index of the given type.
   *
   * <p>The user query is parsed over the index type's fields and combined with a filter that
   * restricts hits to the given music folders. Hits in the window
   * {@code [offset, offset + count)} are resolved to media files, artists or albums depending on
   * the index type. Any failure is logged and the result collected so far is returned.
   *
   * @param criteria the query string, offset and count
   * @param musicFolders only hits from these folders are returned
   * @param indexType which index to search
   * @return the search result, with offset and total hit count set
   */
  public SearchResult search(
      SearchCriteria criteria, List<MusicFolder> musicFolders, IndexType indexType) {
    int offset = criteria.getOffset();
    int count = criteria.getCount();

    SearchResult searchResult = new SearchResult();
    searchResult.setOffset(offset);

    IndexReader indexReader = null;
    try {
      indexReader = createIndexReader(indexType);
      Searcher indexSearcher = new IndexSearcher(indexReader);
      Analyzer analyzer = new SubsonicAnalyzer();

      MultiFieldQueryParser parser =
          new MultiFieldQueryParser(
              LUCENE_VERSION, indexType.getFields(), analyzer, indexType.getBoosts());

      BooleanQuery combined = new BooleanQuery();
      combined.add(parser.parse(analyzeQuery(criteria.getQuery())), BooleanClause.Occur.MUST);

      // ID3-based indexes are keyed by folder id, the file-based ones by folder path.
      boolean folderById = indexType == ALBUM_ID3 || indexType == ARTIST_ID3;
      List<SpanTermQuery> folderClauses = new ArrayList<SpanTermQuery>();
      for (MusicFolder folder : musicFolders) {
        Term folderTerm;
        if (folderById) {
          folderTerm = new Term(FIELD_FOLDER_ID, NumericUtils.intToPrefixCoded(folder.getId()));
        } else {
          folderTerm = new Term(FIELD_FOLDER, folder.getPath().getPath());
        }
        folderClauses.add(new SpanTermQuery(folderTerm));
      }
      SpanQuery[] clauseArray = folderClauses.toArray(new SpanQuery[folderClauses.size()]);
      combined.add(new SpanOrQuery(clauseArray), BooleanClause.Occur.MUST);

      TopDocs topDocs = indexSearcher.search(combined, null, offset + count);
      searchResult.setTotalHits(topDocs.totalHits);

      int first = Math.min(offset, topDocs.totalHits);
      int last = Math.min(first + count, topDocs.totalHits);
      for (int pos = first; pos < last; pos++) {
        Document hit = indexSearcher.doc(topDocs.scoreDocs[pos].doc);
        switch (indexType) {
          case SONG:
          case ARTIST:
          case ALBUM:
            addIfNotNull(
                mediaFileService.getMediaFile(Integer.valueOf(hit.get(FIELD_ID))),
                searchResult.getMediaFiles());
            break;
          case ARTIST_ID3:
            addIfNotNull(
                artistDao.getArtist(Integer.valueOf(hit.get(FIELD_ID))),
                searchResult.getArtists());
            break;
          case ALBUM_ID3:
            addIfNotNull(
                albumDao.getAlbum(Integer.valueOf(hit.get(FIELD_ID))), searchResult.getAlbums());
            break;
          default:
            break;
        }
      }

    } catch (Throwable x) {
      LOG.error("Failed to execute Lucene search.", x);
    } finally {
      FileUtil.closeQuietly(indexReader);
    }
    return searchResult;
  }
Example #9
0
  /**
   * Index the fileset.
   *
   * <p>Walks every filesystem-only resource collection and adds each readable file to the index
   * through the document handler, storing the file path and last-modified time as extra fields.
   * When appending to an existing index, a file whose indexed "modified" value equals its current
   * last-modified time is skipped. The writer, the searcher and the directory are closed on every
   * exit path.
   *
   * @exception IOException if Lucene I/O exception TODO: refactor!!!!!
   */
  private void indexDocs() throws IOException {
    Date start = new Date();

    boolean create = overwrite;
    // If the index directory doesn't exist,
    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    FSDirectory dir = FSDirectory.open(indexDir);
    try {
      Searcher searcher = null;
      boolean checkLastModified = false;
      if (!create) {
        try {
          searcher = new IndexSearcher(dir, true);
          checkLastModified = true;
        } catch (IOException ioe) {
          log("IOException: " + ioe.getMessage());
          // Empty - ignore, which indicates to index all
          // documents
        }
      }

      log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

      int totalFiles = 0;
      int totalIndexed = 0;
      int totalIgnored = 0;
      try {
        IndexWriterConfig conf =
            new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
                .setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);
        LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
        lmp.setUseCompoundFile(useCompoundIndex);
        lmp.setMergeFactor(mergeFactor);
        IndexWriter writer = new IndexWriter(dir, conf);
        try {

          for (int i = 0; i < rcs.size(); i++) {
            ResourceCollection rc = rcs.elementAt(i);
            if (rc.isFilesystemOnly()) {
              Iterator resources = rc.iterator();
              while (resources.hasNext()) {
                Resource r = (Resource) resources.next();
                if (!r.isExists() || !(r instanceof FileResource)) {
                  continue;
                }

                totalFiles++;

                File file = ((FileResource) r).getFile();

                if (!file.exists() || !file.canRead()) {
                  throw new BuildException(
                      "File \"" + file.getAbsolutePath() + "\" does not exist or is not readable.");
                }

                boolean indexIt = true;

                if (checkLastModified) {
                  Term pathTerm = new Term("path", file.getPath());
                  TermQuery query = new TermQuery(pathTerm);
                  ScoreDoc[] hits = searcher.search(query, null, 1).scoreDocs;

                  // if document is found, compare the
                  // indexed last modified time with the
                  // current file
                  // - don't index if up to date
                  if (hits.length > 0) {
                    Document doc = searcher.doc(hits[0].doc);
                    // The field may be absent, so test for null before trimming.
                    String indexModified = doc.get("modified");
                    if (indexModified != null) {
                      long lastModified = 0;
                      try {
                        lastModified = DateTools.stringToTime(indexModified.trim());
                      } catch (ParseException e) {
                        // if modified time is not parsable, skip
                      }
                      if (lastModified == file.lastModified()) {
                        // TODO: remove existing document
                        indexIt = false;
                      }
                    }
                  }
                }

                if (indexIt) {
                  try {
                    log("Indexing " + file.getPath(), Project.MSG_VERBOSE);
                    Document doc = handler.getDocument(file);

                    if (doc == null) {
                      totalIgnored++;
                    } else {
                      // Add the path of the file as a field named "path".  Use a Keyword field, so
                      // that the index stores the path, and so that the path is searchable
                      doc.add(
                          new Field(
                              "path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));

                      // Add the last modified date of the file a field named "modified".  Use a
                      // Keyword field, so that it's searchable, but so that no attempt is made
                      // to tokenize the field into words.
                      doc.add(
                          new Field(
                              "modified",
                              DateTools.timeToString(
                                  file.lastModified(), DateTools.Resolution.MILLISECOND),
                              Field.Store.YES,
                              Field.Index.NOT_ANALYZED));

                      writer.addDocument(doc);
                      totalIndexed++;
                    }
                  } catch (DocumentHandlerException e) {
                    throw new BuildException(e);
                  }
                }
              }
            }
          }

          writer.optimize();
        } finally {
          // always make sure the writer gets closed,
          // no matter how we exit.
          writer.close();
        }
      } finally {
        // Close the searcher even when creating or closing the writer failed.
        if (searcher != null) {
          searcher.close();
        }
      }

      Date end = new Date();

      log(
          totalIndexed
              + " out of "
              + totalFiles
              + " indexed ("
              + totalIgnored
              + " ignored) in "
              + (end.getTime() - start.getTime())
              + " milliseconds");
    } finally {
      dir.close();
    }
  }
  // private static int[] oldToNew(IndexReader reader, Searcher searcher) throws IOException {
  /**
   * Builds one {@link DocScore} per document id of the reader and returns them sorted.
   *
   * <p>Scores are assigned as follows: deleted documents get -1; documents whose "subType" field
   * is missing or is not one of the configured include extensions get -0.5; all other documents
   * get the integer value of their "inlinks" field. The array is sorted with
   * {@link Arrays#sort(Object[])}, i.e. by the natural ordering of {@code DocScore}.
   *
   * @param reader the index reader supplying the document count and deletion flags
   * @param searcher the searcher used to load each live document
   * @return the sorted array of document scores
   * @throws IOException if a document cannot be read
   */
  private static DocScore[] newToOld(IndexReader reader, Searcher searcher) throws IOException {
    int readerMax = reader.maxDoc();
    DocScore[] newToOld = new DocScore[readerMax];

    // TODO MC: scores used to come from the norms of the "site" field
    // (reader.norms("site")); they are now derived from subType/inlinks instead.
    Pattern includes = Pattern.compile("\\|");
    String value = NutchConfiguration.create().get(INCLUDE_EXTENSIONS_KEY, "");
    String[] includeExtensions = includes.split(value);
    Hashtable<String, Boolean> validExtensions = new Hashtable<String, Boolean>();
    for (int i = 0; i < includeExtensions.length; i++) {
      validExtensions.put(includeExtensions[i], true);
      System.out.println("extension boosted " + includeExtensions[i]);
    }

    for (int oldDoc = 0; oldDoc < readerMax; oldDoc++) {
      float score;
      if (reader.isDeleted(oldDoc)) {
        score = -1f; // TODO MC (was 0.0f)
      } else {
        Document docMeta = searcher.doc(oldDoc);
        String subType = docMeta.get("subType");
        // Hashtable.get(null) throws NullPointerException, so a document without a subType
        // field is handled explicitly and treated like any other non-boosted extension.
        if (subType == null || validExtensions.get(subType) == null) {
          score = -0.5f;
        } else {
          // searched extensions will have higher scores
          // NOTE(review): a boosted document without an "inlinks" field still fails here with
          // NumberFormatException - confirm whether a default value is wanted.
          score = Integer.parseInt(docMeta.get("inlinks"));
        }
      }
      DocScore docScore = new DocScore();
      docScore.doc = oldDoc;
      docScore.score = score;
      newToOld[oldDoc] = docScore;
    }

    System.out.println("Sorting " + newToOld.length + " documents.");
    Arrays.sort(newToOld);
    // HeapSorter.sort(newToOld); // TODO MC - due to the lack of space

    // TODO MC: the former oldToNew int[] mapping is no longer built; the sorted
    // DocScore array is returned directly.
    return newToOld;
  }