/**
 * Command-line test driver: reads the torrent file named by the first argument, parses it
 * with a {@code torrentParser} and prints every word produced by the {@code Condenser}
 * together with its {@code posInText} value.
 *
 * @param args {@code args[0]} must be the path of the torrent file to read
 */
public static void main(String[] args) {
    // Without this guard a missing argument ends in an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 1) {
        System.err.println("usage: torrentParser <torrent-file>");
        return;
    }
    try {
        final byte[] content = FileUtils.read(new File(args[0]));
        final torrentParser parser = new torrentParser();
        // The fixed URL is passed as the document location; the content itself
        // comes from the bytes of the file given on the command line.
        final Document[] docs = parser.parse(
                new DigestURL("http://localhost/test.torrent"),
                null,
                StandardCharsets.UTF_8.name(),
                new VocabularyScraper(),
                0,
                new ByteArrayInputStream(content));
        final Condenser condenser =
                new Condenser(docs[0], null, true, true, LibraryProvider.dymLib, false, false, 0);
        final Map<String, Word> words = condenser.words();
        for (final Map.Entry<String, Word> entry : words.entrySet()) {
            System.out.println("Word: " + entry.getKey() + " - " + entry.getValue().posInText);
        }
    } catch (final IOException | Parser.Failure e) {
        e.printStackTrace();
    } catch (final InterruptedException e) {
        // Restore the interrupt flag so the interruption is not silently lost.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
@Override public Document[] parse( final DigestURL location, final String mimeType, final String charset, final VocabularyScraper scraper, final int timezoneOffset, final InputStream source) throws Parser.Failure, InterruptedException { byte[] b = null; try { b = FileUtils.read(source); } catch (final IOException e1) { throw new Parser.Failure(e1.toString(), location); } final BDecoder bd = new BDecoder(b); final BObject bo = bd.parse(); if (bo == null) throw new Parser.Failure("BDecoder.parse returned null", location); if (bo.getType() != BType.dictionary) throw new Parser.Failure("BDecoder object is not a dictionary", location); final Map<String, BObject> map = bo.getMap(); final BObject commento = map.get("comment"); final String comment = (commento == null) ? "" : UTF8.String(commento.getString()); // Date creation = new Date(map.get("creation date").getInteger()); final BObject infoo = map.get("info"); final StringBuilder filenames = new StringBuilder(80); String title = ""; if (infoo != null) { final Map<String, BObject> info = infoo.getMap(); final BObject fileso = info.get("files"); if (fileso != null) { final List<BObject> filelist = fileso.getList(); for (final BObject fo : filelist) { final BObject patho = fo.getMap().get("path"); if (patho != null) { final List<BObject> l = patho.getList(); // one file may have several names for (final BObject fl : l) { filenames.append(fl.toString()).append(" "); } } } } final BObject nameo = info.get("name"); if (nameo != null) title = UTF8.String(nameo.getString()); } if (title == null || title.isEmpty()) title = MultiProtocolURL.unescape(location.getFileName()); return new Document[] { new Document( location, mimeType, charset, this, null, null, singleList(title), // title comment, // author location.getHost(), null, null, 0.0d, 0.0d, filenames.toString(), null, null, null, false, new Date()) }; }