private void addDependencies(HashSet<byte[]> deps, CommandData data) {
  // NB: arrays use identity equality, so a HashSet<byte[]> will not
  // de-duplicate arrays with identical contents (see demo below).
  for (byte[] dep : data.dependencies) {
    deps.add(dep);
  }
  for (byte[] dep : data.runbundles) {
    deps.add(dep);
  }
}
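// Why the note above matters: a quick, self-contained demo of the pitfall.
// Wrapping arrays in java.nio.ByteBuffer is one way to get content equality.
import java.nio.ByteBuffer;
import java.util.HashSet;

class ByteArraySetDemo {
  public static void main(String[] args) {
    HashSet<byte[]> raw = new HashSet<byte[]>();
    raw.add(new byte[] {1, 2, 3});
    raw.add(new byte[] {1, 2, 3});
    System.out.println(raw.size()); // 2, not 1: identity equality

    HashSet<ByteBuffer> wrapped = new HashSet<ByteBuffer>();
    wrapped.add(ByteBuffer.wrap(new byte[] {1, 2, 3}));
    wrapped.add(ByteBuffer.wrap(new byte[] {1, 2, 3}));
    System.out.println(wrapped.size()); // 1: content equality
  }
}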
public void registerListener(int id, MessageListener m) {
  // Autoboxing replaces the deprecated new Integer(id);
  // generics replace the raw HashSet.
  HashSet<MessageListener> listenerSet = (HashSet<MessageListener>) idTable.get(id);
  if (listenerSet == null) {
    listenerSet = new HashSet<MessageListener>();
    idTable.put(id, listenerSet);
  }
  listenerSet.add(m);
  log.info("New listener for id=" + id);
}
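// A minimal, self-contained dispatch sketch for a registry like the one
// above. The Listener interface and its onMessage callback are hypothetical
// stand-ins; the real MessageListener interface is not shown in this file.
import java.util.HashMap;
import java.util.HashSet;

class ListenerRegistrySketch {
  interface Listener {
    void onMessage(Object message); // hypothetical callback
  }

  private final HashMap<Integer, HashSet<Listener>> idTable =
      new HashMap<Integer, HashSet<Listener>>();

  public void register(int id, Listener l) {
    HashSet<Listener> set = idTable.get(id);
    if (set == null) {
      set = new HashSet<Listener>();
      idTable.put(id, set);
    }
    set.add(l);
  }

  public void dispatch(int id, Object message) {
    HashSet<Listener> set = idTable.get(id);
    if (set == null) {
      return; // nobody registered for this id
    }
    for (Listener l : set) {
      l.onMessage(message);
    }
  }
}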
public static void GetDownloadIndex() throws Exception {
  // Read the whole page into a StringBuilder. The original initialized
  // op to null and concatenated onto it, so the result began with the
  // literal text "null"; it also never closed the reader.
  StringBuilder op = new StringBuilder();
  try (BufferedReader in = new BufferedReader(new InputStreamReader(ylink.openStream()))) {
    int b;
    while ((b = in.read()) != -1) {
      op.append((char) b);
    }
  }
  String[] list = op.toString().split("url=");
  for (String i : list) {
    if (i.startsWith("https")) {
      set.add(i.split(",")[0]);
    }
  }
  for (String i : set) {
    String temp = i.replace("\\", "\\\\");
    // url/type/itag are now per-iteration locals; in the original they
    // were declared outside the loop, so stale values leaked between URLs.
    String url = null;
    for (String sdata : temp.split("\\\\")) {
      if (sdata.startsWith("https")) {
        url = sdata;
      }
    }
    if (url == null) {
      continue;
    }
    url = url.split(" ")[0];
    url = URLDecoder.decode(url, "UTF-8");
    String type = null, itag = null;
    if (url.contains("mime=")) {
      type = url.split("mime=")[1].split("&")[0];
    }
    if (url.contains("itag")) {
      itag = url.split("itag=")[1].split("&")[0];
    }
    String entry = url + "\ntype: " + type + "\nitag: " + itag;
    // Guard against a missing mime= parameter; the original dereferenced
    // type without checking and could throw a NullPointerException here.
    if (url.contains("&gir=yes") && type != null) {
      if (type.startsWith("video")) {
        video_set.add(entry);
      } else if (type.startsWith("audio")) {
        audio_set.add(entry);
      }
    } else {
      av_set.add(entry);
    }
  }
}
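// The mime=/itag= handling above re-splits the URL for each key. A small
// illustrative helper showing the same "value runs from key= to the next &"
// logic in one place; extractParam is a hypothetical name, not part of the
// original code, and it does not distinguish "itag=" from e.g. "xitag=".
class ParamSketch {
  static String extractParam(String url, String key) {
    int idx = url.indexOf(key + "=");
    if (idx < 0) {
      return null; // parameter absent
    }
    String tail = url.substring(idx + key.length() + 1);
    int amp = tail.indexOf('&');
    return amp < 0 ? tail : tail.substring(0, amp);
  }

  public static void main(String[] args) {
    String u = "https://example.test/videoplayback?itag=22&mime=video%2Fmp4";
    System.out.println(extractParam(u, "itag")); // 22
    System.out.println(extractParam(u, "mime")); // video%2Fmp4
  }
}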
public void testStoreAuEmptyState() throws Exception {
  HashSet strCol = new HashSet();
  strCol.add("test");
  AuState origState = new AuState(mau, repository);
  repository.storeAuState(origState);
  AuState loadedState = repository.loadAuState();
  assertEquals(-1, loadedState.getLastCrawlTime());
  assertEquals(-1, loadedState.getLastCrawlAttempt());
  assertEquals(-1, loadedState.getLastCrawlResult());
  assertEquals("Unknown code -1", loadedState.getLastCrawlResultMsg());
  assertEquals(-1, loadedState.getLastTopLevelPollTime());
  assertEquals(-1, loadedState.getLastPollStart());
  assertEquals(-1, loadedState.getLastPollResult());
  assertEquals(null, loadedState.getLastPollResultMsg());
  assertEquals(-1, loadedState.getLastPoPPoll());
  assertEquals(-1, loadedState.getLastPoPPollResult());
  assertEquals(-1, loadedState.getLastLocalHashScan());
  assertEquals(0, loadedState.getNumAgreePeersLastPoR());
  assertEquals(0, loadedState.getNumWillingRepairers());
  assertEquals(0, loadedState.getNumCurrentSuspectVersions());
  assertEmpty(loadedState.getCdnStems());
  // adding the same CDN stem twice must not create a duplicate
  loadedState.addCdnStem("http://this.is.new/");
  assertEquals(ListUtil.list("http://this.is.new/"), loadedState.getCdnStems());
  loadedState.addCdnStem("http://this.is.new/");
  assertEquals(ListUtil.list("http://this.is.new/"), loadedState.getCdnStems());
  assertEquals(0, loadedState.getPollDuration());
  assertEquals(0, loadedState.getClockssSubscriptionStatus());
  assertEquals(null, loadedState.getAccessType());
  assertEquals(SubstanceChecker.State.Unknown, loadedState.getSubstanceState());
  assertEquals(null, loadedState.getFeatureVersion(Plugin.Feature.Substance));
  assertEquals(null, loadedState.getFeatureVersion(Plugin.Feature.Metadata));
  assertEquals(-1, loadedState.getLastMetadataIndex());
  assertEquals(0, loadedState.getLastContentChange());
  assertEquals(mau.getAuId(), loadedState.getArchivalUnit().getAuId());
}
/** Loads the lexicon and morph files. */
public void init(URL lexiconUrl, URL morphUrl) throws IOException {
  List<Family> lexicon = null;
  List<MorphItem> morph = null;
  List<MacroItem> macroModel = null;

  // load category families (lexicon), morph forms and macros
  lexicon = getLexicon(lexiconUrl);
  Pair<List<MorphItem>, List<MacroItem>> morphInfo = getMorph(morphUrl);
  morph = morphInfo.a;
  macroModel = morphInfo.b;

  // index words; also index stems to words, as default preds
  // store indexed coarticulation attrs too
  _words = new GroupMap<Word, MorphItem>();
  _predToWords = new GroupMap<String, Word>();
  _coartAttrs = new HashSet<String>();
  _indexedCoartAttrs = new HashSet<String>();
  for (MorphItem morphItem : morph) {
    Word surfaceWord = morphItem.getSurfaceWord();
    _words.put(surfaceWord, morphItem);
    _predToWords.put(morphItem.getWord().getStem(), surfaceWord);
    if (morphItem.isCoart()) {
      Word indexingWord = morphItem.getCoartIndexingWord();
      _words.put(indexingWord, morphItem);
      Pair<String, String> first = indexingWord.getSurfaceAttrValPairs().next();
      _indexedCoartAttrs.add(first.a);
      for (Iterator<Pair<String, String>> it = surfaceWord.getSurfaceAttrValPairs(); it.hasNext(); ) {
        Pair<String, String> p = it.next();
        _coartAttrs.add(p.a);
      }
    }
  }

  // index entries based on stem+pos
  _stems = new GroupMap<String, Object>();
  _posToEntries = new GroupMap<String, EntriesItem[]>();
  // index entries by supertag+pos, for supertagging
  _stagToEntries = new GroupMap<String, EntriesItem>();
  // also index rels and coart rels to preds
  _relsToPreds = new GroupMap<String, String>();
  _coartRelsToPreds = new GroupMap<String, String>();
  // and gather list of attributes used per atomic category type
  _catsToAttrs = new GroupMap<String, String>();
  _lfAttrs = new HashSet<String>();
  // and remember family and entry names, for checking excluded list on morph items
  HashSet<String> familyAndEntryNames = new HashSet<String>();

  // index each family
  for (Family family : lexicon) {
    familyAndEntryNames.add(family.getName());
    EntriesItem[] entries = family.getEntries();
    DataItem[] data = family.getData();

    // for generic use when we get an unknown stem
    // from the morphological analyzer
    if (!family.isClosed()) {
      _posToEntries.put(family.getPOS(), entries);
    }

    // scan through entries
    for (int j = 0; j < entries.length; j++) {
      // index
      EntriesItem eItem = entries[j];
      _stagToEntries.put(eItem.getSupertag() + family.getPOS(), eItem);
      if (eItem.getStem().length() > 0) {
        _stems.put(eItem.getStem() + family.getPOS(), eItem);
      }
      try {
        // gather features
        eItem.getCat().forall(gatherAttrs);
        // record names
        familyAndEntryNames.add(eItem.getName());
        familyAndEntryNames.add(eItem.getQualifiedName());
      } catch (RuntimeException exc) {
        System.err.println("exception for: " + family.getName() + ": " + exc);
      }
    }

    // scan through data
    for (int j = 0; j < data.length; j++) {
      DataItem dItem = data[j];
      _stems.put(dItem.getStem() + family.getPOS(),
                 new Pair<DataItem, EntriesItem[]>(dItem, entries));
      // index non-default preds to words
      if (!dItem.getStem().equals(dItem.getPred())) {
        Collection<Word> words = (Collection<Word>) _predToWords.get(dItem.getStem());
        if (words == null) {
          if (!openlex) {
            System.out.print("Warning: couldn't find words for pred '");
            System.out.println(dItem.getPred() + "' with stem '" + dItem.getStem() + "'");
          }
        } else {
          for (Iterator<Word> it = words.iterator(); it.hasNext(); ) {
            _predToWords.put(dItem.getPred(), it.next());
          }
        }
      }
    }

    // index rels to preds
    // nb: this covers relational (eg @x<GenRel>e) and featural (eg @e<tense>past)
    //     elementary predications
    List<String> indexRels = new ArrayList<String>(3);
    String familyIndexRel = family.getIndexRel();
    if (familyIndexRel.length() > 0) {
      indexRels.add(familyIndexRel);
    }
    for (int j = 0; j < entries.length; j++) {
      EntriesItem eItem = entries[j];
      String indexRel = eItem.getIndexRel();
      if (indexRel.length() > 0 && !indexRel.equals(familyIndexRel)) {
        indexRels.add(indexRel);
      }
    }
    for (Iterator<String> it = indexRels.iterator(); it.hasNext(); ) {
      String indexRel = it.next();
      // nb: not indexing on entries items, b/c some stems are still defaults
      for (int j = 0; j < data.length; j++) {
        DataItem dItem = data[j];
        _relsToPreds.put(indexRel, dItem.getPred());
      }
    }

    // index coart rels (features, really) to preds
    String coartRel = family.getCoartRel();
    if (coartRel.length() > 0) {
      for (int j = 0; j < data.length; j++) {
        _coartRelsToPreds.put(coartRel, data[j].getPred());
      }
    }
  }

  // index the macros
  _macros = new GroupMap<String, FeatureStructure>();
  // nb: could just index MacroItem objects for feature structures too;
  //     this might be a bit cleaner, but life is short
  _macroItems = new HashMap<String, MacroItem>();
  for (MacroItem mi : macroModel) {
    String macName = mi.getName();
    FeatureStructure[] specs = mi.getFeatureStructures();
    for (int j = 0; j < specs.length; j++) {
      _macros.put(macName, specs[j]);
    }
    // this is for handling LF part of macros
    _macroItems.put(macName, mi);
  }

  // with morph items, check POS, macro names, excluded list for xref
  for (MorphItem morphItem : morph) {
    Word w = morphItem.getWord();
    if (!openlex
        && !_stems.containsKey(w.getStem() + w.getPOS())
        && !_posToEntries.containsKey(w.getPOS())) {
      System.err.println("Warning: no entries for stem '" + w.getStem()
          + "' and POS '" + w.getPOS() + "' found for word '" + w + "'");
    }
    String[] macroNames = morphItem.getMacros();
    for (int j = 0; j < macroNames.length; j++) {
      if (!_macroItems.containsKey(macroNames[j])) {
        System.err.println("Warning: macro " + macroNames[j]
            + " not found for word '" + morphItem.getWord() + "'");
      }
    }
    String[] excludedNames = morphItem.getExcluded();
    for (int j = 0; j < excludedNames.length; j++) {
      if (!familyAndEntryNames.contains(excludedNames[j])) {
        System.err.println("Warning: excluded family or entry '" + excludedNames[j]
            + "' not found for word '" + morphItem.getWord() + "'");
      }
    }
  }
}
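// GroupMap is used throughout init() as a multimap: put(k, v) accumulates
// values, get(k) returns a Collection. A minimal sketch of those assumed
// semantics, inferred from the call sites above (the real class may differ):
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;

class GroupMapSketch<K, V> {
  private final HashMap<K, HashSet<V>> map = new HashMap<K, HashSet<V>>();

  /** Accumulates v under k rather than replacing the previous value. */
  public void put(K k, V v) {
    HashSet<V> vals = map.get(k);
    if (vals == null) {
      vals = new HashSet<V>();
      map.put(k, vals);
    }
    vals.add(v);
  }

  /** Returns all values grouped under k, or null if none. */
  public Collection<V> get(K k) {
    return map.get(k);
  }

  public boolean containsKey(K k) {
    return map.containsKey(k);
  }
}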
/** Adds a new sentence to the 'brain'. */
public void add(String sentence) {
  sentence = sentence.trim();
  // Tokenize into alternating runs of word characters and punctuation.
  ArrayList parts = new ArrayList();
  char[] chars = sentence.toCharArray();
  int i = 0;
  boolean punctuation = false;
  StringBuffer buffer = new StringBuffer();
  while (i < chars.length) {
    char ch = chars[i];
    if ((WORD_CHARS.indexOf(ch) >= 0) == punctuation) {
      // Character class flipped: flush the current token. Note there is
      // deliberately no i++ here, so this char is reprocessed (and kept)
      // on the next pass, now that the punctuation flag has flipped.
      punctuation = !punctuation;
      String token = buffer.toString();
      if (token.length() > 0) {
        parts.add(token);
      }
      buffer = new StringBuffer();
      continue;
    }
    buffer.append(ch);
    i++;
  }
  String lastToken = buffer.toString();
  if (lastToken.length() > 0) {
    parts.add(lastToken);
  }

  // Record every overlapping 4-gram (quad), plus which tokens can
  // precede and follow it; see the generation sketch below.
  if (parts.size() >= 4) {
    for (i = 0; i < parts.size() - 3; i++) {
      Quad quad = new Quad((String) parts.get(i), (String) parts.get(i + 1),
          (String) parts.get(i + 2), (String) parts.get(i + 3));
      // Canonicalize: reuse the stored instance if this quad was seen before.
      if (quads.containsKey(quad)) {
        quad = (Quad) quads.get(quad);
      } else {
        quads.put(quad, quad);
      }
      if (i == 0) {
        quad.setCanStart(true);
      }
      if (i == parts.size() - 4) {
        quad.setCanEnd(true);
      }
      // Index each token to the quads containing it.
      for (int n = 0; n < 4; n++) {
        String token = (String) parts.get(i + n);
        if (!words.containsKey(token)) {
          words.put(token, new HashSet(1));
        }
        HashSet set = (HashSet) words.get(token);
        set.add(quad);
      }
      if (i > 0) {
        String previousToken = (String) parts.get(i - 1);
        if (!previous.containsKey(quad)) {
          previous.put(quad, new HashSet(1));
        }
        HashSet set = (HashSet) previous.get(quad);
        set.add(previousToken);
      }
      if (i < parts.size() - 4) {
        String nextToken = (String) parts.get(i + 4);
        if (!next.containsKey(quad)) {
          next.put(quad, new HashSet(1));
        }
        HashSet set = (HashSet) next.get(quad);
        set.add(nextToken);
      }
    }
  } else {
    // Fewer than four tokens: didn't learn anything.
  }
}
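// The maps built in add() support MegaHAL-style generation: start from a
// quad, then repeatedly extend with a token drawn from the 'next' map. A
// self-contained sketch of that forward walk, keying quads as 4-token lists
// (the real Quad class and reply logic are not shown in this file):
import java.util.*;

class QuadWalkSketch {
  public static void main(String[] args) {
    // Toy model of learned state from "the cat sat on the mat".
    Map<List<String>, Set<String>> next = new HashMap<List<String>, Set<String>>();
    next.put(Arrays.asList("the", "cat", "sat", "on"),
             new HashSet<String>(Arrays.asList("the")));
    next.put(Arrays.asList("cat", "sat", "on", "the"),
             new HashSet<String>(Arrays.asList("mat")));

    Random rng = new Random();
    List<String> out = new ArrayList<String>(Arrays.asList("the", "cat", "sat", "on"));
    while (true) {
      // The last four tokens form the current quad.
      List<String> key = out.subList(out.size() - 4, out.size());
      Set<String> options = next.get(key);
      if (options == null) {
        break; // no known continuation: the quad can end here
      }
      List<String> opts = new ArrayList<String>(options);
      out.add(opts.get(rng.nextInt(opts.size())));
    }
    System.out.println(String.join(" ", out)); // the cat sat on the mat
  }
}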
public void testStoreAuState() throws Exception {
  HashSet strCol = new HashSet();
  strCol.add("test");
  AuState origState = new AuState(
      mau,
      123000, 123123, 41, "woop woop",
      321000, 222000, 3, "pollres", 12345,
      456000, strCol,
      AuState.AccessType.OpenAccess,
      2, 1.0, 1.0,
      SubstanceChecker.State.Yes,
      "SubstVer3", "MetadatVer7",
      111444, 12345,
      111222, // lastPoPPoll
      7, // lastPoPPollResult
      222333, // lastLocalHashScan
      444777, // numAgreePeersLastPoR
      777444, // numWillingRepairers
      747474, // numCurrentSuspectVersions
      ListUtil.list("http://hos.t/pa/th"),
      repository);
  assertEquals("SubstVer3", origState.getFeatureVersion(Plugin.Feature.Substance));
  assertEquals("MetadatVer7", origState.getFeatureVersion(Plugin.Feature.Metadata));
  assertEquals(111444, origState.getLastMetadataIndex());
  repository.storeAuState(origState);
  String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau);
  filePath += HistoryRepositoryImpl.AU_FILE_NAME;
  File xmlFile = new File(filePath);
  assertTrue(xmlFile.exists());
  origState = null;
  AuState loadedState = repository.loadAuState();
  assertEquals(123000, loadedState.getLastCrawlTime());
  assertEquals(123123, loadedState.getLastCrawlAttempt());
  assertEquals(41, loadedState.getLastCrawlResult());
  assertEquals("woop woop", loadedState.getLastCrawlResultMsg());
  assertEquals(321000, loadedState.getLastTopLevelPollTime());
  assertEquals(222000, loadedState.getLastPollStart());
  assertEquals(3, loadedState.getLastPollResult());
  assertEquals("Inviting Peers", loadedState.getLastPollResultMsg());
  assertEquals(111222, loadedState.getLastPoPPoll());
  assertEquals(7, loadedState.getLastPoPPollResult());
  assertEquals(222333, loadedState.getLastLocalHashScan());
  assertEquals(444777, loadedState.getNumAgreePeersLastPoR());
  assertEquals(777444, loadedState.getNumWillingRepairers());
  assertEquals(747474, loadedState.getNumCurrentSuspectVersions());
  assertEquals(ListUtil.list("http://hos.t/pa/th"), loadedState.getCdnStems());
  loadedState.addCdnStem("http://this.is.new/");
  assertEquals(ListUtil.list("http://hos.t/pa/th", "http://this.is.new/"),
               loadedState.getCdnStems());
  assertEquals(12345, loadedState.getPollDuration());
  assertEquals(2, loadedState.getClockssSubscriptionStatus());
  assertEquals(AuState.AccessType.OpenAccess, loadedState.getAccessType());
  assertEquals(SubstanceChecker.State.Yes, loadedState.getSubstanceState());
  assertEquals("SubstVer3", loadedState.getFeatureVersion(Plugin.Feature.Substance));
  assertEquals("MetadatVer7", loadedState.getFeatureVersion(Plugin.Feature.Metadata));
  assertEquals(111444, loadedState.getLastMetadataIndex());
  assertEquals(12345, loadedState.getLastContentChange());
  assertEquals(mau.getAuId(), loadedState.getArchivalUnit().getAuId());
  // check crawl urls
  Collection col = loadedState.getCrawlUrls();
  Iterator colIter = col.iterator();
  assertTrue(colIter.hasNext());
  assertEquals("test", colIter.next());
  assertFalse(colIter.hasNext());
}
@Override
public HashSet<ScoredAnnotation> solveSa2W(String text) throws AnnotationException {
  HashSet<ScoredAnnotation> res;
  try {
    res = new HashSet<ScoredAnnotation>();
    lastTime = Calendar.getInstance().getTimeInMillis();

    // POST the text to the Wikipedia Miner service as a form body.
    URL wikiApi = new URL(url);
    String parameters = "references=true&repeatMode=all&minProbability=0.0&source="
        + URLEncoder.encode(text, "UTF-8");
    HttpURLConnection slConnection = (HttpURLConnection) wikiApi.openConnection();
    slConnection.setRequestProperty("accept", "text/xml");
    slConnection.setDoOutput(true);
    slConnection.setDoInput(true);
    slConnection.setRequestMethod("POST");
    slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    slConnection.setRequestProperty("charset", "utf-8");
    // Use the UTF-8 byte length, matching the declared charset (the
    // original measured bytes in the platform default encoding).
    slConnection.setRequestProperty("Content-Length",
        String.valueOf(parameters.getBytes("UTF-8").length));
    slConnection.setUseCaches(false);
    DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream());
    wr.writeBytes(parameters);
    wr.flush();
    wr.close();

    // Parse the XML response.
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder = factory.newDocumentBuilder();
    Document doc = builder.parse(slConnection.getInputStream());
    lastTime = Calendar.getInstance().getTimeInMillis() - lastTime;

    // Extract detected topics and their mention offsets via XPath.
    XPathFactory xPathfactory = XPathFactory.newInstance();
    XPath xpath = xPathfactory.newXPath();
    XPathExpression idExpr = xpath.compile("//detectedTopic/@id");
    XPathExpression weightExpr = xpath.compile("//detectedTopic/@weight");
    XPathExpression referenceExpr = xpath.compile("//detectedTopic/references");
    NodeList ids = (NodeList) idExpr.evaluate(doc, XPathConstants.NODESET);
    NodeList weights = (NodeList) weightExpr.evaluate(doc, XPathConstants.NODESET);
    NodeList references = (NodeList) referenceExpr.evaluate(doc, XPathConstants.NODESET);
    for (int i = 0; i < weights.getLength(); i++) {
      if (weights.item(i).getNodeType() != Node.TEXT_NODE) {
        int id = Integer.parseInt(ids.item(i).getNodeValue());
        float weight = Float.parseFloat(weights.item(i).getNodeValue());
        XPathExpression startExpr =
            xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@start");
        XPathExpression endExpr =
            xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@end");
        NodeList starts = (NodeList) startExpr.evaluate(references.item(i), XPathConstants.NODESET);
        NodeList ends = (NodeList) endExpr.evaluate(references.item(i), XPathConstants.NODESET);
        // One scored annotation per (start, end) mention of this topic.
        for (int j = 0; j < starts.getLength(); j++) {
          int start = Integer.parseInt(starts.item(j).getNodeValue());
          int end = Integer.parseInt(ends.item(j).getNodeValue());
          int len = end - start;
          res.add(new ScoredAnnotation(start, len, id, weight));
        }
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
    throw new AnnotationException(
        "An error occurred while querying Wikipedia Miner API. Message: " + e.getMessage());
  }
  return res;
}
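// For reference, the same form-POST-and-read flow with try-with-resources,
// which closes the output stream even when a write fails. A standalone
// sketch only: the endpoint URL is a placeholder, not the Wikipedia Miner
// service, and this is not part of the annotator above (requires Java 9+
// for InputStream.readAllBytes).
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

class FormPostSketch {
  static String post(String endpoint, String body) throws Exception {
    HttpURLConnection conn = (HttpURLConnection) new URL(endpoint).openConnection();
    conn.setRequestMethod("POST");
    conn.setDoOutput(true);
    conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
    byte[] payload = body.getBytes("UTF-8");
    // Sets Content-Length from the actual UTF-8 payload size.
    conn.setFixedLengthStreamingMode(payload.length);
    try (OutputStream out = conn.getOutputStream()) {
      out.write(payload); // stream closed automatically
    }
    return new String(conn.getInputStream().readAllBytes(), "UTF-8");
  }

  public static void main(String[] args) throws Exception {
    // httpbin.org/post simply echoes the form fields back.
    System.out.println(post("https://httpbin.org/post",
        "source=" + URLEncoder.encode("some text", "UTF-8")));
  }
}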