Exemplo n.º 1
0
 private void addDependencies(HashSet<byte[]> deps, CommandData data) {
   for (byte[] dep : data.dependencies) {
     deps.add(dep);
   }
   for (byte[] dep : data.runbundles) {
     deps.add(dep);
   }
 }
Exemplo n.º 2
0
  public void registerListener(int id, MessageListener m) {
    HashSet listenerSet = (HashSet) idTable.get(new Integer(id));

    if (listenerSet == null) {
      listenerSet = new HashSet();
      idTable.put(new Integer(id), listenerSet);
    }
    listenerSet.add(m);
    log.info("New Listener for id=" + id);
  }
Exemplo n.º 3
0
 public static void GetDownloadIndex() throws Exception {
   BufferedReader in = new BufferedReader(new InputStreamReader(ylink.openStream()));
   // BufferedWriter out = new BufferedWriter(new FileWriter("output.html"));
   int b;
   String op = null;
   while ((b = in.read()) != -1) {
     //	out.write(b);
     op = op + (char) b;
   }
   String[] list = op.split("url=");
   for (String i : list) {
     if (i.startsWith("https")) {
       set.add(i.split(",")[0]);
     }
   }
   String url = null, itag = null, quality_label = null, type = null;
   for (String i : set) {
     String temp = i;
     temp = temp.replace("\\", "\\\\");
     for (String sdata : temp.split("\\\\")) {
       if (sdata.startsWith("https")) url = sdata;
     }
     if (url != null) {
       if (url.startsWith("https")) {
         url = url.split(" ")[0];
         url = URLDecoder.decode(url, "UTF-8");
         if (url.contains("mime=")) type = (url.split("mime=")[1]).split("&")[0];
         if (url.contains("itag")) itag = (url.split("itag=")[1]).split("&")[0];
         if (url.contains("&gir=yes")) {
           if (type.startsWith("video"))
             video_set.add(url + "\ntype: " + type + "\nitag: " + itag);
           else if (type.startsWith("audio"))
             audio_set.add(url + "\ntype: " + type + "\nitag: " + itag);
         } else av_set.add(url + "\ntype: " + type + "\nitag: " + itag);
       }
     }
   }
 }
  public void testStoreAuEmptyState() throws Exception {
    HashSet strCol = new HashSet();
    strCol.add("test");
    AuState origState = new AuState(mau, repository);
    repository.storeAuState(origState);
    AuState loadedState = repository.loadAuState();
    assertEquals(-1, loadedState.getLastCrawlTime());
    assertEquals(-1, loadedState.getLastCrawlAttempt());
    assertEquals(-1, loadedState.getLastCrawlResult());
    assertEquals("Unknown code -1", loadedState.getLastCrawlResultMsg());
    assertEquals(-1, loadedState.getLastTopLevelPollTime());
    assertEquals(-1, loadedState.getLastPollStart());
    assertEquals(-1, loadedState.getLastPollResult());
    assertEquals(null, loadedState.getLastPollResultMsg());

    assertEquals(-1, loadedState.getLastPoPPoll());
    assertEquals(-1, loadedState.getLastPoPPollResult());
    assertEquals(-1, loadedState.getLastLocalHashScan());
    assertEquals(0, loadedState.getNumAgreePeersLastPoR());
    assertEquals(0, loadedState.getNumWillingRepairers());
    assertEquals(0, loadedState.getNumCurrentSuspectVersions());
    assertEmpty(loadedState.getCdnStems());
    loadedState.addCdnStem("http://this.is.new/");
    assertEquals(ListUtil.list("http://this.is.new/"), loadedState.getCdnStems());
    loadedState.addCdnStem("http://this.is.new/");
    assertEquals(ListUtil.list("http://this.is.new/"), loadedState.getCdnStems());

    assertEquals(0, loadedState.getPollDuration());
    assertEquals(0, loadedState.getClockssSubscriptionStatus());
    assertEquals(null, loadedState.getAccessType());
    assertEquals(SubstanceChecker.State.Unknown, loadedState.getSubstanceState());
    assertEquals(null, loadedState.getFeatureVersion(Plugin.Feature.Substance));
    assertEquals(null, loadedState.getFeatureVersion(Plugin.Feature.Metadata));
    assertEquals(-1, loadedState.getLastMetadataIndex());
    assertEquals(0, loadedState.getLastContentChange());
    assertEquals(mau.getAuId(), loadedState.getArchivalUnit().getAuId());
  }
Exemplo n.º 5
0
  /** Loads the lexicon and morph files. */
  public void init(URL lexiconUrl, URL morphUrl) throws IOException {

    List<Family> lexicon = null;
    List<MorphItem> morph = null;
    List<MacroItem> macroModel = null;

    // load category families (lexicon), morph forms and macros
    lexicon = getLexicon(lexiconUrl);
    Pair<List<MorphItem>, List<MacroItem>> morphInfo = getMorph(morphUrl);
    morph = morphInfo.a;
    macroModel = morphInfo.b;

    // index words; also index stems to words, as default preds
    // store indexed coarticulation attrs too
    _words = new GroupMap<Word, MorphItem>();
    _predToWords = new GroupMap<String, Word>();
    _coartAttrs = new HashSet<String>();
    _indexedCoartAttrs = new HashSet<String>();
    for (MorphItem morphItem : morph) {
      Word surfaceWord = morphItem.getSurfaceWord();
      _words.put(surfaceWord, morphItem);
      _predToWords.put(morphItem.getWord().getStem(), surfaceWord);
      if (morphItem.isCoart()) {
        Word indexingWord = morphItem.getCoartIndexingWord();
        _words.put(indexingWord, morphItem);
        Pair<String, String> first = indexingWord.getSurfaceAttrValPairs().next();
        _indexedCoartAttrs.add(first.a);
        for (Iterator<Pair<String, String>> it = surfaceWord.getSurfaceAttrValPairs();
            it.hasNext(); ) {
          Pair<String, String> p = it.next();
          _coartAttrs.add(p.a);
        }
      }
    }

    // index entries based on stem+pos
    _stems = new GroupMap<String, Object>();
    _posToEntries = new GroupMap<String, EntriesItem[]>();
    // index entries by supertag+pos, for supertagging
    _stagToEntries = new GroupMap<String, EntriesItem>();
    // also index rels and coart rels to preds
    _relsToPreds = new GroupMap<String, String>();
    _coartRelsToPreds = new GroupMap<String, String>();
    // and gather list of attributes used per atomic category type
    _catsToAttrs = new GroupMap<String, String>();
    _lfAttrs = new HashSet<String>();
    // and remember family and ent, names, for checking excluded list on morph items
    HashSet<String> familyAndEntryNames = new HashSet<String>();

    // index each family
    for (Family family : lexicon) {

      familyAndEntryNames.add(family.getName());
      EntriesItem[] entries = family.getEntries();
      DataItem[] data = family.getData();

      // for generic use when we get an unknown stem
      // from the morphological analyzer
      if (!family.isClosed()) {
        _posToEntries.put(family.getPOS(), entries);
      }

      // scan through entries
      for (int j = 0; j < entries.length; j++) {
        // index
        EntriesItem eItem = entries[j];
        _stagToEntries.put(eItem.getSupertag() + family.getPOS(), eItem);
        if (eItem.getStem().length() > 0) {
          _stems.put(eItem.getStem() + family.getPOS(), eItem);
        }
        try {
          // gather features
          eItem.getCat().forall(gatherAttrs);
          // record names
          familyAndEntryNames.add(eItem.getName());
          familyAndEntryNames.add(eItem.getQualifiedName());
        } catch (RuntimeException exc) {
          System.err.println("exception for: " + family.getName() + ": " + exc);
        }
      }

      // scan through data
      for (int j = 0; j < data.length; j++) {
        DataItem dItem = data[j];
        _stems.put(
            dItem.getStem() + family.getPOS(), new Pair<DataItem, EntriesItem[]>(dItem, entries));
        // index non-default preds to words
        if (!dItem.getStem().equals(dItem.getPred())) {
          Collection<Word> words = (Collection<Word>) _predToWords.get(dItem.getStem());
          if (words == null) {
            if (!openlex) {
              System.out.print("Warning: couldn't find words for pred '");
              System.out.println(dItem.getPred() + "' with stem '" + dItem.getStem() + "'");
            }
          } else {
            for (Iterator<Word> it = words.iterator(); it.hasNext(); ) {
              _predToWords.put(dItem.getPred(), it.next());
            }
          }
        }
      }

      // index rels to preds
      // nb: this covers relational (eg @x<GenRel>e) and featural (eg @e<tense>past)
      //     elementary predications
      List<String> indexRels = new ArrayList<String>(3);
      String familyIndexRel = family.getIndexRel();
      if (familyIndexRel.length() > 0) {
        indexRels.add(familyIndexRel);
      }
      for (int j = 0; j < entries.length; j++) {
        EntriesItem eItem = entries[j];
        String indexRel = eItem.getIndexRel();
        if (indexRel.length() > 0 && !indexRel.equals(familyIndexRel)) {
          indexRels.add(indexRel);
        }
      }
      for (Iterator<String> it = indexRels.iterator(); it.hasNext(); ) {
        String indexRel = it.next();
        // nb: not indexing on entries items, b/c some stems are still defaults
        for (int j = 0; j < data.length; j++) {
          DataItem dItem = data[j];
          _relsToPreds.put(indexRel, dItem.getPred());
        }
      }

      // index coart rels (features, really) to preds
      String coartRel = family.getCoartRel();
      if (coartRel.length() > 0) {
        for (int j = 0; j < data.length; j++) {
          _coartRelsToPreds.put(coartRel, data[j].getPred());
        }
      }
    }

    // index the macros
    _macros = new GroupMap<String, FeatureStructure>();
    // nb: could just index MacroItem objects for feature structures too;
    //     this might be a bit cleaner, but life is short
    _macroItems = new HashMap<String, MacroItem>();
    for (MacroItem mi : macroModel) {
      String macName = mi.getName();
      FeatureStructure[] specs = mi.getFeatureStructures();
      for (int j = 0; j < specs.length; j++) {
        _macros.put(macName, specs[j]);
      }
      // this is for handling LF part of macros
      _macroItems.put(macName, mi);
    }

    // with morph items, check POS, macro names, excluded list for xref
    for (MorphItem morphItem : morph) {
      Word w = morphItem.getWord();
      if (!openlex
          && !_stems.containsKey(w.getStem() + w.getPOS())
          && !_posToEntries.containsKey(w.getPOS())) {
        System.err.println(
            "Warning: no entries for stem '"
                + w.getStem()
                + "' and POS '"
                + w.getPOS()
                + "' found for word '"
                + w
                + "'");
      }
      String[] macroNames = morphItem.getMacros();
      for (int j = 0; j < macroNames.length; j++) {
        if (!_macroItems.containsKey(macroNames[j])) {
          System.err.println(
              "Warning: macro "
                  + macroNames[j]
                  + " not found for word '"
                  + morphItem.getWord()
                  + "'");
        }
      }
      String[] excludedNames = morphItem.getExcluded();
      for (int j = 0; j < excludedNames.length; j++) {
        if (!familyAndEntryNames.contains(excludedNames[j])) {
          System.err.println(
              "Warning: excluded family or entry '"
                  + excludedNames[j]
                  + "' not found for word '"
                  + morphItem.getWord()
                  + "'");
        }
      }
    }
  }
Exemplo n.º 6
0
  /** Adds a new sentence to the 'brain' */
  public void add(String sentence) {
    sentence = sentence.trim();
    ArrayList parts = new ArrayList();
    char[] chars = sentence.toCharArray();
    int i = 0;
    boolean punctuation = false;
    StringBuffer buffer = new StringBuffer();
    while (i < chars.length) {
      char ch = chars[i];
      if ((WORD_CHARS.indexOf(ch) >= 0) == punctuation) {
        punctuation = !punctuation;
        String token = buffer.toString();
        if (token.length() > 0) {
          parts.add(token);
        }
        buffer = new StringBuffer();
        // i++;
        continue;
      }
      buffer.append(ch);
      i++;
    }
    String lastToken = buffer.toString();
    if (lastToken.length() > 0) {
      parts.add(lastToken);
    }

    if (parts.size() >= 4) {
      for (i = 0; i < parts.size() - 3; i++) {
        // System.out.println("\"" + parts.get(i) + "\"");
        Quad quad =
            new Quad(
                (String) parts.get(i),
                (String) parts.get(i + 1),
                (String) parts.get(i + 2),
                (String) parts.get(i + 3));
        if (quads.containsKey(quad)) {
          quad = (Quad) quads.get(quad);
        } else {
          quads.put(quad, quad);
        }

        if (i == 0) {
          quad.setCanStart(true);
        }
        // else if (i == parts.size() - 4) {
        if (i == parts.size() - 4) {
          quad.setCanEnd(true);
        }

        for (int n = 0; n < 4; n++) {
          String token = (String) parts.get(i + n);
          if (!words.containsKey(token)) {
            words.put(token, new HashSet(1));
          }
          HashSet set = (HashSet) words.get(token);
          set.add(quad);
        }

        if (i > 0) {
          String previousToken = (String) parts.get(i - 1);
          if (!previous.containsKey(quad)) {
            previous.put(quad, new HashSet(1));
          }
          HashSet set = (HashSet) previous.get(quad);
          set.add(previousToken);
        }

        if (i < parts.size() - 4) {
          String nextToken = (String) parts.get(i + 4);
          if (!next.containsKey(quad)) {
            next.put(quad, new HashSet(1));
          }
          HashSet set = (HashSet) next.get(quad);
          set.add(nextToken);
        }
      }
    } else {
      // Didn't learn anything.
    }
  }
  public void testStoreAuState() throws Exception {
    HashSet strCol = new HashSet();
    strCol.add("test");
    AuState origState =
        new AuState(
            mau,
            123000,
            123123,
            41,
            "woop woop",
            321000,
            222000,
            3,
            "pollres",
            12345,
            456000,
            strCol,
            AuState.AccessType.OpenAccess,
            2,
            1.0,
            1.0,
            SubstanceChecker.State.Yes,
            "SubstVer3",
            "MetadatVer7",
            111444,
            12345,
            111222, // lastPoPPoll
            7, // lastPoPPollResult
            222333, // lastLocalHashScan
            444777, // numAgreePeersLastPoR
            777444, // numWillingRepairers
            747474, // numCurrentSuspectVersions
            ListUtil.list("http://hos.t/pa/th"),
            repository);

    assertEquals("SubstVer3", origState.getFeatureVersion(Plugin.Feature.Substance));
    assertEquals("MetadatVer7", origState.getFeatureVersion(Plugin.Feature.Metadata));
    assertEquals(111444, origState.getLastMetadataIndex());

    repository.storeAuState(origState);

    String filePath = LockssRepositoryImpl.mapAuToFileLocation(tempDirPath, mau);
    filePath += HistoryRepositoryImpl.AU_FILE_NAME;
    File xmlFile = new File(filePath);
    assertTrue(xmlFile.exists());

    origState = null;
    AuState loadedState = repository.loadAuState();
    assertEquals(123000, loadedState.getLastCrawlTime());
    assertEquals(123123, loadedState.getLastCrawlAttempt());
    assertEquals(41, loadedState.getLastCrawlResult());
    assertEquals("woop woop", loadedState.getLastCrawlResultMsg());
    assertEquals(321000, loadedState.getLastTopLevelPollTime());
    assertEquals(222000, loadedState.getLastPollStart());
    assertEquals(3, loadedState.getLastPollResult());
    assertEquals("Inviting Peers", loadedState.getLastPollResultMsg());

    assertEquals(111222, loadedState.getLastPoPPoll());
    assertEquals(7, loadedState.getLastPoPPollResult());
    assertEquals(222333, loadedState.getLastLocalHashScan());

    assertEquals(444777, loadedState.getNumAgreePeersLastPoR());
    assertEquals(777444, loadedState.getNumWillingRepairers());
    assertEquals(747474, loadedState.getNumCurrentSuspectVersions());
    assertEquals(ListUtil.list("http://hos.t/pa/th"), loadedState.getCdnStems());
    loadedState.addCdnStem("http://this.is.new/");
    assertEquals(
        ListUtil.list("http://hos.t/pa/th", "http://this.is.new/"), loadedState.getCdnStems());

    assertEquals(12345, loadedState.getPollDuration());
    assertEquals(2, loadedState.getClockssSubscriptionStatus());
    assertEquals(AuState.AccessType.OpenAccess, loadedState.getAccessType());
    assertEquals(SubstanceChecker.State.Yes, loadedState.getSubstanceState());
    assertEquals("SubstVer3", loadedState.getFeatureVersion(Plugin.Feature.Substance));
    assertEquals("MetadatVer7", loadedState.getFeatureVersion(Plugin.Feature.Metadata));
    assertEquals(111444, loadedState.getLastMetadataIndex());
    assertEquals(12345, loadedState.getLastContentChange());
    assertEquals(mau.getAuId(), loadedState.getArchivalUnit().getAuId());

    // check crawl urls
    Collection col = loadedState.getCrawlUrls();
    Iterator colIter = col.iterator();
    assertTrue(colIter.hasNext());
    assertEquals("test", colIter.next());
    assertFalse(colIter.hasNext());
  }
  @Override
  public HashSet<ScoredAnnotation> solveSa2W(String text) throws AnnotationException {
    HashSet<ScoredAnnotation> res;
    try {
      res = new HashSet<ScoredAnnotation>();
      lastTime = Calendar.getInstance().getTimeInMillis();

      URL wikiApi = new URL(url);
      String parameters =
          "references=true&repeatMode=all&minProbability=0.0&source="
              + URLEncoder.encode(text, "UTF-8");
      HttpURLConnection slConnection = (HttpURLConnection) wikiApi.openConnection();
      slConnection.setRequestProperty("accept", "text/xml");
      slConnection.setDoOutput(true);
      slConnection.setDoInput(true);
      slConnection.setRequestMethod("POST");
      slConnection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
      slConnection.setRequestProperty("charset", "utf-8");
      slConnection.setRequestProperty(
          "Content-Length", "" + Integer.toString(parameters.getBytes().length));
      slConnection.setUseCaches(false);

      DataOutputStream wr = new DataOutputStream(slConnection.getOutputStream());
      wr.writeBytes(parameters);
      wr.flush();
      wr.close();
      DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      DocumentBuilder builder = factory.newDocumentBuilder();
      Document doc = builder.parse(slConnection.getInputStream());

      /*			URL wikiApi = new URL(url+"?references=true&repeatMode=all&minProbability=0.0&source="+URLEncoder.encode(text, "UTF-8"));
      			URLConnection wikiConnection = wikiApi.openConnection();
      			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
      			DocumentBuilder builder = factory.newDocumentBuilder();
      			Document doc = builder.parse(wikiConnection.getInputStream());
      */

      lastTime = Calendar.getInstance().getTimeInMillis() - lastTime;

      XPathFactory xPathfactory = XPathFactory.newInstance();
      XPath xpath = xPathfactory.newXPath();
      XPathExpression idExpr = xpath.compile("//detectedTopic/@id");
      XPathExpression weightExpr = xpath.compile("//detectedTopic/@weight");
      XPathExpression referenceExpr = xpath.compile("//detectedTopic/references");

      NodeList ids = (NodeList) idExpr.evaluate(doc, XPathConstants.NODESET);
      NodeList weights = (NodeList) weightExpr.evaluate(doc, XPathConstants.NODESET);
      NodeList references = (NodeList) referenceExpr.evaluate(doc, XPathConstants.NODESET);

      for (int i = 0; i < weights.getLength(); i++) {
        if (weights.item(i).getNodeType() != Node.TEXT_NODE) {
          int id = Integer.parseInt(ids.item(i).getNodeValue());
          float weight = Float.parseFloat(weights.item(i).getNodeValue());
          //					System.out.println("ID="+ids.item(i).getNodeValue()+" weight="+weight);
          XPathExpression startExpr =
              xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@start");
          XPathExpression endExpr =
              xpath.compile("//detectedTopic[@id=" + id + "]/references/reference/@end");
          NodeList starts =
              (NodeList) startExpr.evaluate(references.item(i), XPathConstants.NODESET);
          NodeList ends = (NodeList) endExpr.evaluate(references.item(i), XPathConstants.NODESET);
          for (int j = 0; j < starts.getLength(); j++) {
            int start = Integer.parseInt(starts.item(j).getNodeValue());
            int end = Integer.parseInt(ends.item(j).getNodeValue());
            int len = end - start;
            res.add(new ScoredAnnotation(start, len, id, weight));
          }
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
      throw new AnnotationException(
          "An error occurred while querying Wikipedia Miner API. Message: " + e.getMessage());
    }
    return res;
  }