Esempi in Java per SolrImporter

Linguaggio di programmazione: Java

Classe/tipologia: SolrImporter

Esempi su hotexamples.com: 2

SolrImporter in Java: 2 esempi trovati. Questi sono i migliori esempi reali in Java per SolrImporter, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

addDoc(2)

Esempio n. 1

Mostra file

File: CorpusHandler.java Progetto: hicannon/TAC-2013-KBP-English-Sentiment-Slot-Filling

  /**
   * Handles a closing tag: moves the text buffered for that tag into the current Solr document
   * (where applicable) and resets the matching parser-state flag.
   *
   * <ul>
   *   <li>{@code post}, {@code text}, {@code headline}: stores the trimmed content buffer in the
   *       {@code content} field, clears the buffer and leaves content mode.
   *   <li>{@code docid}: keeps the buffered text (untrimmed) as the base document id.
   *   <li>{@code poster}: stores the trimmed buffer in the {@code author} field.
   *   <li>{@code postdate}, {@code dateline}, {@code datetime}: discards the buffered text.
   *   <li>{@code doc}: submits the current document through {@code SolrImporter.addDoc} and then
   *       calls {@code clear()}. A failed submission is only printed; it is not rethrown.
   * </ul>
   *
   * Any other tag name is ignored.
   *
   * @param qName name of the element being closed; matched case-insensitively
   */
  // @Override
  public void endElement(String qName) {
    // Lower-case with a fixed locale. The default-locale overload maps 'I' to a dotless i under
    // Turkish/Azeri locales, so "DOCID" or "DATELINE" would silently match none of the labels.
    switch (qName.toLowerCase(java.util.Locale.ROOT)) {
      case "post":
      case "text":
      case "headline":
        currentDoc.addField("content", contentBuffer.toString().trim());
        inContent = false;
        // Depending on memory constraints and document sizes,
        // maybe declare new one instead of virtual clear
        contentBuffer.setLength(0);
        break;
      case "docid":
        inDocId = false;
        // NOTE(review): unlike the other fields this value is not trimmed - confirm intended.
        baseID = otherBuffer.toString();
        otherBuffer.setLength(0);
        break;
      case "poster":
        currentDoc.addField("author", otherBuffer.toString().trim());
        inPoster = false;
        otherBuffer.setLength(0);
        break;
      case "postdate":
      case "dateline":
      case "datetime":
        // Date text is currently not indexed; just drop what was buffered.
        otherBuffer.setLength(0);
        inDate = false;
        break;
      case "doc":
        assert (currentDoc != null);
        try {
          SolrImporter.addDoc(server, currentDoc);
        } catch (SolrServerException | IOException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }

        // By the time the document closes, every nested element must have been closed too.
        assert (!inDocId);
        assert (!inContent);
        assert (!inPoster);

        clear();
        break;
    }
  }

Esempio n. 2

Mostra file

File: CorpusHandler.java Progetto: hicannon/TAC-2013-KBP-English-Sentiment-Slot-Filling

  /**
   * Handles an opening tag reported by the pull parser and updates the handler state.
   *
   * <ul>
   *   <li>{@code DOC}: opens a new Solr document. The attribute count selects the category
   *       (0 = {@code web}, 1 = {@code discussion}, 2 = {@code news}); for 1 or 2 attributes the
   *       first attribute value becomes the {@code id}, and for 1 it also becomes the base id.
   *       Any other attribute count leaves the category untouched and adds no id. The
   *       {@code category} and {@code whole_text} fields are always added.
   *   <li>{@code DOCID}, {@code POSTER}, {@code DATETIME}/{@code DATELINE}/{@code POSTDATE},
   *       {@code P}/{@code headline}: switch on the corresponding "inside element" flag.
   *   <li>{@code post}: submits the document that is currently open (if any) through
   *       {@code SolrImporter.addDoc} and starts a fresh one whose id is derived from the base id.
   *   <li>{@code DOCTYPE}, {@code BODY}, {@code TEXT}, {@code docs}, {@code QUOTE}, {@code a},
   *       {@code img}: ignored.
   *   <li>Anything else is reported on standard output as an unexpected node.
   * </ul>
   *
   * All tag names are compared case-insensitively.
   *
   * @param parser pull parser positioned on the start tag; supplies the tag name and attributes
   * @param wholeDoc text stored in the {@code whole_text} field of a document opened by {@code DOC}
   * @throws Exception if a {@code DOC} tag is seen while another document is still open
   */
  public void startElement(XmlPullParser parser, String wholeDoc) throws Exception {
    String qName = parser.getName();
    if (qName.equalsIgnoreCase("DOC")) {
      if (currentDoc != null) {
        throw new Exception("Nested Document taggs. Input file has problems.\n");
      }
      currentDoc = new SolrInputDocument();
      // The number of attributes on the DOC tag is what tells the corpus types apart.
      switch (parser.getAttributeCount()) {
        case 0:
          category = "web";
          break;
        case 1:
          category = "discussion";
          baseID = parser.getAttributeValue(0);
          currentDoc.addField("id", baseID);
          break;
        case 2:
          category = "news";
          currentDoc.addField("id", parser.getAttributeValue(0));
          // TODO: process news type attribute if needed
          break;
      }
      currentDoc.addField("category", category);
      currentDoc.addField("whole_text", wholeDoc);
      // Constant-first comparison: the switch above has no default, so category is not
      // guaranteed to have been assigned here and this placeholder branch must not throw.
      if ("web".equals(category)) {
        // TODO: call clean
      }
      /*Object[] preprocess = Preprocessor.Tokenize(StripXMLTags.strip(wholeDoc).toString());
      currentDoc.addField("offsets", preprocess[0]);
      currentDoc.addField("tokens", preprocess[1]);
      currentDoc.addField("tree", preprocess[2]);*/
    } else if (qName.equalsIgnoreCase("DOCID")) {
      // Should happen in web documents only
      inDocId = true;
    } else if (qName.equalsIgnoreCase("DOCTYPE")) {
      // Should happen in web documents only
      // TODO: process if this element is needed
      // inDocType = true;
    } else if (qName.equalsIgnoreCase("DATETIME")
        || qName.equalsIgnoreCase("DATELINE")
        || qName.equalsIgnoreCase("POSTDATE")) {
      inDate = true;
    } else if (qName.equalsIgnoreCase("P") || qName.equalsIgnoreCase("headline")) {
      inContent = true;
    } else if (qName.equalsIgnoreCase("post")) {
      assert (category.equals("discussion") || category.equals("web"));
      assert (baseID != null);

      if (currentDoc != null) { // Finish the previous doc
        if (category.equals("web") && currentDoc.getFieldValues("id") == null) {
          currentDoc.addField("id", baseID);
        }
        assert (currentDoc.getFieldValue("id") != null);

        try {
          SolrImporter.addDoc(server, currentDoc);

          /*//Make sure offsets are correct
          String rawText = SolrInterface.getRawDocument(baseID);
          int diff = StripXMLTags.verify(wholeDoc, new StringBuffer(rawText));
          if (diff!=-1){
          	System.out.println(diff+":'(");
          }*/
        } catch (SolrServerException | IOException e) {
          // TODO Auto-generated catch block
          e.printStackTrace();
        }
      }

      // Every post becomes its own Solr document, keyed off the enclosing document's base id.
      currentDoc = new SolrInputDocument();
      currentDoc.addField("category", category);
      if (parser.getAttributeCount() == 3
          && parser.getAttributeName(0).equalsIgnoreCase("author")) {
        assert (category.equals("discussion"));
        currentDoc.addField("author", parser.getAttributeValue(0));
        // Append post id to doc id
        currentDoc.addField("id", baseID + "." + parser.getAttributeValue(2));
      } else {
        assert (category.equals("web"));
        // Append the poster number to the doc id
        currentDoc.addField("id", baseID + "." + (count++));
      }
      // TODO: datetime for discussions if needed

      inContent = true;
    } else if (qName.equalsIgnoreCase("POSTER")) {
      inPoster = true;
    } else if (qName.equalsIgnoreCase("QUOTE")) {
      // Note: Hack so that we have the things before the quote tag due to not XML compliant quote
      // tag crashing parser
      /*if (category.equalsIgnoreCase("web")){
      	//currentDoc.addField("content", contentBuffer.toString());
      	endElement("post");
      }*/
    } else if (qName.equalsIgnoreCase("BODY")
        || qName.equalsIgnoreCase("TEXT")
        || qName.equalsIgnoreCase("docs")
        || qName.equalsIgnoreCase("a")
        || qName.equalsIgnoreCase("img")) {
      // Ignored: BODY/TEXT only wrap elements whose fields are populated from the inside, and
      // docs/a/img carry nothing that is indexed. "a" and "img" are matched case-insensitively
      // like every other tag so that upper-case variants are not reported as unexpected.
    } else {
      System.out.println("Unexpected node:" + qName);
    }
  }