Пример #1
0
  /**
   * Converts a {@link TaggerDocument} into the matching {@link Document} subtype.
   *
   * <p>The concrete type ({@code Tweet}, {@code SMS} or {@code Facebook}) is chosen by a
   * case-insensitive comparison of the source doctype against the {@code DocumentType}
   * constants. Identifiers, crisis code, language, the training-sample flag and a word
   * feature set built from {@code doc.getWordFeatures()} are copied onto the result.
   *
   * @param doc the source document; may be {@code null}
   * @return the converted document, or {@code null} when {@code doc} is {@code null}
   * @throws IllegalArgumentException if the doctype of {@code doc} is {@code null} or is not
   *     one of the supported document types
   */
  public static Document fromTaggerDocumentToDocument(TaggerDocument doc) {
    if (doc == null) {
      return null;
    }

    // Constant-first comparison so a null doctype falls through to the explicit
    // error below instead of raising an uninformative NullPointerException.
    final String docType = doc.getDoctype();
    final Document document;
    if (DocumentType.TWIITER_DOC.equalsIgnoreCase(docType)) {
      document = new Tweet();
    } else if (DocumentType.SMS_DOC.equalsIgnoreCase(docType)) {
      document = new SMS();
    } else if (DocumentType.FACEBOOK_DOC.equalsIgnoreCase(docType)) {
      document = new Facebook();
    } else {
      throw new IllegalArgumentException("Unsupported document type: " + docType);
    }

    document.setDocumentID(doc.getDocumentID());
    document.setCrisisID(doc.getCrisisID());
    // Only a 0/1 flag is recorded here, not the real number of human labels.
    document.humanLabelCount = doc.hasHumanLabels() ? 1 : 0;
    document.setCrisisCode(doc.getCrisisCode());
    document.setLanguage(doc.getLanguage());

    WordSet wordSet = new WordSet();
    wordSet.addAll(FeatureExtractor.getWordsInStringWithBigrams(doc.getWordFeatures(), false));
    document.addFeatureSet(wordSet);

    document.setValueAsTrainingSample(doc.getValueAsTrainingSample());
    // NOTE: the human labels themselves are not copied onto the resulting document.
    return document;
  }