/**
 * Builds an in-memory {@link Document} from a {@link TaggerDocument}.
 *
 * <p>The concrete subtype ({@code Tweet}, {@code SMS} or {@code Facebook}) is chosen by a
 * case-insensitive comparison of {@code doc.getDoctype()} against the {@code DocumentType}
 * constants. The document ID, crisis ID, crisis code, language and training-sample flag are
 * copied over, and a {@code WordSet} built from {@code doc.getWordFeatures()} is attached as
 * a feature set.
 *
 * <p>Individual human labels are NOT copied; only {@code humanLabelCount} is set, to 1 when
 * {@code doc.hasHumanLabels()} is true and to 0 otherwise.
 *
 * @param doc the source document; may be {@code null}
 * @return the converted document, or {@code null} when {@code doc} is {@code null}
 * @throws IllegalArgumentException if the doctype of {@code doc} is {@code null} or is not
 *         one of the supported document types
 */
public static Document fromTaggerDocumentToDocument(TaggerDocument doc) {
    if (doc == null) {
        return null;
    }

    // Fail with a descriptive error instead of an anonymous NullPointerException when the
    // doctype is missing or does not map to a known Document subtype.
    final String doctype = doc.getDoctype();
    if (doctype == null) {
        throw new IllegalArgumentException(
                "TaggerDocument " + doc.getDocumentID() + " has no doctype");
    }

    final Document document;
    if (doctype.equalsIgnoreCase(DocumentType.TWIITER_DOC)) {
        document = new Tweet();
    } else if (doctype.equalsIgnoreCase(DocumentType.SMS_DOC)) {
        document = new SMS();
    } else if (doctype.equalsIgnoreCase(DocumentType.FACEBOOK_DOC)) {
        document = new Facebook();
    } else {
        throw new IllegalArgumentException(
                "Unsupported doctype '" + doctype + "' for TaggerDocument " + doc.getDocumentID());
    }

    document.setDocumentID(doc.getDocumentID());
    document.setCrisisID(doc.getCrisisID());
    document.humanLabelCount = doc.hasHumanLabels() ? 1 : 0;
    document.setCrisisCode(doc.getCrisisCode());
    document.setLanguage(doc.getLanguage());

    // Rebuild the word feature set from the stored word-features string.
    // NOTE(review): the meaning of the 'false' flag passed to the extractor is not visible
    // here -- confirm against FeatureExtractor.getWordsInStringWithBigrams.
    WordSet wordSet = new WordSet();
    String text = doc.getWordFeatures();
    wordSet.addAll(FeatureExtractor.getWordsInStringWithBigrams(text, false));
    document.addFeatureSet(wordSet);

    document.setValueAsTrainingSample(doc.getValueAsTrainingSample());

    // NOTE: copying of NominalLabelBC human labels onto the document is intentionally not
    // performed here (that code was disabled in the original implementation).
    return document;
}
public static TaggerDocument fromDocumentToTaggerDocument(Document doc) { TaggerDocument document = new TaggerDocument(); if (doc != null) { // NOTE: documentID needs to be set separately as Auto Generation ID from DB/Hibernate // Now copy the remaining fields document.setHasHumanLabels(doc.hasHumanLabels()); document.setCrisisID(doc.getCrisisID()); document.setCrisisCode(doc.getCrisisCode()); document.setReceivedAt( new java.sql.Timestamp(java.util.Calendar.getInstance().getTimeInMillis())); document.setLanguage(doc.getLanguage()); document.setDoctype(doc.getClass().getSimpleName().toString()); if (doc.getInputJson() != null) { document.setData(Helpers.escapeJson(doc.getInputJson().toString())); } else { document.setData(null); } if (doc.features != null) { document.setWordFeatures(DocumentJSONConverter.getFeaturesJson(WordSet.class, doc)); } document.setGeoFeatures(null); document.setValueAsTrainingSample(doc.getValueAsTrainingSample()); boolean val = Math.random() < (1.0 / 5.0) ? true : false; document.setIsEvaluationSet(val); /* List<NominalLabelBC> labels = doc.getHumanLabels(NominalLabelBC.class); if (!labels.isEmpty()) { List<NominalLabel> nbList = new ArrayList<NominalLabel>(); for (NominalLabelBC label : labels) { NominalLabel nb = new NominalLabel(label.getNominalLabelID()); nbList.add(nb); } document.setNominalLabelCollection(nbList); } else { document.setNominalLabelCollection(null); }*/ return document; } return null; }