/** * tiendv * * @param dimension : * @param numberVector * @param selectFeature : Cac feature duoc chon de chay * @return */ public static Instances buildVectorWithFeatures(int numberVector, Feature selectFeature) { Instances instances; // Value feature FastVector vectorFeature = new FastVector(selectFeature.getNumberSelectFeature()); FastVector vector = new FastVector(selectFeature.getNumberSelectFeature() + 1); // JaccardAuthorName if (selectFeature.getJcAuthorName() == true) { Attribute attAuthorSimilarity = new Attribute(Feature.JC_AUTHOR_NAME); vector.addElement(attAuthorSimilarity); } // JaccardAffiliation if (selectFeature.getJcAffiliation() == true) { Attribute attAffiliationSimilarity = new Attribute(Feature.JC_AFFILIATION); vector.addElement(attAffiliationSimilarity); } // Jaccard Coauthor name if (selectFeature.getJcCoAuthor() == true) { Attribute attCoauthorSimilarity = new Attribute(Feature.JC_CO_AUTHOR); vector.addElement(attCoauthorSimilarity); } // Jaccard KeywordS if (selectFeature.getJcKeyword() == true) { Attribute attKeywordSimilarity = new Attribute(Feature.JC_KEYWORD); vector.addElement(attKeywordSimilarity); } // Jaccard KeywordS if (selectFeature.getJcInterestingKeyword() == true) { Attribute attInterestSimilarity = new Attribute(Feature.JC_INTERESTING_KEYWORD); vector.addElement(attInterestSimilarity); } // Levenshtein Author Name if (selectFeature.getLevenshteinAuthorname() == true) { Attribute attLevenshteinAuthorname = new Attribute(Feature.LEVENSHTEIN_AUTHOR_NAME); vector.addElement(attLevenshteinAuthorname); } // Levenshtein Affiliation if (selectFeature.getLevenshteinAffiliaiton() == true) { Attribute attLevenshteinAffiliaiton = new Attribute(Feature.LEVENSHTEIN_AFFILIATION); vector.addElement(attLevenshteinAffiliaiton); } // Jaro Author Name if (selectFeature.getJaroAuthorName() == true) { Attribute attJaroAuthorName = new Attribute(Feature.JARO_AUTHOR_NAME); vector.addElement(attJaroAuthorName); } // Jaro Affiliation if (selectFeature.getJaroAffiliation() == true) { Attribute attJaroAffiliation = new Attribute(Feature.JARO_AFFILIATION); vector.addElement(attJaroAffiliation); } // Jarowinkler Author Name if (selectFeature.getJarowinklerAuthorName() == true) { Attribute attJaroWinklerAuthorName = new Attribute(Feature.JAROWINKLER_AUTHOR_NAME); vector.addElement(attJaroWinklerAuthorName); } // Jarowinkler Author Affiliation if (selectFeature.getJarowinklerAffiliaiton() == true) { Attribute attJaroWinklerAffiliation = new Attribute(Feature.JAROWIKLER_AFFILIATION); vector.addElement(attJaroWinklerAffiliation); } // smithWaterman Author AuthorName if (selectFeature.getSmithWatermanAuthorName() == true) { Attribute attSmithWatermanAuthorName = new Attribute(Feature.SMITHWATERMAN_AUTHOR_NAME); vector.addElement(attSmithWatermanAuthorName); } // smithWaterman Author Affiliation if (selectFeature.getSmithWatermanAffiliation() == true) { Attribute attSmithWatermanAffiliaiton = new Attribute(Feature.SMITHWATERMAN_AFFILIATION); vector.addElement(attSmithWatermanAffiliaiton); } // Monge-Elkan Author Name if (selectFeature.getMongeElkanAuthorName() == true) { Attribute attSmithMongeElkanAuthorName = new Attribute(Feature.MONGEELKAN_AUTHOR_NAME); vector.addElement(attSmithMongeElkanAuthorName); } // Monge-Elkan Author Affiliation if (selectFeature.getMongeElkanAuthorName() == true) { Attribute attSmithMongeElkanAuthorAffiliation = new Attribute(Feature.MONGEELKAN_AFFILIATION); vector.addElement(attSmithMongeElkanAuthorAffiliation); } // add more feature here // Declare the class attribute along with its values FastVector classValue = new FastVector(2); classValue.addElement("same"); classValue.addElement("diff"); Attribute classAttribute = new Attribute("Class", classValue); vector.addElement(classAttribute); instances = new Instances("AuthorName", vector, numberVector); instances.setClassIndex(selectFeature.getNumberSelectFeature()); // cai nay xem co dung khong return instances; }
/** * tiendv * * @param instancesData * @param pubA * @param pubB * @param Feature : select features * @param label * @return */ public static Instance calculateVectorWithSelectFeatures( Instances instancesData, Publication pubA, Publication pubB, Feature selectFeature, String label) { AuthorSimilarity authorSimilarity = new AuthorSimilarity(); AffiliationSimilarity affiliationSimilarity = new AffiliationSimilarity(); CoAuthorSimilarity coAuthorSimilarity = new CoAuthorSimilarity(); KeywordSimilarity keywordSimilarity = new KeywordSimilarity(); InterestKeywordSimilarity interestKeywordSimilarity = new InterestKeywordSimilarity(); int dimension = selectFeature.getNumberSelectFeature() + 1; Instance simple = new SparseInstance(dimension); if (selectFeature.getJcAuthorName() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JC_AUTHOR_NAME), authorSimilarity.makeJaccardSimilarity(pubA, pubB)); if (selectFeature.getJcAffiliation() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JC_AFFILIATION), affiliationSimilarity.makeJaccardSimilarity(pubA, pubB)); if (selectFeature.getJcCoAuthor() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JC_CO_AUTHOR), coAuthorSimilarity.makeJaccardSimilarity(pubA, pubB)); if (selectFeature.getJcKeyword() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JC_KEYWORD), keywordSimilarity.makeJaccardSimilarity(pubA, pubB)); if (selectFeature.getJcInterestingKeyword() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JC_INTERESTING_KEYWORD), interestKeywordSimilarity.makeJaccardSimilarity(pubA, pubB)); if (selectFeature.getLevenshteinAuthorname() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.LEVENSHTEIN_AUTHOR_NAME), authorSimilarity.makeLevenshteinSimilarity(pubA, pubB)); if (selectFeature.getLevenshteinAffiliaiton() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.LEVENSHTEIN_AFFILIATION), affiliationSimilarity.makeLevenshteinSimilarity(pubA, pubB)); if (selectFeature.getJaroAuthorName() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JARO_AUTHOR_NAME), authorSimilarity.makeJaroSimilarity(pubA, pubB)); if (selectFeature.getJaroAffiliation() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JARO_AFFILIATION), affiliationSimilarity.makeJaroSimilarity(pubA, pubB)); if (selectFeature.getJarowinklerAuthorName() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JAROWINKLER_AUTHOR_NAME), authorSimilarity.makeJaroWinklerSimilarity(pubA, pubB)); if (selectFeature.getJarowinklerAffiliaiton() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.JAROWIKLER_AFFILIATION), affiliationSimilarity.makeJaroWinklerSimilarity(pubA, pubB)); if (selectFeature.getSmithWatermanAuthorName() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.SMITHWATERMAN_AUTHOR_NAME), authorSimilarity.makeSmithWatermanSimilarity(pubA, pubB)); if (selectFeature.getSmithWatermanAffiliation() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.SMITHWATERMAN_AFFILIATION), affiliationSimilarity.makeSmithWatermanSimilarity(pubA, pubB)); if (selectFeature.getMongeElkanAuthorName() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.MONGEELKAN_AUTHOR_NAME), authorSimilarity.makeMongeElkanSimilarity(pubA, pubB)); if (selectFeature.getMongeElkanAffiliation() == true) simple.setValue( (Attribute) instancesData.attribute(Feature.MONGEELKAN_AFFILIATION), affiliationSimilarity.makeMongeElkanSimilarity(pubA, pubB)); // Add more feature here int sum = pubA.getMainAuthor().getAuthorResult() + pubB.getMainAuthor().getAuthorResult(); if (sum == 2) simple.setValue( (Attribute) instancesData.attribute(selectFeature.getNumberSelectFeature()), "same"); else simple.setValue( (Attribute) instancesData.attribute(selectFeature.getNumberSelectFeature()), "diff"); return simple; }