/** * Calculate each default InterestProfile * * @param author * @param interestProfileDefault * @param publicationClustersMap */ public void calculateEachInterestProfileDefault( Author author, Set<Interest> newInterests, InterestProfile interestProfileDefault, Map<String, PublicationClusterHelper> publicationClustersMap) { // get author interest profile Calendar calendar = Calendar.getInstance(); // default profile name [DEFAULT_PROFILENAME] String authorInterestProfileName = interestProfileDefault.getName(); // create new author interest profile for c-value AuthorInterestProfile authorInterestProfile = new AuthorInterestProfile(); authorInterestProfile.setCreated(calendar.getTime()); authorInterestProfile.setDescription( "Interest mining using " + interestProfileDefault.getName() + " algorithm"); authorInterestProfile.setName(authorInterestProfileName); // CorePhrase and WordFreq specific, according to Svetoslav Evtimov thesis // yearFactor Map format Map< Language-Year , value > // totalYearsFactor Map< Language, value > Map<String, Double> yearFactorMap = new HashMap<String, Double>(); Map<String, Double> totalYearsFactorMap = new HashMap<String, Double>(); // calculate some weighting factors // if ( interestProfileDefault.getName().toLowerCase().equals( "corephrase" ) || // interestProfileDefault.getName().toLowerCase().equals( "wordfreq" ) ) // { // yearFactorMap = CorePhraseAndWordFreqHelper.calculateYearFactor( publicationClustersMap, // 0.25 ); // totalYearsFactorMap = CorePhraseAndWordFreqHelper.calculateTotalYearsFactor( // publicationClustersMap ); // } // get the number of active extraction services int numberOfExtractionService = applicationService.getExtractionServices().size(); // loop to each cluster and calculate default profiles for (Map.Entry<String, PublicationClusterHelper> publicationClusterEntry : publicationClustersMap.entrySet()) { PublicationClusterHelper publicationCluster = publicationClusterEntry.getValue(); if (publicationCluster.getTermMap() == null || publicationCluster.getTermMap().isEmpty()) continue; // prepare variables AuthorInterest authorInterest = new AuthorInterest(); // assign author interest method if (interestProfileDefault.getName().toLowerCase().equals("cvalue")) { cValueInterestProfile.doCValueCalculation( authorInterest, newInterests, publicationCluster, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("corephrase")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); corePhraseInterestProfile.doCorePhraseCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("wordfreq")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); wordFreqInterestProfile.doWordFreqCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } // Put other default interest profiles else if (interestProfileDefault.getName().toLowerCase().equals("lda")) { } // at the end persist new interests // for ( Interest newInterest : newInterests ) // persistenceStrategy.getInterestDAO().persist( newInterest ); // check author interest calculation result if (authorInterest.getTermWeights() != null && !authorInterest.getTermWeights().isEmpty()) { authorInterest.setAuthorInterestProfile(authorInterestProfile); authorInterestProfile.addAuthorInterest(authorInterest); authorInterestProfile.setInterestProfile(interestProfileDefault); // persistenceStrategy.getAuthorInterestProfileDAO().persist( authorInterestProfile ); } } // at the end persist if (authorInterestProfile.getAuthorInterests() != null && !authorInterestProfile.getAuthorInterests().isEmpty()) { authorInterestProfile.setAuthor(author); author.addAuthorInterestProfiles(authorInterestProfile); persistenceStrategy.getAuthorDAO().persist(author); } }
/** * Calculated derived interest profile (Intersection and/or Union between interest profile) in an * author * * @param author * @param interestProfilesDerived */ private void calculateInterestProfilesDerived( Author author, List<InterestProfile> interestProfilesDerived) { // get authorInterest set on profile for (InterestProfile interestProfileDerived : interestProfilesDerived) { String[] derivedInterestProfileName = interestProfileDerived.getName().split("\\s+"); // at list profile name has three segment if (derivedInterestProfileName.length < 3) continue; // prepare variables AuthorInterestProfile authorInterestProfile1 = null; AuthorInterestProfile authorInterestProfile2 = null; AuthorInterestProfile authorInterestProfileResult = null; String operationType = null; for (String partOfProfileName : derivedInterestProfileName) { // ? sometimes problem on encoding if (partOfProfileName.equals("∩") || partOfProfileName.equals("?") || partOfProfileName.equals("+") || partOfProfileName.equals("∪")) { if (authorInterestProfileResult != null) { authorInterestProfile1 = authorInterestProfileResult; authorInterestProfileResult = null; } if (partOfProfileName.equals("∩") || partOfProfileName.equals("?") || partOfProfileName.equals("+")) operationType = "INTERSECTION"; else operationType = "UNION"; } else { if (authorInterestProfile1 == null) { authorInterestProfile1 = author.getSpecificAuthorInterestProfile(partOfProfileName); if (authorInterestProfile1 == null) { logger.error("AuthorInterestProfile " + partOfProfileName + " not found"); // continue to next derived author profile, if exist break; } } else { authorInterestProfile2 = author.getSpecificAuthorInterestProfile(partOfProfileName); if (authorInterestProfile2 == null) { logger.error("AuthorInterestProfile " + partOfProfileName + " not found"); // continue to next derived author profile, if exist break; } } // calculate and persist if (authorInterestProfile1 != null && authorInterestProfile2 != null && operationType != null) { if (operationType.equals("INTERSECTION")) authorInterestProfileResult = calculateIntersectionOfAuthorInterestProfiles( authorInterestProfile1, authorInterestProfile2, interestProfileDerived); else authorInterestProfileResult = calculateUnionOfAuthorInterestProfiles( authorInterestProfile1, authorInterestProfile2, interestProfileDerived); } } } // persist result if (authorInterestProfileResult != null && (authorInterestProfileResult.getAuthorInterests() != null && !authorInterestProfileResult.getAuthorInterests().isEmpty())) { authorInterestProfileResult.setAuthor(author); author.addAuthorInterestProfiles(authorInterestProfileResult); persistenceStrategy.getAuthorDAO().persist(author); persistenceStrategy.getAuthorInterestProfileDAO().persist(authorInterestProfileResult); } } }