/** * Create a cluster for publications, based on language and year * * @param author * @param publicationClustersMap */ public void constructPublicationClusterByLanguageAndYear( Author author, Map<String, PublicationClusterHelper> publicationClustersMap) { // fill publication clusters // prepare calendar for publication year Calendar calendar = Calendar.getInstance(); // get all publications from specific author and put it into cluster for (Publication publication : author.getPublications()) { // only proceed publication that have date, language and abstract if (publication.getAbstractText() == null || publication.getAbstractText().equals("")) continue; if (publication.getPublicationDate() == null) continue; if (publication.getLanguage() == null) continue; // get publication year calendar.setTime(publication.getPublicationDate()); // construct clusterMap key String clusterMapKey = publication.getLanguage() + calendar.get(Calendar.YEAR); // construct publication map if (publicationClustersMap.get(clusterMapKey) == null) { // not exist create new cluster PublicationClusterHelper publicationCluster = new PublicationClusterHelper(); publicationCluster.setLangauge(publication.getLanguage()); publicationCluster.setYear(calendar.get(Calendar.YEAR)); publicationCluster.addPublicationAndUpdate(publication); // add into map publicationClustersMap.put(clusterMapKey, publicationCluster); } else { // exist on map, get the cluster PublicationClusterHelper publicationCluster = publicationClustersMap.get(clusterMapKey); publicationCluster.addPublicationAndUpdate(publication); } } }
/** * Calculate each default InterestProfile * * @param author * @param interestProfileDefault * @param publicationClustersMap */ public void calculateEachInterestProfileDefault( Author author, Set<Interest> newInterests, InterestProfile interestProfileDefault, Map<String, PublicationClusterHelper> publicationClustersMap) { // get author interest profile Calendar calendar = Calendar.getInstance(); // default profile name [DEFAULT_PROFILENAME] String authorInterestProfileName = interestProfileDefault.getName(); // create new author interest profile for c-value AuthorInterestProfile authorInterestProfile = new AuthorInterestProfile(); authorInterestProfile.setCreated(calendar.getTime()); authorInterestProfile.setDescription( "Interest mining using " + interestProfileDefault.getName() + " algorithm"); authorInterestProfile.setName(authorInterestProfileName); // CorePhrase and WordFreq specific, according to Svetoslav Evtimov thesis // yearFactor Map format Map< Language-Year , value > // totalYearsFactor Map< Language, value > Map<String, Double> yearFactorMap = new HashMap<String, Double>(); Map<String, Double> totalYearsFactorMap = new HashMap<String, Double>(); // calculate some weighting factors // if ( interestProfileDefault.getName().toLowerCase().equals( "corephrase" ) || // interestProfileDefault.getName().toLowerCase().equals( "wordfreq" ) ) // { // yearFactorMap = CorePhraseAndWordFreqHelper.calculateYearFactor( publicationClustersMap, // 0.25 ); // totalYearsFactorMap = CorePhraseAndWordFreqHelper.calculateTotalYearsFactor( // publicationClustersMap ); // } // get the number of active extraction services int numberOfExtractionService = applicationService.getExtractionServices().size(); // loop to each cluster and calculate default profiles for (Map.Entry<String, PublicationClusterHelper> publicationClusterEntry : publicationClustersMap.entrySet()) { PublicationClusterHelper publicationCluster = publicationClusterEntry.getValue(); if (publicationCluster.getTermMap() == null || publicationCluster.getTermMap().isEmpty()) continue; // prepare variables AuthorInterest authorInterest = new AuthorInterest(); // assign author interest method if (interestProfileDefault.getName().toLowerCase().equals("cvalue")) { cValueInterestProfile.doCValueCalculation( authorInterest, newInterests, publicationCluster, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("corephrase")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); corePhraseInterestProfile.doCorePhraseCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } else if (interestProfileDefault.getName().toLowerCase().equals("wordfreq")) { Double yearFactor = yearFactorMap.get(publicationCluster.getLanguage() + publicationCluster.getYear()); Double totalYearFactor = totalYearsFactorMap.get(publicationCluster.getLanguage()); wordFreqInterestProfile.doWordFreqCalculation( authorInterest, newInterests, publicationCluster, yearFactor, totalYearFactor, numberOfExtractionService); } // Put other default interest profiles else if (interestProfileDefault.getName().toLowerCase().equals("lda")) { } // at the end persist new interests // for ( Interest newInterest : newInterests ) // persistenceStrategy.getInterestDAO().persist( newInterest ); // check author interest calculation result if (authorInterest.getTermWeights() != null && !authorInterest.getTermWeights().isEmpty()) { authorInterest.setAuthorInterestProfile(authorInterestProfile); authorInterestProfile.addAuthorInterest(authorInterest); authorInterestProfile.setInterestProfile(interestProfileDefault); // persistenceStrategy.getAuthorInterestProfileDAO().persist( authorInterestProfile ); } } // at the end persist if (authorInterestProfile.getAuthorInterests() != null && !authorInterestProfile.getAuthorInterests().isEmpty()) { authorInterestProfile.setAuthor(author); author.addAuthorInterestProfiles(authorInterestProfile); persistenceStrategy.getAuthorDAO().persist(author); } }