/** * Retain the number of segments which are to be preserved and return the remaining list of * segments. * * @param loadMetadataDetails * @param numberOfSegToBeRetained * @return */ private static List<LoadMetadataDetails> getValidLoadDetailsWithRetaining( List<LoadMetadataDetails> loadMetadataDetails, int numberOfSegToBeRetained) { List<LoadMetadataDetails> validList = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); for (LoadMetadataDetails segment : loadMetadataDetails) { if (segment.getLoadStatus().equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS) || segment .getLoadStatus() .equalsIgnoreCase(CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS) || segment.getLoadStatus().equalsIgnoreCase(CarbonCommonConstants.MARKED_FOR_UPDATE)) { validList.add(segment); } } // check if valid list is big enough for removing the number of seg to be retained. // last element int removingIndex = validList.size() - 1; for (int i = validList.size(); i > 0; i--) { if (numberOfSegToBeRetained == 0) { break; } // remove last segment validList.remove(removingIndex--); numberOfSegToBeRetained--; } return validList; }
/** * Identify the segments to be merged based on the Size in case of Major compaction. * * @param compactionSize * @param listOfSegmentsAfterPreserve * @param carbonLoadModel * @param partitionCount * @param storeLocation * @return */ private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSize( long compactionSize, List<LoadMetadataDetails> listOfSegmentsAfterPreserve, CarbonLoadModel carbonLoadModel, int partitionCount, String storeLocation) { List<LoadMetadataDetails> segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); CarbonTableIdentifier tableIdentifier = carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getCarbonTableIdentifier(); // total length long totalLength = 0; // check size of each segment , sum it up across partitions for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) { String segId = segment.getLoadName(); // variable to store one segment size across partition. long sizeOfOneSegmentAcrossPartition = getSizeOfOneSegmentAcrossPartition(partitionCount, storeLocation, tableIdentifier, segId); // if size of a segment is greater than the Major compaction size. then ignore it. if (sizeOfOneSegmentAcrossPartition > (compactionSize * 1024 * 1024)) { // if already 2 segments have been found for merging then stop scan here and merge. if (segmentsToBeMerged.size() > 1) { break; } else { // if only one segment is found then remove the earlier one in list. // reset the total length to 0. segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); totalLength = 0; continue; } } totalLength += sizeOfOneSegmentAcrossPartition; // in case of major compaction the size doesnt matter. all the segments will be merged. if (totalLength < (compactionSize * 1024 * 1024)) { segmentsToBeMerged.add(segment); } else { // if already 2 segments have been found for merging then stop scan here and merge. 
if (segmentsToBeMerged.size() > 1) { break; } else { // if only one segment is found then remove the earlier one in list and put this. // reset the total length to the current identified segment. segmentsToBeMerged = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); segmentsToBeMerged.add(segment); totalLength = sizeOfOneSegmentAcrossPartition; } } } return segmentsToBeMerged; }
/** * For getting the comma separated valid segments for merging. * * @param loadMetadataDetails * @return */ public static String getValidSegments(List<LoadMetadataDetails> loadMetadataDetails) { StringBuilder builder = new StringBuilder(); for (LoadMetadataDetails segment : loadMetadataDetails) { // check if this load is an already merged load. if (null != segment.getMergedLoadName()) { builder.append(segment.getMergedLoadName() + ","); } else { builder.append(segment.getLoadName() + ","); } } builder.deleteCharAt(builder.length() - 1); return builder.toString(); }
/**
 * Registers the given segment as the first member of a same-date group and parses its
 * load start time to serve as the baseline date for the group.
 *
 * @param loadsOfSameDate group the segment is added to
 * @param segment segment whose start time anchors the date window
 * @param sdf formatter used to parse the segment's load start time
 * @return the parsed start time, or {@code null} when the timestamp cannot be parsed
 */
private static Date initializeFirstSegment(
    List<LoadMetadataDetails> loadsOfSameDate, LoadMetadataDetails segment, SimpleDateFormat sdf) {
  Date parsedStartTime = null;
  String startTimeOfBaseline = segment.getLoadStartTime();
  try {
    parsedStartTime = sdf.parse(startTimeOfBaseline);
  } catch (ParseException e) {
    // unparsable timestamp: log and fall through with a null baseline date
    LOGGER.error("Error while parsing segment start time" + e.getMessage());
  }
  loadsOfSameDate.add(segment);
  return parsedStartTime;
}
/** * Identify the segments to be merged based on the segment count * * @param listOfSegmentsAfterPreserve * @return */ private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnSegCount( List<LoadMetadataDetails> listOfSegmentsAfterPreserve) { List<LoadMetadataDetails> mergedSegments = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); List<LoadMetadataDetails> unMergedSegments = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); int[] noOfSegmentLevelsCount = CarbonProperties.getInstance().getCompactionSegmentLevelCount(); int level1Size = 0; int level2Size = 0; boolean first = true; for (int levelCount : noOfSegmentLevelsCount) { if (first) { level1Size = levelCount; first = false; } else { level2Size = levelCount; break; // breaking as we are doing only 2 levels } } int unMergeCounter = 0; int mergeCounter = 0; // check size of each segment , sum it up across partitions for (LoadMetadataDetails segment : listOfSegmentsAfterPreserve) { String segName = segment.getLoadName(); // if a segment is already merged 2 levels then it s name will become .2 // need to exclude those segments from minor compaction. // if a segment is major compacted then should not be considered for minor. if (segName.endsWith(CarbonCommonConstants.LEVEL2_COMPACTION_INDEX) || (segment.isMajorCompacted() != null && segment.isMajorCompacted().equalsIgnoreCase("true"))) { continue; } // check if the segment is merged or not if (!isMergedSegment(segName)) { // if it is an unmerged segment then increment counter unMergeCounter++; unMergedSegments.add(segment); if (unMergeCounter == (level1Size)) { return unMergedSegments; } } else { mergeCounter++; mergedSegments.add(segment); if (mergeCounter == (level2Size)) { return mergedSegments; } } } return new ArrayList<>(0); }
/** * This method will return the list of loads which are loaded at the same interval. This property * is configurable. * * @param listOfSegmentsBelowThresholdSize * @return */ private static List<LoadMetadataDetails> identifySegmentsToBeMergedBasedOnLoadedDate( List<LoadMetadataDetails> listOfSegmentsBelowThresholdSize) { List<LoadMetadataDetails> loadsOfSameDate = new ArrayList<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE); long numberOfDaysAllowedToMerge = 0; try { numberOfDaysAllowedToMerge = Long.parseLong( CarbonProperties.getInstance() .getProperty( CarbonCommonConstants.DAYS_ALLOWED_TO_COMPACT, CarbonCommonConstants.DEFAULT_DAYS_ALLOWED_TO_COMPACT)); if (numberOfDaysAllowedToMerge < 0 || numberOfDaysAllowedToMerge > 100) { LOGGER.error( "The specified value for property " + CarbonCommonConstants.DAYS_ALLOWED_TO_COMPACT + " is incorrect." + " Correct value should be in range of 0 -100. Taking the default value."); numberOfDaysAllowedToMerge = Long.parseLong(CarbonCommonConstants.DEFAULT_DAYS_ALLOWED_TO_COMPACT); } } catch (NumberFormatException e) { numberOfDaysAllowedToMerge = Long.parseLong(CarbonCommonConstants.DEFAULT_DAYS_ALLOWED_TO_COMPACT); } // if true then process loads according to the load date. if (numberOfDaysAllowedToMerge > 0) { // filter loads based on the loaded date boolean first = true; Date segDate1 = null; SimpleDateFormat sdf = new SimpleDateFormat(CarbonCommonConstants.CARBON_TIMESTAMP); for (LoadMetadataDetails segment : listOfSegmentsBelowThresholdSize) { if (first) { segDate1 = initializeFirstSegment(loadsOfSameDate, segment, sdf); first = false; continue; } String segmentDate = segment.getLoadStartTime(); Date segDate2 = null; try { segDate2 = sdf.parse(segmentDate); } catch (ParseException e) { LOGGER.error("Error while parsing segment start time" + e.getMessage()); } if (isTwoDatesPresentInRequiredRange(segDate1, segDate2, numberOfDaysAllowedToMerge)) { loadsOfSameDate.add(segment); } // if the load is beyond merged date. 
// then reset everything and continue search for loads. else if (loadsOfSameDate.size() < 2) { loadsOfSameDate.clear(); // need to add the next segment as first and to check further segDate1 = initializeFirstSegment(loadsOfSameDate, segment, sdf); } else { // case where a load is beyond merge date and there is at least 2 loads to merge. break; } } } else { return listOfSegmentsBelowThresholdSize; } return loadsOfSameDate; }
/**
 * Updates the table status file after a compaction: marks every merged-in segment as
 * compacted and appends a new entry for the merged segment, all under the table status lock.
 *
 * @param loadsToMerge segments that were merged into the new segment
 * @param metaDataFilepath path of the metadata folder to read the current load details from
 * @param MergedLoadName full merged load folder name; the segment number is extracted from it
 *        (NOTE(review): parameter name violates lowerCamelCase convention)
 * @param carbonLoadModel load model providing table identity and partition id
 * @param mergeLoadStartTime start time recorded on the new merged segment entry
 * @param compactionType when MAJOR_COMPACTION, the new entry is flagged as major compacted
 * @return true when the status file was successfully rewritten; false when the lock could not
 *         be acquired, a merged-in segment was deleted mid-compaction, or the write failed
 */
public static boolean updateLoadMetadataWithMergeStatus(
    List<LoadMetadataDetails> loadsToMerge,
    String metaDataFilepath,
    String MergedLoadName,
    CarbonLoadModel carbonLoadModel,
    String mergeLoadStartTime,
    CompactionType compactionType) {
  boolean tableStatusUpdationStatus = false;
  AbsoluteTableIdentifier absoluteTableIdentifier =
      carbonLoadModel.getCarbonDataLoadSchema().getCarbonTable().getAbsoluteTableIdentifier();
  SegmentStatusManager segmentStatusManager = new SegmentStatusManager(absoluteTableIdentifier);
  // table status lock guards the read-modify-write of the status file against concurrent writers
  ICarbonLock carbonLock = segmentStatusManager.getTableStatusLock();
  try {
    if (carbonLock.lockWithRetries()) {
      LOGGER.info(
          "Acquired lock for the table "
              + carbonLoadModel.getDatabaseName()
              + "."
              + carbonLoadModel.getTableName()
              + " for table status updation ");
      CarbonTablePath carbonTablePath =
          CarbonStorePath.getCarbonTablePath(
              absoluteTableIdentifier.getStorePath(),
              absoluteTableIdentifier.getCarbonTableIdentifier());
      String statusFilePath = carbonTablePath.getTableStatusFilePath();
      // re-read the load details under the lock so concurrent updates are observed
      LoadMetadataDetails[] loadDetails = segmentStatusManager.readLoadMetadata(metaDataFilepath);
      // extract the segment number that follows the load-folder prefix in the merged load name
      String mergedLoadNumber =
          MergedLoadName.substring(
              MergedLoadName.lastIndexOf(CarbonCommonConstants.LOAD_FOLDER)
                  + CarbonCommonConstants.LOAD_FOLDER.length(),
              MergedLoadName.length());
      String modificationOrDeletionTimeStamp = CarbonLoaderUtil.readCurrentTime();
      for (LoadMetadataDetails loadDetail : loadDetails) {
        // check if this segment is merged.
        if (loadsToMerge.contains(loadDetail)) {
          // if the compacted load is deleted after the start of the compaction process,
          // then need to discard the compaction process and treat it as failed compaction.
          // (early return still releases the lock via the finally block below)
          if (loadDetail
              .getLoadStatus()
              .equalsIgnoreCase(CarbonCommonConstants.MARKED_FOR_DELETE)) {
            LOGGER.error(
                "Compaction is aborted as the segment "
                    + loadDetail.getLoadName()
                    + " is deleted after the compaction is started.");
            return tableStatusUpdationStatus;
          }
          // mark the source segment as compacted and link it to the merged segment number
          loadDetail.setLoadStatus(CarbonCommonConstants.SEGMENT_COMPACTED);
          loadDetail.setModificationOrdeletionTimesStamp(modificationOrDeletionTimeStamp);
          loadDetail.setMergedLoadName(mergedLoadNumber);
        }
      }
      // create entry for merged one.
      LoadMetadataDetails loadMetadataDetails = new LoadMetadataDetails();
      loadMetadataDetails.setPartitionCount(carbonLoadModel.getPartitionId());
      loadMetadataDetails.setLoadStatus(CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS);
      String loadEnddate = CarbonLoaderUtil.readCurrentTime();
      loadMetadataDetails.setTimestamp(loadEnddate);
      loadMetadataDetails.setLoadName(mergedLoadNumber);
      loadMetadataDetails.setLoadStartTime(mergeLoadStartTime);
      // NOTE(review): this overwrites the partition count set from getPartitionId() above —
      // confirm whether the earlier setPartitionCount call is intentional or dead
      loadMetadataDetails.setPartitionCount("0");
      // if this is a major compaction then set the segment as major compaction.
      if (compactionType == CompactionType.MAJOR_COMPACTION) {
        loadMetadataDetails.setMajorCompacted("true");
      }
      List<LoadMetadataDetails> updatedDetailsList = new ArrayList<>(Arrays.asList(loadDetails));
      // put the merged folder entry
      updatedDetailsList.add(loadMetadataDetails);
      try {
        segmentStatusManager.writeLoadDetailsIntoFile(
            statusFilePath,
            updatedDetailsList.toArray(new LoadMetadataDetails[updatedDetailsList.size()]));
        tableStatusUpdationStatus = true;
      } catch (IOException e) {
        // write failure: leave tableStatusUpdationStatus false so the caller treats it as failed
        LOGGER.error("Error while writing metadata");
      }
    } else {
      LOGGER.error(
          "Could not able to obtain lock for table"
              + carbonLoadModel.getDatabaseName()
              + "."
              + carbonLoadModel.getTableName()
              + "for table status updation");
    }
  } finally {
    // always release the table status lock, on success, failure, and the early abort above
    if (carbonLock.unlock()) {
      LOGGER.info(
          "Table unlocked successfully after table status updation"
              + carbonLoadModel.getDatabaseName()
              + "."
              + carbonLoadModel.getTableName());
    } else {
      LOGGER.error(
          "Unable to unlock Table lock for table"
              + carbonLoadModel.getDatabaseName()
              + "."
              + carbonLoadModel.getTableName()
              + " during table status updation");
    }
  }
  return tableStatusUpdationStatus;
}