/**
 * Implementation of contains method for items for Basic File Persistence.
 *
 * <p>This works by building the target file name based on a field name from within the item and
 * then checks if the file exists. As a side effect the persister's {@code _uuid} field is set to
 * the value derived from the item's first content key.
 *
 * @param item to perform check for
 * @return true if the file exists (and hence the item is persisted)
 * @throws PersistenceException if the item has no content or the contains check fails
 */
@Override
public boolean contains(Item item) throws PersistenceException {
  try {
    Map<String, Object> contents = item.getContents();
    Set<String> keys = contents.keySet();
    if (keys.isEmpty()) {
      throw new PersistenceException("Provided item has no content");
    }

    // Use the first key in the set's iteration order. The earlier
    // "(String[]) keys.toArray()" cast failed at runtime with a ClassCastException, because the
    // no-argument toArray() of the standard collections returns an Object[].
    String firstKey = keys.iterator().next();
    _uuid = ItemNameTools.getUUID(firstKey);

    File checkFile = new File(this.getTargetLocation());
    return checkFile.exists();
  } catch (PersistenceException exc) {
    // Already carries a specific message; do not wrap it a second time.
    throw exc;
  } catch (Exception exc) {
    throw new PersistenceException("Contains check failed due to " + exc.toString());
  }
}
@Override public List<Item> generate(File inputFile, String source) throws GenerationException, IOException { FileValidation.standardChecks(inputFile); List<Item> generated = this.generate(new FileInputStream(inputFile), source); List<Item> enriched = new Vector<Item>(); for (Item item : generated) { // Basic File Comparitors, FILE_NAME, FILE_SIZE and FILE_MODIFIED_UTC // TODO - check if this is enough item.addComparitor(StandardAspects.FILE_FILENAME); item.addComparitor(StandardAspects.FILE_SIZE_BYTES); item.addComparitor(StandardAspects.FILE_MODIFIED_UTC); enriched.add(AspectEnrichment.fileEnrichment(item, inputFile)); } return enriched; }
@Override public List<Item> generate(String inputString, String source) throws GenerationException { List<Item> items = new Vector<Item>(); Item item = new Item(); item = this.setMandatoryAspects(item, source); // TODO // Add use of utils to remove punctuation and tidy textual content inputString = TextUtils.clean(inputString); String[] tokens = inputString.split("[ ]"); item.addString("text_token_count", Integer.toString(tokens.length)); item.addString("text_content", inputString); Vector<String> uniqueTokens = new Vector<String>(); for (String token : tokens) { if (!uniqueTokens.contains(token)) uniqueTokens.add(token); } item.addString("text_unique_token_count", Integer.toString(uniqueTokens.size())); StringBuffer uniques = null; for (String token : uniqueTokens) { if (uniques == null) { uniques = new StringBuffer(token); } else { uniques.append(" " + token); } } item.addString("text_unique_tokens", uniques.toString()); items.add(item); return items; }
@Override public List<Item> generate(URL url, String source) throws GenerationException { try { HttpURLConnection connection = (HttpURLConnection) url.openConnection(); List<Item> generated = this.generate(connection.getInputStream(), source); List<Item> enriched = new Vector<Item>(); for (Item item : generated) { // URL basic Text, comparitors are URL and MODIFIED // TODO - This may not work with dynamic pages, check and fix item.addComparitor(StandardAspects.URL_URL); item.addComparitor(StandardAspects.URL_MODIFIED_UTC); enriched.add(AspectEnrichment.urlEnrichment(item, connection)); } return enriched; } catch (Exception exc) { throw new GenerationException(exc); } }
@Override public boolean persistItem(Item item, boolean overwrite) throws PersistenceException { // Caveat - this method uses the *provided* uuid as an indicator of file location. This // abstracts the item contents from the target storage indicator which could be an overhead, // but it allows the system to persist vanilla items (non-qualified items) as well as fully // qualified. // // Also for sake of simplicity the persister just uses String objects from the aspects - the // non-String // objects are discarded **BAD UTH** // // For fully qualified items simply call the item name utils to get the uuid and use that as // the provided parameter to the object. // First check parameters have been setup correctly if (_uuid == null && _targetDirectory == null) { throw new PersistenceException("Persister not initialised (missing parameters)."); } // Prepare the target file String filename = this.getTargetLocation(); // If overwrite then remove the file if it exists if (overwrite) { @SuppressWarnings("unused") boolean success = this.removeItem(item); } else { if (this.itemExists(item)) { throw new PersistenceException("Overwrite disabled and item is already persisted."); } } // Debatable behaviour model - open file now, write aspects as extracted try { PrintWriter out = new PrintWriter(new FileOutputStream(filename)); // Manually map the created date as a separate field out.print("CREATED:::" + Long.toString(item.getCreationUTC()) + "\n"); // Manually store the comparitors for re-constituting the item List<String> comparitors = item.getComparitors(); StringBuffer compBuffer = null; for (String comparitor : comparitors) { if (compBuffer == null) { compBuffer = new StringBuffer("COMPARITORS:::" + comparitor); } else { compBuffer.append("," + comparitor); } } out.print(compBuffer.toString() + "\n"); // Now split the aspects and discard the non-java.util.String ones (for now, **BAD UTH**) Map<String, Object> contents = item.getContents(); for (String key : contents.keySet()) 
{ Object value = contents.get(key); if (value.getClass().getCanonicalName().equals("java.lang.String")) { String payload = (String) value; // Convert the data to store-able String output = key + ":::" + payload + "\n"; out.print(output); } } out.close(); } catch (Exception exc) { throw new PersistenceException("File output failure due to " + exc.toString()); } return false; }