private void readMetaData(File f, FetchedDocument doc) { try { InputStreamReader is = new InputStreamReader(new FileInputStream(f), "UTF-8"); BufferedReader reader = new BufferedReader(is); Map<String, String> metadata = new HashMap<String, String>(); String line = null; while ((line = reader.readLine()) != null) { if (line.length() == 0) { continue; } String[] values = line.split(":", 2); String key = values[0]; String value = values[1]; if ("url".equalsIgnoreCase(key)) { doc.setDocumentURL(value); } else if ("host".equalsIgnoreCase(key)) { // skip, do nothing } else if ("Content-Type".equalsIgnoreCase(key)) { doc.setContentType(value); } else if ("Charset".equalsIgnoreCase(key)) { doc.setContentCharset(value); } else { metadata.put(key, value); } } reader.close(); doc.setDocumentMetadata(metadata); } catch (IOException e) { throw new RuntimeException( "Error while reading metadata from file: '" + f.getAbsolutePath() + "'", e); } }
/**
 * Persists a fetched document: its content goes to the document's data file and
 * its properties go to the document's properties file.
 *
 * @param doc document to store; its id determines the group and both file locations
 */
public void saveDocument(FetchedDocument doc) {
    // Make sure the group derived from the document id exists before writing into it.
    createGroup(docIdUtils.getDocumentGroupId(doc.getDocumentId()));

    // Content first, then the accompanying metadata.
    saveContent(getDataFile(doc.getDocumentId()), doc.getDocumentContent());
    saveMetadata(getPropertiesFile(doc.getDocumentId()), doc);
}
/**
 * Writes the properties of {@code doc} to the file {@code f} as UTF-8.
 *
 * <p>The properties {@code url}, {@code Content-Type} and {@code Charset} are
 * written first, followed by every entry of the document metadata map. Each
 * property is emitted through {@code writeProperty}; the exact line format is
 * defined there.
 *
 * @param f   destination file; it is created or overwritten
 * @param doc document whose properties are written
 * @throws RuntimeException if the file cannot be opened or written
 */
private void saveMetadata(File f, FetchedDocument doc) {
    try {
        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(f), "UTF-8"));
        try {
            writeProperty(bw, "url", doc.getDocumentURL());
            writeProperty(bw, "Content-Type", doc.getContentType());
            writeProperty(bw, "Charset", doc.getContentCharset());

            Map<String, String> metadata = doc.getDocumentMetadata();
            // A document without extra metadata still gets its three fixed properties.
            if (metadata != null) {
                for (Map.Entry<String, String> entry : metadata.entrySet()) {
                    writeProperty(bw, entry.getKey(), entry.getValue());
                }
            }
            bw.flush();
        } finally {
            // Release the file handle even when a write fails part-way through.
            bw.close();
        }
    } catch (IOException e) {
        throw new RuntimeException(
                "Error while writing metadata to file: '" + f.getAbsolutePath() + "'", e);
    }
}
// document id contains the set encoded in it public FetchedDocument getDocument(String documentId) { File dataFile = getDataFile(documentId); if (!dataFile.exists()) { throw new RuntimeException("Document with id: '" + documentId + "' doesn't exist."); } FetchedDocument doc = new FetchedDocument(); doc.setDocumentId(documentId); byte[] data = readData(dataFile); doc.setDocumentContent(data); File propsFile = getPropertiesFile(documentId); if (!propsFile.exists()) { throw new RuntimeException( "Properties for document with id: '" + documentId + "' don't exist."); } readMetaData(propsFile, doc); return doc; }