Ejemplo n.º 1
0
  /**
   * Reads a document's metadata file and populates the given document with its contents.
   *
   * <p>The file is decoded as UTF-8 and processed line by line. Empty lines are skipped; every
   * other line is split at the first ':' into a key and a value. The keys {@code url},
   * {@code Content-Type} and {@code Charset} (matched case-insensitively) are passed to the
   * corresponding setters on the document, {@code host} is ignored, and all remaining pairs are
   * collected into a map that is handed to {@code setDocumentMetadata}.
   *
   * @param f metadata file to read
   * @param doc document that receives the parsed values
   * @throws RuntimeException if the file cannot be read, or if a non-empty line has no ':'
   *     separator
   */
  private void readMetaData(File f, FetchedDocument doc) {
    FileInputStream in = null;
    try {
      in = new FileInputStream(f);
      BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
      Map<String, String> metadata = new HashMap<String, String>();
      String line = null;
      int lineNumber = 0;
      while ((line = reader.readLine()) != null) {
        lineNumber++;
        if (line.length() == 0) {
          continue;
        }

        // Limit of 2 keeps any further ':' characters (e.g. in URLs) inside the value.
        String[] values = line.split(":", 2);
        if (values.length < 2) {
          // Fail with context instead of an opaque ArrayIndexOutOfBoundsException.
          throw new RuntimeException(
              "Malformed metadata line " + lineNumber + " (missing ':') in file: '"
                  + f.getAbsolutePath() + "'");
        }
        String key = values[0];
        String value = values[1];
        if ("url".equalsIgnoreCase(key)) {
          doc.setDocumentURL(value);
        } else if ("host".equalsIgnoreCase(key)) {
          // skip, do nothing
        } else if ("Content-Type".equalsIgnoreCase(key)) {
          doc.setContentType(value);
        } else if ("Charset".equalsIgnoreCase(key)) {
          doc.setContentCharset(value);
        } else {
          metadata.put(key, value);
        }
      }
      doc.setDocumentMetadata(metadata);
    } catch (IOException e) {
      throw new RuntimeException(
          "Error while reading metadata from file: '" + f.getAbsolutePath() + "'", e);
    } finally {
      // Always release the file handle, including when parsing or a setter throws.
      if (in != null) {
        try {
          in.close();
        } catch (IOException ignored) {
          // Nothing useful can be done if closing a stream opened for reading fails.
        }
      }
    }
  }
Ejemplo n.º 2
0
  /**
   * Persists a fetched document: its content goes to the document's data file and its
   * metadata to the document's properties file.
   *
   * @param doc document to store
   */
  public void saveDocument(FetchedDocument doc) {
    final String documentId = doc.getDocumentId();

    // Make sure the group this document belongs to exists before writing into it.
    createGroup(docIdUtils.getDocumentGroupId(documentId));

    // Content first, then the accompanying metadata.
    saveContent(getDataFile(documentId), doc.getDocumentContent());
    saveMetadata(getPropertiesFile(documentId), doc);
  }
Ejemplo n.º 3
0
  /**
   * Writes the metadata of a document to the given file, encoded as UTF-8.
   *
   * <p>The URL, content type and charset are written first under the keys {@code url},
   * {@code Content-Type} and {@code Charset}, followed by every entry of the document's
   * generic metadata map. Each property is emitted through {@code writeProperty}.
   *
   * @param f file to write the metadata to
   * @param doc document whose metadata is written
   * @throws RuntimeException if the file cannot be written
   */
  private void saveMetadata(File f, FetchedDocument doc) {
    FileOutputStream out = null;
    try {
      out = new FileOutputStream(f);
      BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"));

      writeProperty(bw, "url", doc.getDocumentURL());
      writeProperty(bw, "Content-Type", doc.getContentType());
      writeProperty(bw, "Charset", doc.getContentCharset());

      Map<String, String> metadata = doc.getDocumentMetadata();
      // A document without extra metadata simply has no further properties to write.
      if (metadata != null) {
        for (Map.Entry<String, String> entry : metadata.entrySet()) {
          writeProperty(bw, entry.getKey(), entry.getValue());
        }
      }
      // close() flushes the buffer; a failure here means data was lost, so let it propagate.
      bw.close();
    } catch (IOException e) {
      throw new RuntimeException(
          "Error while writing metadata to file: '" + f.getAbsolutePath() + "'", e);
    } finally {
      // Release the file handle when writing failed part-way; a second close is a no-op.
      if (out != null) {
        try {
          out.close();
        } catch (IOException ignored) {
          // Either already closed successfully, or the primary failure is already propagating.
        }
      }
    }
  }
Ejemplo n.º 4
0
  /**
   * Loads a stored document, including its content and metadata.
   *
   * <p>The document id contains the set encoded in it.
   *
   * @param documentId id of the document to load
   * @return the document populated with its id, content and metadata
   * @throws RuntimeException if the data file or the properties file for the id doesn't exist
   */
  public FetchedDocument getDocument(String documentId) {
    final File contentFile = getDataFile(documentId);
    if (!contentFile.exists()) {
      throw new RuntimeException("Document with id: '" + documentId + "' doesn't exist.");
    }

    final FetchedDocument fetched = new FetchedDocument();
    fetched.setDocumentId(documentId);
    fetched.setDocumentContent(readData(contentFile));

    // The properties file is only looked up after the content has been read.
    final File metadataFile = getPropertiesFile(documentId);
    if (metadataFile.exists()) {
      readMetaData(metadataFile, fetched);
      return fetched;
    }
    throw new RuntimeException(
        "Properties for document with id: '" + documentId + "' don't exist.");
  }