Пример #1
0
  /**
   * Parses a single <code>Representation</code> from its string representation.
   *
   * <p>The expected layout is <code>|B&lt;type&gt;[:&lt;name&gt;]| &lt;body&gt; |E...</code>: the
   * header is the text between the begin marker and the next delimiter, the body is the trimmed
   * text between that delimiter and the following end marker. Any text preceding the begin marker
   * is ignored.
   *
   * @param textualRepresentation the string to be parsed
   * @return an entry whose key is the representation name declared in the header (<code>null</code>
   *     when the header carries no name) and whose value is the parsed representation
   * @throws InstantiationException if the begin marker, the header delimiter or the end marker is
   *     missing, or if the representation factory fails to build the representation
   */
  public static Entry<String, Representation> parseSingleRepresentation(
      String textualRepresentation) throws InstantiationException {
    int beginHeaderIndex = textualRepresentation.indexOf(BEGIN_REPRESENTATION);
    if (beginHeaderIndex == -1) {
      throw new InstantiationException(
          "Missing begin marker '" + BEGIN_REPRESENTATION + "' in: " + textualRepresentation);
    }
    int headerStart = beginHeaderIndex + BEGIN_REPRESENTATION.length();
    int endHeaderIndex = textualRepresentation.indexOf(DELIMITER, headerStart);
    if (endHeaderIndex == -1) {
      throw new InstantiationException(
          "Unterminated representation header in: " + textualRepresentation);
    }
    int bodyStart = endHeaderIndex + DELIMITER.length();
    // Search the end marker only after the header: a body starting with 'E' right after the
    // header delimiter would otherwise be mistaken for the end marker.
    int endRepresentation = textualRepresentation.indexOf(END_REPRESENTATION, bodyStart);
    if (endRepresentation == -1) {
      throw new InstantiationException(
          "Missing end marker '" + END_REPRESENTATION + "' in: " + textualRepresentation);
    }

    String representationHeader = textualRepresentation.substring(headerStart, endHeaderIndex);
    String representationBody =
        textualRepresentation.substring(bodyStart, endRepresentation).trim();
    // Parameterized logging avoids building the (possibly large) messages when debug is off.
    logger.debug("representation header: {}", representationHeader);
    logger.debug("representation body: {}", representationBody);

    // The header is either "<type>" or "<type>:<name>".
    int descriptionSeparator = representationHeader.indexOf(REPRESENTATION_TYPE_NAME_SEPARATOR);
    String representationType;
    String representationName;
    if (descriptionSeparator == -1) {
      representationType = representationHeader;
      representationName = null;
    } else {
      representationType = representationHeader.substring(0, descriptionSeparator);
      representationName =
          representationHeader.substring(
              descriptionSeparator + REPRESENTATION_TYPE_NAME_SEPARATOR.length());
    }
    logger.debug("representation type: {}", representationType);
    logger.debug("representation name: {}", representationName);

    Representation representation =
        representationFactory.parseRepresentation(representationType, representationBody);
    return new AbstractMap.SimpleEntry<String, Representation>(representationName, representation);
  }
Пример #2
0
 /**
  * Builds the textual form of a representation: begin marker, type identifier, optional
  * <code>:identifier</code> suffix, delimiter, a blank, the representation data as text, and
  * finally the end marker followed by the type identifier and a closing delimiter.
  *
  * @param representation the representation to be written as text
  * @param identifier the name to attach to the representation, or <code>null</code> for none
  * @return the textual form of <code>representation</code>
  */
 public static String getTextualRepresentation(Representation representation, String identifier) {
   String typeId = RepresentationFactory.getRepresentationIdentifier(representation.getClass());

   StringBuilder text = new StringBuilder();
   text.append(ExampleFactory.BEGIN_REPRESENTATION).append(typeId);
   // The ":name" suffix is written only when a name has been supplied.
   if (identifier != null) {
     text.append(ExampleFactory.REPRESENTATION_TYPE_NAME_SEPARATOR).append(identifier);
   }
   text.append(ExampleFactory.DELIMITER)
       .append(" ")
       .append(representation.getTextFromData())
       .append(ExampleFactory.END_REPRESENTATION)
       .append(typeId)
       .append(ExampleFactory.DELIMITER);
   return text.toString();
 }
Пример #3
0
/**
 * Factory that provides methods for instantiating an example described in a textual format.
 *
 * <p>The expected input for an example with N labels and M representations is a String of the
 * form:
 *
 * <pre>
 * Label_1 Label_2 ... Label_n |BR_1:I_1| ... |ER_1| |BR_2:I_2| ... |ER_2| ... |BR_m:I_m| ... |ER_m|
 * </pre>
 *
 * where the R_i are identifiers of the representation types (some R_i can be the same) while the
 * I_i are the specific representation identifiers (I_i != I_j for all i != j); for instance R_1=DV
 * (dense vector) and I_1=bow.
 *
 * <p>An example pair is written as {@link #BEGIN_PAIR}, the left example, {@link
 * #PAIR_SEPARATOR}, the right example and {@link #END_PAIR}, optionally followed by
 * representations that belong to the pair itself.
 *
 * @author Simone Filice
 */
public class ExampleFactory {
  private static final Logger logger = LoggerFactory.getLogger(ExampleFactory.class);

  public static final String LABEL_SEPARATOR = " ";
  public static final String REPRESENTATION_TYPE_NAME_SEPARATOR = ":";
  public static final String REPRESENTATION_SEPARATOR = " ";
  public static final String DELIMITER = "|";
  public static final String BEGIN_REPRESENTATION = DELIMITER + "B";
  public static final String END_REPRESENTATION = DELIMITER + "E";
  public static final String BEGIN_PAIR = DELIMITER + "<" + DELIMITER;
  public static final String END_PAIR = DELIMITER + ">" + DELIMITER;
  public static final String PAIR_SEPARATOR = DELIMITER + "," + DELIMITER;

  private static final RepresentationFactory representationFactory =
      RepresentationFactory.getInstance();

  /**
   * Parses a single <code>Representation</code> from its string representation.
   *
   * <p>The header is the text between the begin marker and the next delimiter; the body is the
   * trimmed text between that delimiter and the following end marker. Any text preceding the begin
   * marker is ignored.
   *
   * @param textualRepresentation the string to be parsed
   * @return an entry whose key is the representation name declared in the header (<code>null</code>
   *     when the header carries no name) and whose value is the parsed representation
   * @throws InstantiationException if the begin marker, the header delimiter or the end marker is
   *     missing, or if the representation factory fails to build the representation
   */
  public static Entry<String, Representation> parseSingleRepresentation(
      String textualRepresentation) throws InstantiationException {
    int beginHeaderIndex = textualRepresentation.indexOf(BEGIN_REPRESENTATION);
    if (beginHeaderIndex == -1) {
      throw new InstantiationException(
          "Missing begin marker '" + BEGIN_REPRESENTATION + "' in: " + textualRepresentation);
    }
    int headerStart = beginHeaderIndex + BEGIN_REPRESENTATION.length();
    int endHeaderIndex = textualRepresentation.indexOf(DELIMITER, headerStart);
    if (endHeaderIndex == -1) {
      throw new InstantiationException(
          "Unterminated representation header in: " + textualRepresentation);
    }
    int bodyStart = endHeaderIndex + DELIMITER.length();
    // Search the end marker only after the header: a body starting with 'E' right after the
    // header delimiter would otherwise be mistaken for the end marker.
    int endRepresentation = textualRepresentation.indexOf(END_REPRESENTATION, bodyStart);
    if (endRepresentation == -1) {
      throw new InstantiationException(
          "Missing end marker '" + END_REPRESENTATION + "' in: " + textualRepresentation);
    }

    String representationHeader = textualRepresentation.substring(headerStart, endHeaderIndex);
    String representationBody =
        textualRepresentation.substring(bodyStart, endRepresentation).trim();
    // Parameterized logging avoids building the (possibly large) messages when debug is off.
    logger.debug("representation header: {}", representationHeader);
    logger.debug("representation body: {}", representationBody);

    // The header is either "<type>" or "<type>:<name>".
    int descriptionSeparator = representationHeader.indexOf(REPRESENTATION_TYPE_NAME_SEPARATOR);
    String representationType;
    String representationName;
    if (descriptionSeparator == -1) {
      representationType = representationHeader;
      representationName = null;
    } else {
      representationType = representationHeader.substring(0, descriptionSeparator);
      representationName =
          representationHeader.substring(
              descriptionSeparator + REPRESENTATION_TYPE_NAME_SEPARATOR.length());
    }
    logger.debug("representation type: {}", representationType);
    logger.debug("representation name: {}", representationName);

    Representation representation =
        representationFactory.parseRepresentation(representationType, representationBody);
    return new AbstractMap.SimpleEntry<String, Representation>(representationName, representation);
  }

  /**
   * Builds the textual form of a representation without attaching any name to it.
   *
   * @param representation the representation to be written as text
   * @return the textual form of <code>representation</code>
   */
  public static String getTextualRepresentation(Representation representation) {
    return getTextualRepresentation(representation, null);
  }

  /**
   * Builds the textual form of a representation: begin marker, type identifier, optional
   * <code>:identifier</code> suffix, delimiter, a blank, the representation data as text, and
   * finally the end marker followed by the type identifier and a closing delimiter.
   *
   * @param representation the representation to be written as text
   * @param identifier the name to attach to the representation, or <code>null</code> for none
   * @return the textual form of <code>representation</code>
   */
  public static String getTextualRepresentation(Representation representation, String identifier) {
    String representationType =
        RepresentationFactory.getRepresentationIdentifier(representation.getClass());
    String identifierPart = "";
    if (identifier != null) {
      identifierPart = REPRESENTATION_TYPE_NAME_SEPARATOR + identifier;
    }
    return BEGIN_REPRESENTATION
        + representationType
        + identifierPart
        + DELIMITER
        + " "
        + representation.getTextFromData()
        + END_REPRESENTATION
        + representationType
        + DELIMITER;
  }

  /**
   * Initializes and returns the example described in <code>exampleDescription</code>.
   *
   * <p>Everything before the first delimiter is read as a list of labels separated by {@link
   * #LABEL_SEPARATOR}; the rest is parsed as an example pair when it starts with {@link
   * #BEGIN_PAIR}, as a simple example otherwise.
   *
   * @param exampleDescription the textual description of the example to be instantiated
   * @return the example described in <code>exampleDescription</code>, with its labels added
   * @throws InstantiationException if the description contains no delimiter or is malformed
   */
  public static Example parseExample(String exampleDescription) throws InstantiationException {
    int beginFirstRepIndex = exampleDescription.indexOf(DELIMITER);
    if (beginFirstRepIndex == -1) {
      throw new InstantiationException(
          "No representation found in example description: " + exampleDescription);
    }
    String labelsPart = exampleDescription.substring(0, beginFirstRepIndex).trim();
    String representationsPart = exampleDescription.substring(beginFirstRepIndex).trim();

    Example example;
    if (representationsPart.startsWith(BEGIN_PAIR)) {
      example = parseExamplePair(representationsPart);
    } else {
      example = parseSimpleExample(representationsPart);
    }

    // Attach the labels, skipping the empty tokens produced by consecutive separators.
    String[] stringLabels = labelsPart.split(LABEL_SEPARATOR);
    for (String labelDescription : stringLabels) {
      String trimmedLabel = labelDescription.trim();
      if (trimmedLabel.length() > 0) {
        Label label = LabelFactory.parseLabel(trimmedLabel);
        example.addLabel(label);
      }
    }
    return example;
  }

  /**
   * Parses a sequence of representations, each one enclosed between a begin marker and a closed
   * end marker.
   *
   * @param representationsPart the text containing zero or more representations
   * @return the parsed representations indexed by name; a representation without a name is
   *     indexed by its zero-based position in the sequence
   * @throws InstantiationException if a representation is not properly delimited or cannot be
   *     parsed
   */
  private static HashMap<String, Representation> parseRepresentations(String representationsPart)
      throws InstantiationException {
    HashMap<String, Representation> representations = new HashMap<String, Representation>();
    String representationRemaining = representationsPart.trim();
    int representationCount = 0;
    while (representationRemaining.length() > 0) {
      int beginIndex = representationRemaining.indexOf(BEGIN_REPRESENTATION);
      if (beginIndex == -1) {
        throw new InstantiationException(
            "Missing begin marker '" + BEGIN_REPRESENTATION + "' in: " + representationRemaining);
      }
      int endHeaderIndex =
          representationRemaining.indexOf(DELIMITER, beginIndex + BEGIN_REPRESENTATION.length());
      if (endHeaderIndex == -1) {
        throw new InstantiationException(
            "Unterminated representation header in: " + representationRemaining);
      }
      // Look for the end marker after the header only, so that a body starting with 'E' right
      // after the header delimiter is not mistaken for the end marker.
      int endRepresentationStartIndex =
          representationRemaining.indexOf(END_REPRESENTATION, endHeaderIndex + DELIMITER.length());
      if (endRepresentationStartIndex == -1) {
        throw new InstantiationException(
            "Missing end marker '" + END_REPRESENTATION + "' in: " + representationRemaining);
      }
      int endRepresentationEndIndex =
          representationRemaining.indexOf(
              DELIMITER, endRepresentationStartIndex + END_REPRESENTATION.length());
      if (endRepresentationEndIndex == -1) {
        throw new InstantiationException(
            "Unterminated end marker in: " + representationRemaining);
      }

      String representationDescription =
          representationRemaining.substring(0, endRepresentationEndIndex + 1);
      Entry<String, Representation> entry = parseSingleRepresentation(representationDescription);

      representationRemaining =
          representationRemaining.substring(endRepresentationEndIndex + 1).trim();
      String representationName = entry.getKey();
      if (representationName == null) {
        representationName = Integer.toString(representationCount);
      }
      representations.put(representationName, entry.getValue());
      representationCount++;
    }
    return representations;
  }

  /**
   * Builds a <code>SimpleExample</code> holding the representations described in the input.
   *
   * @param representationsPart the text containing the representations of the example
   * @return a new simple example with the parsed representations set
   * @throws InstantiationException if a representation is malformed
   */
  private static SimpleExample parseSimpleExample(String representationsPart)
      throws InstantiationException {
    SimpleExample example = new SimpleExample();
    HashMap<String, Representation> representations = parseRepresentations(representationsPart);
    example.setRepresentations(representations);

    return example;
  }

  /**
   * Initializes and returns the example pair described in <code>examplePairDescription</code>.
   *
   * <p>The text between the first {@link #BEGIN_PAIR} and the last {@link #END_PAIR} is split at
   * the {@link #PAIR_SEPARATOR} that belongs to the outermost pair; both halves are parsed
   * recursively as examples. Text following the last {@link #END_PAIR} is parsed as
   * representations of the pair itself.
   *
   * @param examplePairDescription the textual description of the example pair to be instantiated
   * @return the example pair described in <code>examplePairDescription</code>
   * @throws InstantiationException if the pair markers are missing or imbalanced, or if one of
   *     the nested descriptions is malformed
   */
  private static ExamplePair parseExamplePair(String examplePairDescription)
      throws InstantiationException {
    int beginMarkerIndex = examplePairDescription.indexOf(BEGIN_PAIR);
    int end = examplePairDescription.lastIndexOf(END_PAIR);
    if (beginMarkerIndex == -1 || end < beginMarkerIndex + BEGIN_PAIR.length()) {
      throw new InstantiationException(
          "Missing or misplaced pair markers in: " + examplePairDescription);
    }
    int begin = beginMarkerIndex + BEGIN_PAIR.length();

    String pairWithoutBrackets = examplePairDescription.substring(begin, end).trim();

    // The separator of the outermost pair is the first one for which the number of separators
    // seen so far equals one plus the number of nested BEGIN_PAIR markers preceding it.
    int pairSeparatorIndex = 0;
    int beginPairIndex = 0;
    int pairSeparatorCount = 0;
    int beginPairCount = 1;
    while (pairSeparatorCount != beginPairCount) {
      pairSeparatorIndex = pairWithoutBrackets.indexOf(PAIR_SEPARATOR, pairSeparatorIndex);
      if (pairSeparatorIndex == -1) {
        throw new InstantiationException("Imbalanced example pair!");
      }
      pairSeparatorCount++;
      while (true) {
        int nextBeginPair = pairWithoutBrackets.indexOf(BEGIN_PAIR, beginPairIndex);
        if (nextBeginPair == -1) {
          // No nested pair is left. Park the cursor at the end of the text: keeping -1 would
          // make the next scan restart from index 0 and count the same markers twice.
          beginPairIndex = pairWithoutBrackets.length();
          break;
        }
        if (nextBeginPair > pairSeparatorIndex) {
          // This marker lies beyond the current separator; resume from it on the next round.
          beginPairIndex = nextBeginPair;
          break;
        }
        beginPairCount += 1;
        beginPairIndex = nextBeginPair + BEGIN_PAIR.length();
      }

      pairSeparatorIndex += PAIR_SEPARATOR.length();
    }
    String leftExampleDescr =
        pairWithoutBrackets.substring(0, pairSeparatorIndex - PAIR_SEPARATOR.length()).trim();
    String rightExampleDescr = pairWithoutBrackets.substring(pairSeparatorIndex).trim();
    Example leftExample = parseExample(leftExampleDescr);
    Example rightExample = parseExample(rightExampleDescr);
    ExamplePair pair = new ExamplePair(leftExample, rightExample);
    if (examplePairDescription.length() > end + END_PAIR.length()) {
      // Only the first character of END_PAIR is skipped here; the leftover of the marker is
      // harmless because each representation is located through its begin marker.
      String pairDirectRepresentations = examplePairDescription.substring(end + 1);
      HashMap<String, Representation> representations =
          parseRepresentations(pairDirectRepresentations);
      pair.setRepresentations(representations);
    }

    return pair;
  }
}