/**
  * Decodes a generic Avro datum into a {@code Value}.
  *
  * <p>A {@code GenericRecord} whose schema equals {@code Schemas.COMPRESSED_IDENTIFIER} or
  * {@code Schemas.PLAIN_IDENTIFIER} is passed to {@code decodeIdentifier}; any other input
  * (including records with a different schema) is passed to {@code decodeLiteral}.
  *
  * @param generic the datum to decode
  * @return whatever the selected delegate decoder returns
  */
 private Value decodeValue(final Object generic) {
   // Anything that is not a record can only be a literal.
   if (!(generic instanceof GenericRecord)) {
     return decodeLiteral(generic);
   }
   final GenericRecord encoded = (GenericRecord) generic;
   final Schema encodedSchema = encoded.getSchema();
   final boolean isIdentifier =
       encodedSchema.equals(Schemas.COMPRESSED_IDENTIFIER)
           || encodedSchema.equals(Schemas.PLAIN_IDENTIFIER);
   if (isIdentifier) {
     return decodeIdentifier(encoded);
   }
   return decodeLiteral(generic);
 }
 /**
  * Decodes a generic Avro datum by dispatching on the schema of the record that carries it.
  *
  * <p>Dispatch order for {@code GenericRecord} inputs: {@code Schemas.RECORD} goes to
  * {@code decodeRecord} (with a {@code null} second argument), the plain/compressed identifier
  * schemas go to {@code decodeIdentifier}, and {@code Schemas.STATEMENT} goes to
  * {@code decodeStatement}. Everything else, including non-record inputs, goes to
  * {@code decodeLiteral}.
  *
  * @param generic the datum to decode
  * @return the object produced by the selected decoder
  */
 private Object decodeNode(final Object generic) {
   if (!(generic instanceof GenericRecord)) {
     return decodeLiteral(generic);
   }

   final GenericRecord encoded = (GenericRecord) generic;
   final Schema encodedSchema = encoded.getSchema();

   if (encodedSchema.equals(Schemas.RECORD)) {
     return decodeRecord(encoded, null);
   }
   if (encodedSchema.equals(Schemas.PLAIN_IDENTIFIER)
       || encodedSchema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
     return decodeIdentifier(encoded);
   }
   if (encodedSchema.equals(Schemas.STATEMENT)) {
     return decodeStatement(encoded);
   }

   // Records with any other schema are treated as literal encodings.
   return decodeLiteral(generic);
 }
 /**
  * Decodes an identifier record into a {@code Resource}.
  *
  * <p>For {@code Schemas.COMPRESSED_IDENTIFIER} the first field is cast to {@code Integer} and
  * looked up in the dictionary. For {@code Schemas.PLAIN_IDENTIFIER} the first field is read as
  * a string: a value starting with {@code "_:"} becomes a blank node named by the remainder,
  * anything else becomes a URI.
  *
  * @param record the encoded identifier
  * @return the decoded resource
  * @throws IllegalStateException if the dictionary lookup fails with an {@code IOException}
  * @throws IllegalArgumentException if the record's schema is neither identifier schema
  */
 private Resource decodeIdentifier(final GenericRecord record) {
   final Schema identifierSchema = record.getSchema();

   if (identifierSchema.equals(Schemas.COMPRESSED_IDENTIFIER)) {
     try {
       return this.dictionary.objectFor((Integer) record.get(0));
     } catch (final IOException ex) {
       throw new IllegalStateException("Cannot access dictionary: " + ex.getMessage(), ex);
     }
   }

   if (!identifierSchema.equals(Schemas.PLAIN_IDENTIFIER)) {
     throw new IllegalArgumentException("Unsupported encoded identifier: " + record);
   }

   final String text = record.get(0).toString();
   if (text.startsWith("_:")) {
     // Strip the "_:" prefix to obtain the blank node id.
     return this.factory.createBNode(text.substring(2));
   }
   return this.factory.createURI(text);
 }
Exemple #4
0
  /**
   * Returns true if the types of two avro schemas are equal. This ignores things like custom field
   * properties that the equals() implementation of Schema checks.
   *
   * <p>Comparison rules: the top-level types must match; ENUM and FIXED schemas are compared with
   * {@code Schema.equals}; ARRAY element types, MAP value types and UNION branches (position by
   * position) are compared recursively; RECORD schemas must have the same number of fields and
   * every field must have a same-named counterpart whose schema compares equal recursively. All
   * remaining types are primitive, so a matching type is sufficient.
   *
   * <p>Self-referencing (recursive) record schemas are supported: a pair of record schemas that
   * has already been visited during the current comparison is not descended into again.
   *
   * @param schema1 The first schema to compare
   * @param schema2 The second schema to compare
   * @return True if the types are equal, otherwise false.
   */
  public static boolean avroSchemaTypesEqual(Schema schema1, Schema schema2) {
    return avroSchemaTypesEqual(
        schema1, schema2, new java.util.IdentityHashMap<Schema, java.util.Set<Schema>>());
  }

  /**
   * Recursive worker for {@link #avroSchemaTypesEqual(Schema, Schema)}.
   *
   * @param schema1 The first schema to compare
   * @param schema2 The second schema to compare
   * @param comparedRecords identity-keyed record of every (schema1, schema2) RECORD pair already
   *     visited in this comparison; used to terminate on recursive schemas
   * @return True if the types are equal, otherwise false.
   */
  private static boolean avroSchemaTypesEqual(
      Schema schema1,
      Schema schema2,
      java.util.Map<Schema, java.util.Set<Schema>> comparedRecords) {
    if (schema1.getType() != schema2.getType()) {
      // if the types aren't equal, no need to go further. Return false
      return false;
    }

    switch (schema1.getType()) {
      case ENUM:
      case FIXED:
        // Enum and Fixed types schemas should be equal using the Schema.equals method.
        return schema1.equals(schema2);

      case ARRAY:
        // Element schemas are compared recursively.
        return avroSchemaTypesEqual(
            schema1.getElementType(), schema2.getElementType(), comparedRecords);

      case MAP:
        // Value schemas are compared recursively.
        return avroSchemaTypesEqual(
            schema1.getValueType(), schema2.getValueType(), comparedRecords);

      case UNION:
        {
          // Union branches are compared position by position.
          java.util.List<Schema> branches1 = schema1.getTypes();
          java.util.List<Schema> branches2 = schema2.getTypes();
          if (branches1.size() != branches2.size()) {
            return false;
          }
          for (int i = 0; i < branches1.size(); i++) {
            if (!avroSchemaTypesEqual(branches1.get(i), branches2.get(i), comparedRecords)) {
              return false;
            }
          }
          return true;
        }

      case RECORD:
        {
          // A pair seen before is either still being compared further up the call stack (a
          // recursive schema) or was already found equal: any mismatch returns false all the
          // way up immediately, so a previously failed pair can never be reached again.
          java.util.Set<Schema> partners = comparedRecords.get(schema1);
          if (partners == null) {
            partners =
                java.util.Collections.newSetFromMap(
                    new java.util.IdentityHashMap<Schema, Boolean>());
            comparedRecords.put(schema1, partners);
          }
          if (!partners.add(schema2)) {
            return true;
          }

          // Fields are matched by name and their schemas compared recursively.
          if (schema1.getFields().size() != schema2.getFields().size()) {
            return false;
          }
          for (Field field1 : schema1.getFields()) {
            Field field2 = schema2.getField(field1.name());
            if (field2 == null) {
              return false;
            }
            if (!avroSchemaTypesEqual(field1.schema(), field2.schema(), comparedRecords)) {
              return false;
            }
          }
          return true;
        }

      default:
        // All other types are primitive, so them matching in type is enough.
        return true;
    }
  }
 /**
  * Finds the first schema in {@code schemas} whose definition, once parsed as an Avro schema,
  * equals the parsed {@code definition}.
  *
  * @param schemas candidates, checked in iteration order
  * @param definition Avro schema text to look for
  * @return the first matching candidate, or {@code null} when none matches
  */
 @Override
 public Schema match(List<Schema> schemas, String definition) {
   final org.apache.avro.Schema wanted = new org.apache.avro.Schema.Parser().parse(definition);
   for (Schema candidate : schemas) {
     // Each candidate is parsed with its own parser instance.
     final org.apache.avro.Schema parsedCandidate =
         new org.apache.avro.Schema.Parser().parse(candidate.getDefinition());
     if (parsedCandidate.equals(wanted)) {
       return candidate;
     }
   }
   return null;
 }
  /**
   * Writes a record, optionally preceded by per-field dirty and readable flags.
   *
   * <p>The first datum seen is remembered as {@code persistent}. When {@code writeDirtyBits} is
   * off, or when {@code schema} is not equal to the schema of {@code persistent}, the call is
   * delegated unchanged to the superclass. Otherwise two boolean arrays (dirty flags, then
   * readable flags, one entry per field) are written, followed by the values of the readable
   * fields only.
   *
   * @param schema schema of the record being written
   * @param datum the record instance
   * @param out encoder receiving the output
   * @throws IOException if writing to the encoder fails
   */
  @Override
  @SuppressWarnings("unchecked")
  protected void writeRecord(Schema schema, Object datum, Encoder out) throws IOException {
    if (persistent == null) {
      persistent = (T) datum;
    }

    // Flags are emitted only for the schema of the persistent object, and only when enabled.
    if (!writeDirtyBits || !schema.equals(persistent.getSchema())) {
      super.writeRecord(schema, datum, out);
      return;
    }

    final boolean[] dirtyFlags = new boolean[schema.getFields().size()];
    final boolean[] readableFlags = new boolean[schema.getFields().size()];
    final StateManager stateManager = persistent.getStateManager();

    for (int index = 0; index < dirtyFlags.length; index++) {
      dirtyFlags[index] = stateManager.isDirty(persistent, index);
      readableFlags[index] = stateManager.isReadable(persistent, index);
    }

    IOUtils.writeBoolArray(out, dirtyFlags);
    IOUtils.writeBoolArray(out, readableFlags);

    // Fields that are not readable are skipped entirely.
    for (Field field : schema.getFields()) {
      if (!readableFlags[field.pos()]) {
        continue;
      }
      write(field.schema(), getData().getField(datum, field.name(), field.pos()), out);
    }
  }
 /**
  * Decodes a generic Avro datum into a {@code Literal}.
  *
  * <p>Record inputs are dispatched on their schema: {@code STRING_LANG} (label + language),
  * {@code SHORT}, {@code BYTE}, {@code BIGINTEGER} (typed {@code XMLSchema.INTEGER}),
  * {@code BIGDECIMAL} (typed {@code XMLSchema.DECIMAL}) and {@code CALENDAR}. Non-record inputs
  * are handled by Java type: {@code CharSequence}, {@code Boolean}, {@code Long},
  * {@code Integer}, {@code Double} and {@code Float}.
  *
  * @param generic the datum to decode
  * @return the decoded literal
  * @throws NullPointerException if {@code generic} is {@code null}
  * @throws IllegalArgumentException if the datum is a record with an unhandled schema or has an
  *     unhandled Java type
  */
 private Literal decodeLiteral(final Object generic) {
   if (generic instanceof GenericRecord) {
     final GenericRecord record = (GenericRecord) generic;
     final Schema schema = record.getSchema();
     if (schema.equals(Schemas.STRING_LANG)) {
       final String label = record.get(0).toString(); // Utf8 class used
       final Object language = record.get(1);
       return this.factory.createLiteral(label, language.toString());
     } else if (schema.equals(Schemas.SHORT)) {
       return this.factory.createLiteral(((Integer) record.get(0)).shortValue());
     } else if (schema.equals(Schemas.BYTE)) {
       return this.factory.createLiteral(((Integer) record.get(0)).byteValue());
     } else if (schema.equals(Schemas.BIGINTEGER)) {
       return this.factory.createLiteral(record.get(0).toString(), XMLSchema.INTEGER);
     } else if (schema.equals(Schemas.BIGDECIMAL)) {
       return this.factory.createLiteral(record.get(0).toString(), XMLSchema.DECIMAL);
     } else if (schema.equals(Schemas.CALENDAR)) {
       // Field 0 is used as a signed offset in minutes, field 1 as epoch milliseconds.
       final int tz = (Integer) record.get(0);
       final GregorianCalendar calendar = new GregorianCalendar();
       calendar.setTimeInMillis((Long) record.get(1));
       // Locale.ROOT keeps the digits ASCII. With a default locale that localizes digits the
       // id would not be parseable, and TimeZone.getTimeZone silently falls back to GMT for
       // ids it cannot understand.
       calendar.setTimeZone(
           TimeZone.getTimeZone(
               String.format(
                   java.util.Locale.ROOT,
                   "GMT%s%02d:%02d",
                   tz >= 0 ? "+" : "-",
                   Math.abs(tz) / 60,
                   Math.abs(tz) % 60)));
       return this.factory.createLiteral(this.datatypeFactory.newXMLGregorianCalendar(calendar));
     }
   } else if (generic instanceof CharSequence) {
     return this.factory.createLiteral(generic.toString()); // Utf8 class used
   } else if (generic instanceof Boolean) {
     return this.factory.createLiteral((Boolean) generic);
   } else if (generic instanceof Long) {
     return this.factory.createLiteral((Long) generic);
   } else if (generic instanceof Integer) {
     return this.factory.createLiteral((Integer) generic);
   } else if (generic instanceof Double) {
     return this.factory.createLiteral((Double) generic);
   } else if (generic instanceof Float) {
     return this.factory.createLiteral((Float) generic);
   }
   // Reached for null input, unhandled Java types and records with an unhandled schema.
   Preconditions.checkNotNull(generic);
   throw new IllegalArgumentException("Unsupported generic data: " + generic);
 }
  /**
   * Verifies that the Avro schema being pushed is the same as the one already registered for the
   * store, and creates the store if it does not exist yet. Does not have logic to check for Avro
   * schema evolution yet.
   *
   * <p>Steps, in order:
   *
   * <ol>
   *   <li>Build a local store definition from the Avro schema found at the input path and from
   *       the job properties.
   *   <li>Fetch the store definitions from node {@code nodeId} of the cluster at {@code url} and
   *       look for one named {@code storeName}.
   *   <li>If it exists but differs from the local definition, and both remote serializers are
   *       {@code avro-generic} with exactly one schema version, compare the parsed key/value
   *       schemas. If they are equal, rebuild the local definition around the remote schema text
   *       and compare again; any remaining difference is an error.
   *   <li>If it does not exist, require a description and owners, then add the store.
   *   <li>Set the {@code storeDefs} and {@code cluster} fields.
   * </ol>
   *
   * @param url bootstrap URL passed to the {@code AdminClient}
   * @throws RuntimeException if the remote store definition does not match, or if a new store is
   *     missing its description or owners
   * @throws Exception as declared; not thrown directly by the statements in this method
   */
  public void verifyAvroSchema(String url) throws Exception {
    // create new n store def with schema from the metadata in the input
    // path
    Schema schema = AvroUtils.getAvroSchemaFromPath(getInputPath());
    int replicationFactor = props.getInt("build.replication.factor", 2);
    int requiredReads = props.getInt("build.required.reads", 1);
    int requiredWrites = props.getInt("build.required.writes", 1);
    String description = props.getString("push.store.description", "");
    String owners = props.getString("push.store.owners", "");

    // Serializer XML fragments built from the schemas of the configured key and value fields.
    String keySchema =
        "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">"
            + schema.getField(keyField).schema()
            + "</schema-info>\n\t";
    String valSchema =
        "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">"
            + schema.getField(valueField).schema()
            + "</schema-info>\n\t";

    // Only the presence of the key is checked; its value is never read.
    boolean hasCompression = false;
    if (props.containsKey("build.compress.value")) hasCompression = true;

    if (hasCompression) {
      valSchema += "\t<compression><type>gzip</type></compression>\n\t";
    }

    // Explicit overrides replace the generated fragments entirely (including the compression
    // element appended above).
    if (props.containsKey("build.force.schema.key")) {
      keySchema = props.get("build.force.schema.key");
    }

    if (props.containsKey("build.force.schema.value")) {
      valSchema = props.get("build.force.schema.value");
    }

    // NOTE(review): the overrides below use the "push.force.schema.*" keys while the ones above
    // use "build.force.schema.*" -- confirm that both prefixes are intended.
    String newStoreDefXml =
        VoldemortUtils.getStoreDefXml(
            storeName,
            replicationFactor,
            requiredReads,
            requiredWrites,
            props.containsKey("build.preferred.reads")
                ? props.getInt("build.preferred.reads")
                : null,
            props.containsKey("build.preferred.writes")
                ? props.getInt("build.preferred.writes")
                : null,
            (props.containsKey("push.force.schema.key"))
                ? props.getString("push.force.schema.key")
                : keySchema,
            (props.containsKey("push.force.schema.value"))
                ? props.getString("push.force.schema.value")
                : valSchema,
            description,
            owners);

    log.info("Verifying store: \n" + newStoreDefXml.toString());

    StoreDefinition newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml);

    // get store def from cluster
    log.info("Getting store definition from: " + url + " (node id " + this.nodeId + ")");

    AdminClient adminClient = new AdminClient(url, new AdminClientConfig());
    try {
      List<StoreDefinition> remoteStoreDefs =
          adminClient.getRemoteStoreDefList(this.nodeId).getValue();
      boolean foundStore = false;

      // go over all store defs and see if one has the same name as the
      // store we're trying
      // to build
      for (StoreDefinition remoteStoreDef : remoteStoreDefs) {
        if (remoteStoreDef.getName().equals(storeName)) {
          // if the store already exists, but doesn't match what we
          // want to push, we need
          // to worry
          if (!remoteStoreDef.equals(newStoreDef)) {

            // let's check to see if the key/value serializers are
            // REALLY equal.
            SerializerDefinition localKeySerializerDef = newStoreDef.getKeySerializer();
            SerializerDefinition localValueSerializerDef = newStoreDef.getValueSerializer();
            SerializerDefinition remoteKeySerializerDef = remoteStoreDef.getKeySerializer();
            SerializerDefinition remoteValueSerializerDef = remoteStoreDef.getValueSerializer();

            // NOTE(review): when this condition is false (remote serializer is not avro-generic,
            // or has more than one schema version) the mismatch detected above is not reported
            // and execution continues as if the store matched -- confirm this is intended.
            if (remoteKeySerializerDef.getName().equals("avro-generic")
                && remoteValueSerializerDef.getName().equals("avro-generic")
                && remoteKeySerializerDef.getAllSchemaInfoVersions().size() == 1
                && remoteValueSerializerDef.getAllSchemaInfoVersions().size() == 1) {
              // Compare parsed schemas rather than their textual form.
              Schema remoteKeyDef = Schema.parse(remoteKeySerializerDef.getCurrentSchemaInfo());
              Schema remoteValDef = Schema.parse(remoteValueSerializerDef.getCurrentSchemaInfo());
              Schema localKeyDef = Schema.parse(localKeySerializerDef.getCurrentSchemaInfo());
              Schema localValDef = Schema.parse(localValueSerializerDef.getCurrentSchemaInfo());

              if (remoteKeyDef.equals(localKeyDef) && remoteValDef.equals(localValDef)) {
                String compressionPolicy = "";
                if (hasCompression) {
                  compressionPolicy = "\n\t\t<compression><type>gzip</type></compression>";
                }

                // if the key/value serializers are REALLY equal
                // (even though the strings may not match), then
                // just use the remote stores to GUARANTEE that
                // they
                // match, and try again.
                // This call passes no description/owners arguments, unlike the first build.
                newStoreDefXml =
                    VoldemortUtils.getStoreDefXml(
                        storeName,
                        replicationFactor,
                        requiredReads,
                        requiredWrites,
                        props.containsKey("build.preferred.reads")
                            ? props.getInt("build.preferred.reads")
                            : null,
                        props.containsKey("build.preferred.writes")
                            ? props.getInt("build.preferred.writes")
                            : null,
                        "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">"
                            + remoteKeySerializerDef.getCurrentSchemaInfo()
                            + "</schema-info>\n\t",
                        "\n\t\t<type>avro-generic</type>\n\t\t<schema-info version=\"0\">"
                            + remoteValueSerializerDef.getCurrentSchemaInfo()
                            + "</schema-info>"
                            + compressionPolicy
                            + "\n\t");

                newStoreDef = VoldemortUtils.getStoreDef(newStoreDefXml);

                if (!remoteStoreDef.equals(newStoreDef)) {
                  // if we still get a fail, then we know that
                  // the
                  // store defs don't match for reasons OTHER
                  // than
                  // the key/value serializer
                  throw new RuntimeException(
                      "Your store schema is identical, but the store definition does not match. Have: "
                          + newStoreDef
                          + "\nBut expected: "
                          + remoteStoreDef);
                }
              } else {
                // if the key/value serializers are not equal
                // (even
                // in java, not just json strings), then fail
                throw new RuntimeException(
                    "Your store definition does not match the store definition that is already in the cluster. Tried to resolve identical schemas between local and remote, but failed. Have: "
                        + newStoreDef
                        + "\nBut expected: "
                        + remoteStoreDef);
              }
            }
          }

          // Only the first store with a matching name is considered.
          foundStore = true;
          break;
        }
      }

      // if the store doesn't exist yet, create it
      if (!foundStore) {
        // New requirement - Make sure the user had description and
        // owner specified
        if (description.length() == 0) {
          throw new RuntimeException(
              "Description field missing in store definition. "
                  + "Please add \"push.store.description\" with a line describing your store");
        }

        if (owners.length() == 0) {
          throw new RuntimeException(
              "Owner field missing in store definition. "
                  + "Please add \"push.store.owners\" with value being comma-separated list of LinkedIn email ids");
        }

        log.info("Could not find store " + storeName + " on Voldemort. Adding it to all nodes ");
        adminClient.addStore(newStoreDef);
      }

      // NOTE(review): the definition stored here is rebuilt from the local keySchema/valSchema,
      // without description/owners, without the "push.force.schema.*" overrides and without the
      // remote schema text used above -- confirm that it may differ from newStoreDef.
      storeDefs =
          ImmutableList.of(
              VoldemortUtils.getStoreDef(
                  VoldemortUtils.getStoreDefXml(
                      storeName,
                      replicationFactor,
                      requiredReads,
                      requiredWrites,
                      props.containsKey("build.preferred.reads")
                          ? props.getInt("build.preferred.reads")
                          : null,
                      props.containsKey("build.preferred.writes")
                          ? props.getInt("build.preferred.writes")
                          : null,
                      keySchema,
                      valSchema)));
      cluster = adminClient.getAdminClientCluster();
    } finally {
      // Always release the admin client, including on the failure paths above.
      adminClient.stop();
    }
  }