static Collection<String> fieldToAlias(Settings settings, Fields fields) {
   FieldAlias fa = alias(settings);
   List<String> names = asStrings(fields);
   for (int i = 0; i < names.size(); i++) {
     String original = names.get(i);
     String alias = fa.toES(original);
     if (alias != null) {
       names.set(i, alias);
     }
   }
   return names;
 }
Esempio n. 2
0
  static Collection<String> columnToAlias(Settings settings) {
    FieldAlias fa = alias(settings);
    List<String> columnNames =
        StringUtils.tokenize(settings.getProperty(HiveConstants.COLUMNS), ",");
    // eliminate virtual columns
    // we can't use virtual columns since some distro don't have this field...
    //        for (VirtualColumn vc : VirtualColumn.VIRTUAL_COLUMNS) {
    //            columnNames.remove(vc.getName());
    //        }

    for (String vc : HiveConstants.VIRTUAL_COLUMNS) {
      columnNames.remove(vc);
    }

    for (int i = 0; i < columnNames.size(); i++) {
      String original = columnNames.get(i);
      String alias = fa.toES(original);
      if (alias != null) {
        columnNames.set(i, alias);
      }
    }
    return columnNames;
  }
  public boolean write(
      Object object, ResourceFieldSchema field, Generator generator, boolean writeFieldName) {
    byte type = (field != null ? field.getType() : DataType.findType(object));

    if (writeFieldName) {
      generator.writeFieldName(alias.toES(field.getName()));
    }

    if (object == null) {
      generator.writeNull();
      return true;
    }

    switch (type) {
      case DataType.ERROR:
      case DataType.UNKNOWN:
        return handleUnknown(object, field, generator);
      case DataType.NULL:
        generator.writeNull();
        break;
      case DataType.BOOLEAN:
        generator.writeBoolean((Boolean) object);
        break;
      case DataType.INTEGER:
        generator.writeNumber(((Number) object).intValue());
        break;
      case DataType.LONG:
        generator.writeNumber(((Number) object).longValue());
        break;
      case DataType.FLOAT:
        generator.writeNumber(((Number) object).floatValue());
        break;
      case DataType.DOUBLE:
        generator.writeNumber(((Number) object).doubleValue());
        break;
      case DataType.BYTE:
        generator.writeNumber((Byte) object);
        break;
      case DataType.CHARARRAY:
        generator.writeString(object.toString());
        break;
      case DataType.BYTEARRAY:
        generator.writeBinary(((DataByteArray) object).get());
        break;
        // DateTime introduced in Pig 11
      case 30: // DataType.DATETIME
        generator.writeString(PigUtils.convertDateToES(object));
        break;
        // DateTime introduced in Pig 12
      case 65: // DataType.BIGINTEGER
        throw new SerializationException(
            "Big integers are not supported by Elasticsearch - consider using a different type (such as string)");
        // DateTime introduced in Pig 12
      case 70: // DataType.BIGDECIMAL
        throw new SerializationException(
            "Big decimals are not supported by Elasticsearch - consider using a different type (such as string)");
      case DataType.MAP:
        ResourceSchema nestedSchema = field.getSchema();

        // empty tuple shortcut
        if (nestedSchema == null) {
          generator.writeBeginObject();
          generator.writeEndObject();
          break;
        }

        ResourceFieldSchema[] nestedFields = nestedSchema.getFields();

        generator.writeBeginObject();
        // Pig maps are actually String -> Object association so we can save the key right away
        for (Map.Entry<?, ?> entry : ((Map<?, ?>) object).entrySet()) {
          generator.writeFieldName(alias.toES(entry.getKey().toString()));
          write(entry.getValue(), nestedFields[0], generator, false);
        }
        generator.writeEndObject();
        break;

      case DataType.TUPLE:
        nestedSchema = field.getSchema();

        // empty tuple shortcut
        if (nestedSchema == null) {
          generator.writeBeginObject();
          generator.writeEndObject();
          break;
        }

        nestedFields = nestedSchema.getFields();

        // use getAll instead of get(int) to avoid having to handle Exception...
        List<Object> tuples = ((Tuple) object).getAll();

        generator.writeBeginObject();
        for (int i = 0; i < nestedFields.length; i++) {
          String name = nestedFields[i].getName();
          // handle schemas without names
          name = (StringUtils.hasText(name) ? alias.toES(name) : Integer.toString(i));
          generator.writeFieldName(name);
          write(tuples.get(i), nestedFields[i], generator, false);
        }
        generator.writeEndObject();
        break;

      case DataType.BAG:
        nestedSchema = field.getSchema();

        // empty tuple shortcut
        if (nestedSchema == null) {
          generator.writeBeginArray();
          generator.writeEndArray();
          break;
        }

        ResourceFieldSchema bagType = nestedSchema.getFields()[0];

        generator.writeBeginArray();
        for (Tuple tuple : (DataBag) object) {
          write(tuple, bagType, generator, false);
        }
        generator.writeEndArray();
        break;
      default:
        if (writeUnknownTypes) {
          return handleUnknown(object, field, generator);
        }
        return false;
    }
    return true;
  }