Example #1
0
    /**
     * Maps one delimited input record to (bucket key, record) pairs for a
     * bucketed pairwise join.
     *
     * <p>The entity descriptor is resolved lazily on the first record, either
     * from the leading characters of the split's file name (when
     * {@code identifyWithFilePrefix} is set) or from the record's field count.
     * Records of the first entity (type 0) are replicated across all buckets
     * with key suffix 0; records of any other entity type with key suffix 1,
     * so every reducer bucket pair sees both sides of the join.
     *
     * @param key byte offset of the line within the input split (unused)
     * @param value delimited input record
     * @param context task context used to emit the (key, value) pairs
     * @throws IOException if emitting a pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
        throws IOException, InterruptedException {
      String[] items = value.toString().split(fieldDelimRegex);

      if (null == entity) {
        if (identifyWithFilePrefix) {
          // Identify the entity by the file name prefix of this input split.
          FileSplit fileInpSplit = (FileSplit) context.getInputSplit();
          String filePrefix = fileInpSplit.getPath().getName().substring(0, filePrefixLength);
          entity = schema.getEntityByFilePrefix(filePrefix);
        } else {
          // Identify the entity by the number of fields in the record.
          entity = schema.getEntityBySize(items.length);
        }
        // The lookup may return null; dereferencing it unguarded here would
        // NPE before the null check below ever ran (bug in the original).
        if (null != entity) {
          idOrdinal = entity.getIdField().getOrdinal();
        }
      }

      if (null != entity) {
        // Non-negative bucket index derived from the id field's hash.
        hash = items[idOrdinal].hashCode() % bucketCount;
        hash = hash < 0 ? -hash : hash;
        if (entity.getType() == 0) {
          // In file-prefix mode, tag the record with its entity type so the
          // reducer can tell the two sides apart.
          if (identifyWithFilePrefix) {
            valueHolder.set("0," + value.toString());
          } else {
            valueHolder.set(value);
          }
          // First entity: pair bucket `hash` with every bucket i, suffix 0.
          for (int i = 0; i < bucketCount; ++i) {
            keyHolder.set((hash * bucketCount + i) * 10);
            context.write(keyHolder, valueHolder);
          }
        } else {
          if (identifyWithFilePrefix) {
            valueHolder.set("1," + value.toString());
          } else {
            valueHolder.set(value);
          }
          // Second entity: mirrored key layout (i paired with `hash`),
          // suffix 1, so it lands in the same reducer groups as type 0.
          for (int i = 0; i < bucketCount; ++i) {
            keyHolder.set(((i * bucketCount + hash) * 10) + 1);
            context.write(keyHolder, valueHolder);
          }
        }
      }
      // Records whose entity cannot be resolved are skipped; the original
      // empty else branch is removed. NOTE(review): consider incrementing a
      // counter here so dropped records are visible in job stats.
    }