001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import java.io.DataInputStream;
021import java.io.DataOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import org.apache.hadoop.hbase.ExtendedCell;
026import org.apache.hadoop.hbase.KeyValue;
027import org.apache.hadoop.hbase.KeyValueUtil;
028import org.apache.hadoop.hbase.PrivateCellUtil;
029import org.apache.hadoop.hbase.util.Bytes;
030import org.apache.hadoop.io.serializer.Deserializer;
031import org.apache.hadoop.io.serializer.Serialization;
032import org.apache.hadoop.io.serializer.Serializer;
033import org.apache.yetus.audience.InterfaceAudience;
034
035/**
036 * Similar to CellSerialization, but includes the sequenceId from an ExtendedCell. This is necessary
037 * so that CellSortReducer can sort by sequenceId, if applicable. Note that these two serializations
038 * are not compatible -- data serialized by CellSerialization cannot be deserialized with
039 * ExtendedCellSerialization and vice versa. This is ok for {@link HFileOutputFormat2} because the
040 * serialization is not actually used for the actual written HFiles, just intermediate data (between
041 * mapper and reducer of a single job).
042 */
043@InterfaceAudience.Private
044public class ExtendedCellSerialization implements Serialization<ExtendedCell> {
045  @Override
046  public boolean accept(Class<?> c) {
047    return ExtendedCell.class.isAssignableFrom(c);
048  }
049
050  @Override
051  public ExtendedCellDeserializer getDeserializer(Class<ExtendedCell> t) {
052    return new ExtendedCellDeserializer();
053  }
054
055  @Override
056  public ExtendedCellSerializer getSerializer(Class<ExtendedCell> c) {
057    return new ExtendedCellSerializer();
058  }
059
060  public static class ExtendedCellDeserializer implements Deserializer<ExtendedCell> {
061    private DataInputStream dis;
062
063    @Override
064    public void close() throws IOException {
065      this.dis.close();
066    }
067
068    @Override
069    public KeyValue deserialize(ExtendedCell ignore) throws IOException {
070      KeyValue kv = KeyValueUtil.create(this.dis);
071      PrivateCellUtil.setSequenceId(kv, this.dis.readLong());
072      return kv;
073    }
074
075    @Override
076    public void open(InputStream is) throws IOException {
077      this.dis = new DataInputStream(is);
078    }
079  }
080
081  public static class ExtendedCellSerializer implements Serializer<ExtendedCell> {
082    private DataOutputStream dos;
083
084    @Override
085    public void close() throws IOException {
086      this.dos.close();
087    }
088
089    @Override
090    public void open(OutputStream os) throws IOException {
091      this.dos = new DataOutputStream(os);
092    }
093
094    @Override
095    public void serialize(ExtendedCell kv) throws IOException {
096      dos.writeInt(PrivateCellUtil.estimatedSerializedSizeOf(kv) - Bytes.SIZEOF_INT);
097      kv.write(dos, true);
098      dos.writeLong(kv.getSequenceId());
099    }
100  }
101}