001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.encoding; 019 020import java.io.DataInputStream; 021import java.io.DataOutputStream; 022import java.io.IOException; 023import java.nio.ByteBuffer; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.Cell; 026import org.apache.hadoop.hbase.CellComparator; 027import org.apache.hadoop.hbase.io.hfile.HFileContext; 028import org.apache.hadoop.hbase.nio.ByteBuff; 029import org.apache.yetus.audience.InterfaceAudience; 030 031/** 032 * Encoding of KeyValue. It aims to be fast and efficient using assumptions: 033 * <ul> 034 * <li>the KeyValues are stored sorted by key</li> 035 * <li>we know the structure of KeyValue</li> 036 * <li>the values are always iterated forward from beginning of block</li> 037 * <li>knowledge of Key Value format</li> 038 * </ul> 039 * It is designed to work fast enough to be feasible as in memory compression. 040 */ 041@InterfaceAudience.Private 042public interface DataBlockEncoder { 043 // TODO: This Interface should be deprecated and replaced. It presumes hfile and carnal knowledge 044 // of 045 // Cell internals. It was done for a different time. Remove. Purge. 046 /** 047 * Starts encoding for a block of KeyValues. Call 048 * {@link #endBlockEncoding(HFileBlockEncodingContext, DataOutputStream, byte[])} to finish 049 * encoding of a block. 050 */ 051 void startBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out) 052 throws IOException; 053 054 /** 055 * Encodes a KeyValue. After the encode, {@link EncodingState#postCellEncode(int, int)} needs to 056 * be called to keep track of the encoded and unencoded data size 057 */ 058 void encode(Cell cell, HFileBlockEncodingContext encodingCtx, DataOutputStream out) 059 throws IOException; 060 061 /** 062 * Ends encoding for a block of KeyValues. Gives a chance for the encoder to do the finishing 063 * stuff for the encoded block. It must be called at the end of block encoding. 064 */ 065 void endBlockEncoding(HFileBlockEncodingContext encodingCtx, DataOutputStream out, 066 byte[] uncompressedBytesWithHeader) throws IOException; 067 068 /** 069 * Decode. 070 * @param source Compressed stream of KeyValues. 071 * @return Uncompressed block of KeyValues. 072 * @throws IOException If there is an error in source. 073 */ 074 ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) 075 throws IOException; 076 077 /** 078 * Return first key in block as a cell. Useful for indexing. Typically does not make a deep copy 079 * but returns a buffer wrapping a segment of the actual block's byte array. This is because the 080 * first key in block is usually stored unencoded. 081 * @param block encoded block we want index, the position will not change 082 * @return First key in block as a cell. 083 */ 084 Cell getFirstKeyCellInBlock(ByteBuff block); 085 086 /** 087 * Create a HFileBlock seeker which find KeyValues within a block. 088 * @return A newly created seeker. 089 */ 090 EncodedSeeker createSeeker(HFileBlockDecodingContext decodingCtx); 091 092 /** 093 * Creates a encoder specific encoding context store configuration encoding strategy used header 094 * bytes to be written, put a dummy header here if the header is unknown HFile meta data 095 * @return a newly created encoding context 096 */ 097 HFileBlockEncodingContext newDataBlockEncodingContext(Configuration conf, 098 DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta); 099 100 /** 101 * Creates an encoder specific decoding context, which will prepare the data before actual 102 * decoding store configuration HFile meta data 103 * @return a newly created decoding context 104 */ 105 HFileBlockDecodingContext newDataBlockDecodingContext(Configuration conf, HFileContext meta); 106 107 /** 108 * An interface which enable to seek while underlying data is encoded. It works on one HFileBlock, 109 * but it is reusable. See {@link #setCurrentBuffer(ByteBuff)}. 110 */ 111 interface EncodedSeeker { 112 /** 113 * Set on which buffer there will be done seeking. 114 * @param buffer Used for seeking. 115 */ 116 void setCurrentBuffer(ByteBuff buffer); 117 118 /** 119 * From the current position creates a cell using the key part of the current buffer 120 * @return key at current position 121 */ 122 Cell getKey(); 123 124 /** 125 * Does a shallow copy of the value at the current position. A shallow copy is possible because 126 * the returned buffer refers to the backing array of the original encoded buffer. 127 * @return value at current position 128 */ 129 ByteBuffer getValueShallowCopy(); 130 131 /** Returns the Cell at the current position. Includes memstore timestamp. */ 132 Cell getCell(); 133 134 /** Set position to beginning of given block */ 135 void rewind(); 136 137 /** 138 * Move to next position 139 * @return true on success, false if there is no more positions. 140 */ 141 boolean next(); 142 143 /** 144 * Moves the seeker position within the current block to: 145 * <ul> 146 * <li>the last key that that is less than or equal to the given key if <code>seekBefore</code> 147 * is false</li> 148 * <li>the last key that is strictly less than the given key if <code> 149 * seekBefore</code> is true. The caller is responsible for loading the previous block if the 150 * requested key turns out to be the first key of the current block.</li> 151 * </ul> 152 * @param key - Cell to which the seek should happen 153 * @param seekBefore find the key strictly less than the given key in case of an exact match. 154 * Does not matter in case of an inexact match. 155 * @return 0 on exact match, 1 on inexact match. 156 */ 157 int seekToKeyInBlock(Cell key, boolean seekBefore); 158 159 /** 160 * Compare the given key against the current key 161 * @return -1 is the passed key is smaller than the current key, 0 if equal and 1 if greater 162 */ 163 public int compareKey(CellComparator comparator, Cell key); 164 } 165}