/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Does not perform any kind of encoding/decoding.
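 * <p>
 * A minimal usage sketch (illustrative only; {@code chunk} and {@code out} stand for an existing
 * {@code BlockIndexChunk} and {@code DataOutput}):
 *
 * <pre>
 * HFileIndexBlockEncoder encoder = NoOpIndexBlockEncoder.INSTANCE;
 * encoder.encode(chunk, true, out); // root index block format
 * encoder.encode(chunk, false, out); // non-root (leaf/intermediate) format
 * </pre>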
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

  @Override
  public void saveMetadata(HFile.Writer writer) {
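    // No-op: an unencoded index has no encoder metadata to persist.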
  }

  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
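   * <p>
   * On-disk layout as written below (a descriptive sketch of this method's output, not a
   * normative spec):
   *
   * <pre>
   * int32: number of entries
   * int32 x (numEntries + 1): secondary index of relative entry offsets, with one extra element
   *                           holding the total entry size, so entry i occupies
   *                           [secondaryIndex[i], secondaryIndex[i + 1])
   * repeated numEntries times:
   *   int64: block offset
   *   int32: on-disk block size
   *   byte[]: first key of the block (not length-prefixed; lengths come from the secondary index)
   * </pre>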
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works because at
    // read time we read the number of entries and know where the secondary
    // index ends.
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks()) {
      out.writeInt(currentSecondaryIndex);
    }

    // We include one other element in the secondary index to calculate the
    // size of each entry more easily by subtracting secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }

  /**
   * Writes this chunk into the given output stream in the root block index format. This format is
   * similar to the {@link HFile} version 1 block index format, except that we store the on-disk
   * size of the block instead of its uncompressed size.
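   * <p>
   * On-disk layout as written below (a descriptive sketch of this method's output, not a
   * normative spec):
   *
   * <pre>
   * repeated numEntries times:
   *   int64: block offset
   *   int32: on-disk block size
   *   vint-prefixed byte[]: first key of the block (written via {@code Bytes.writeByteArray})
   * </pre>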
   * @param blockIndexChunk the block index chunk to write
   * @param out             the data output stream to write the block index to. Typically a stream
   *                        writing into an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }

  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

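  /**
   * Seeker over an unencoded root index block: materializes the root-level keys, block offsets,
   * and on-disk sizes into arrays, plus optional mid-key metadata.
   */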
  protected static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private Cell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<Cell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Add the array plus per-element reference overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Add the heap size of each key
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add the comparator and the midKey AtomicReference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public Cell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // HFileBlock.getByteStream() returns a byte stream for reading the data (excluding checksum)
      // of the root index block, so after reading the root index there is no need to subtract the
      // checksum bytes.
      if (in.available() < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries)
      throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

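    /**
     * Reads {@code numEntries} root index entries from {@code in}, populating the arrays of block
     * offsets, on-disk sizes, and first keys.
     */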
    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new Cell[numEntries];
    }

    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }

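    /**
     * Returns the approximate middle key of the file: read from the middle leaf-level index block
     * when mid-key metadata is present (multi-level index), otherwise taken as the middle entry of
     * the root index. The result is computed once and cached in {@link #midKey}.
     */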
    @Override
    public Cell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) {
        throw new IOException("HFile empty");
      }

      Cell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException(
            "Have to read the middle leaf block but no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          byte[] bytes = HFileBlockIndex.BlockIndexReader
            .getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);
          assert bytes != null;
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }

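    /**
     * Walks the index tree from the root towards the data block that may contain {@code key}. At
     * each level it locates the entry for the key, reads the next-level block, and tracks the next
     * indexed key so the scanner knows where the returned data block ends.
     */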
    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // The next indexed key, used to tell the scanner where the returned data block ends.
      Cell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (true) {
        try {
          // Must initialize the block to null here, because if an exception happens in readBlock,
          // we would otherwise release the previously assigned block twice in the finally block.
          // (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workloads which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // This also accounts for ENCODED_DATA.
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block: break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update the next indexed key if there is a next indexed key in the current level.
          byte[] nonRootIndexedKey =
            HFileBlockIndex.BlockIndexReader.getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not the data block.
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We reached a data block before descending through all index levels, which indicates a
        // corrupt index. Release the block so that its reference count is decremented before we
        // fail.
        if (block != null) {
          block.release();
        }
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // Set the next indexed key for the current block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }

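    /**
     * Finds the root-level index of the block that could contain the given key: the greatest i
     * such that blockKeys[i] &lt;= key, or -1 if the key is before the first block key (i.e. the
     * file does not contain the key).
     */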
    @Override
    public int rootBlockContainingKey(Cell key) {
      // The comparator will not be null here, since this is only called for root-level blocks.
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=").append(rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append(blockKeys[i]).append("\n  offset=").append(blockOffsets[i])
          .append(", dataSize=").append(blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }
  }
}