/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.MID_KEY_METADATA_SIZE;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.KeyValueScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Does not perform any kind of encoding/decoding.
 */
@InterfaceAudience.Private
public class NoOpIndexBlockEncoder implements HFileIndexBlockEncoder {

  public static final NoOpIndexBlockEncoder INSTANCE = new NoOpIndexBlockEncoder();

  /** Cannot be instantiated. Use {@link #INSTANCE} instead. */
  private NoOpIndexBlockEncoder() {
  }

  @Override
  public void saveMetadata(HFile.Writer writer) {
  }

  @Override
  public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
    throws IOException {
    if (rootIndexBlock) {
      writeRoot(blockIndexChunk, out);
    } else {
      writeNonRoot(blockIndexChunk, out);
    }
  }

  /**
   * Writes the block index chunk in the non-root index block format. This format contains the
   * number of entries, an index of integer offsets for quick binary search on variable-length
   * records, and tuples of block offset, on-disk block size, and the first key for each entry.
   */
  private void writeNonRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    // The number of entries in the block.
    out.writeInt(blockIndexChunk.getNumEntries());

    if (
      blockIndexChunk.getSecondaryIndexOffsetMarks().size() != blockIndexChunk.getBlockKeys().size()
    ) {
      throw new IOException("Corrupted block index chunk writer: "
        + blockIndexChunk.getBlockKeys().size() + " entries but "
        + blockIndexChunk.getSecondaryIndexOffsetMarks().size() + " secondary index items");
    }

    // For each entry, write a "secondary index" of relative offsets to the
    // entries from the end of the secondary index. This works, because at
    // read time we read the number of entries and know where the secondary
    // index ends.
    for (int currentSecondaryIndex : blockIndexChunk.getSecondaryIndexOffsetMarks())
      out.writeInt(currentSecondaryIndex);

    // We include one other element in the secondary index to calculate the
    // size of each entry more easily by subtracting secondary index elements.
    out.writeInt(blockIndexChunk.getCurTotalNonRootEntrySize());

    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      out.write(blockIndexChunk.getBlockKey(i));
    }
  }
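
  /*
   * Illustration (a sketch; exact values depend on the BlockIndexChunk implementation): for a
   * chunk with two entries whose first keys are k0 (a bytes) and k1 (b bytes), writeNonRoot above
   * emits:
   *   int 2                      - number of entries
   *   int 0, int (12 + a)        - secondary index: each entry's offset from the end of this index
   *   int (12 + a + 12 + b)      - one extra element so entry sizes fall out by subtraction
   *   long off0, int size0, k0   - entry 0: block offset, on-disk size, raw first-key bytes
   *   long off1, int size1, k1   - entry 1
   * where 12 bytes is the fixed long + int prefix of each entry.
   */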

  /**
   * Writes this chunk into the given output stream in the root block index format. This format is
   * similar to the {@link HFile} version 1 block index format, except that we store the on-disk
   * size of the block instead of its uncompressed size.
   * @param out the data output stream to write the block index to. Typically a stream writing into
   *            an {@link HFile} block.
   */
  private void writeRoot(BlockIndexChunk blockIndexChunk, DataOutput out) throws IOException {
    for (int i = 0; i < blockIndexChunk.getNumEntries(); ++i) {
      out.writeLong(blockIndexChunk.getBlockOffset(i));
      out.writeInt(blockIndexChunk.getOnDiskDataSize(i));
      Bytes.writeByteArray(out, blockIndexChunk.getBlockKey(i));
    }
  }
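
  // Illustration (a sketch): each root entry written above is
  //   long blockOffset, int onDiskDataSize, key (Bytes.writeByteArray prefixes the key bytes
  //   with a vint length).
  // For a multi-level index the root entries may be followed, in the same root index block, by
  // mid-key metadata (long midLeafBlockOffset, int midLeafBlockOnDiskSize, int midKeyEntry),
  // which NoOpEncodedSeeker.init below consumes when it is present.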

  @Override
  public IndexBlockEncoding getIndexBlockEncoding() {
    return IndexBlockEncoding.NONE;
  }

  @Override
  public EncodedSeeker createSeeker() {
    return new NoOpEncodedSeeker();
  }

  @Override
  public String toString() {
    return getClass().getSimpleName();
  }

  protected static class NoOpEncodedSeeker implements EncodedSeeker {

    protected long[] blockOffsets;
    protected int[] blockDataSizes;
    protected int rootCount = 0;

    // Mid-key metadata.
    protected long midLeafBlockOffset = -1;
    protected int midLeafBlockOnDiskSize = -1;
    protected int midKeyEntry = -1;

    private Cell[] blockKeys;
    private CellComparator comparator;
    protected int searchTreeLevel;

    /** Pre-computed mid-key */
    private AtomicReference<Cell> midKey = new AtomicReference<>();

    @Override
    public long heapSize() {
      long heapSize = ClassSize.align(ClassSize.OBJECT);

      // Mid-key metadata.
      heapSize += MID_KEY_METADATA_SIZE;

      if (blockOffsets != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length * Bytes.SIZEOF_LONG);
      }

      if (blockDataSizes != null) {
        heapSize += ClassSize.align(ClassSize.ARRAY + blockDataSizes.length * Bytes.SIZEOF_INT);
      }

      if (blockKeys != null) {
        heapSize += ClassSize.REFERENCE;
        // Adding array + references overhead
        heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE);

        // Adding blockKeys
        for (Cell key : blockKeys) {
          heapSize += ClassSize.align(key.heapSize());
        }
      }
      // Add comparator and the midKey AtomicReference
      heapSize += 2 * ClassSize.REFERENCE;
      // Add rootCount and searchTreeLevel
      heapSize += 2 * Bytes.SIZEOF_INT;

      return ClassSize.align(heapSize);
    }

    @Override
    public boolean isEmpty() {
      return blockKeys.length == 0;
    }

    @Override
    public Cell getRootBlockKey(int i) {
      return blockKeys[i];
    }

    @Override
    public int getRootBlockCount() {
      return rootCount;
    }

    @Override
    public void initRootIndex(HFileBlock blk, int numEntries, CellComparator comparator,
      int treeLevel) throws IOException {
      this.comparator = comparator;
      this.searchTreeLevel = treeLevel;
      init(blk, numEntries);
    }

    private void init(HFileBlock blk, int numEntries) throws IOException {
      DataInputStream in = readRootIndex(blk, numEntries);
      // HFileBlock.getByteStream() returns a byte stream for reading the data (excluding checksum)
      // of the root index block, so after reading the root index there is no need to subtract the
      // checksum bytes.
      if (in.available() < MID_KEY_METADATA_SIZE) {
        // No mid-key metadata available.
        return;
      }
      midLeafBlockOffset = in.readLong();
      midLeafBlockOnDiskSize = in.readInt();
      midKeyEntry = in.readInt();
    }

    private DataInputStream readRootIndex(HFileBlock blk, final int numEntries) throws IOException {
      DataInputStream in = blk.getByteStream();
      readRootIndex(in, numEntries);
      return in;
    }

    private void readRootIndex(DataInput in, final int numEntries) throws IOException {
      blockOffsets = new long[numEntries];
      initialize(numEntries);
      blockDataSizes = new int[numEntries];

      // If index size is zero, no index was written.
      if (numEntries > 0) {
        for (int i = 0; i < numEntries; ++i) {
          long offset = in.readLong();
          int dataSize = in.readInt();
          byte[] key = Bytes.readByteArray(in);
          add(key, offset, dataSize);
        }
      }
    }

    private void initialize(int numEntries) {
      blockKeys = new Cell[numEntries];
    }

    private void add(final byte[] key, final long offset, final int dataSize) {
      blockOffsets[rootCount] = offset;
      // Create the blockKeys as Cells once when the reader is opened
      blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      blockDataSizes[rootCount] = dataSize;
      rootCount++;
    }
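
    // midkey() returns the approximate middle key of the file: for a multi-level index it reads
    // the middle leaf index block identified by the mid-key metadata parsed in init() and extracts
    // the entry at midKeyEntry; for a single-level index it uses the middle root entry. The result
    // is cached in the midKey AtomicReference, so subsequent calls return the cached value.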

    @Override
    public Cell midkey(HFile.CachingBlockReader cachingBlockReader) throws IOException {
      if (rootCount == 0) throw new IOException("HFile empty");

      Cell targetMidKey = this.midKey.get();
      if (targetMidKey != null) {
        return targetMidKey;
      }

      if (midLeafBlockOffset >= 0) {
        if (cachingBlockReader == null) {
          throw new IOException(
            "Have to read the middle leaf block but " + "no block reader available");
        }

        // Caching, using pread, assuming this is not a compaction.
        HFileBlock midLeafBlock = cachingBlockReader.readBlock(midLeafBlockOffset,
          midLeafBlockOnDiskSize, true, true, false, true, BlockType.LEAF_INDEX, null);
        try {
          byte[] bytes = HFileBlockIndex.BlockIndexReader
            .getNonRootIndexedKey(midLeafBlock.getBufferWithoutHeader(), midKeyEntry);
          assert bytes != null;
          targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
        } finally {
          midLeafBlock.release();
        }
      } else {
        // The middle of the root-level index.
        targetMidKey = blockKeys[rootCount / 2];
      }

      this.midKey.set(targetMidKey);
      return targetMidKey;
    }
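
    // loadDataBlockWithScanInfo walks the index from the root entry chosen by
    // rootBlockContainingKey down through intermediate and leaf index blocks until a data block is
    // reached, releasing each index block once the next offset and on-disk size have been read
    // from it. currentBlock acts as a one-block cache: a scanner re-seeking into the block it
    // already holds does not re-read it, even when block caching is disabled.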

    @Override
    public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock,
      boolean cacheBlocks, boolean pread, boolean isCompaction,
      DataBlockEncoding expectedDataBlockEncoding, HFile.CachingBlockReader cachingBlockReader)
      throws IOException {
      int rootLevelIndex = rootBlockContainingKey(key);
      if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
        return null;
      }

      // the next indexed key
      Cell nextIndexedKey = null;

      // Read the next-level (intermediate or leaf) index block.
      long currentOffset = blockOffsets[rootLevelIndex];
      int currentOnDiskSize = blockDataSizes[rootLevelIndex];

      if (rootLevelIndex < blockKeys.length - 1) {
        nextIndexedKey = blockKeys[rootLevelIndex + 1];
      } else {
        nextIndexedKey = KeyValueScanner.NO_NEXT_INDEXED_KEY;
      }

      int lookupLevel = 1; // How many levels deep we are in our lookup.
      int index = -1;

      HFileBlock block = null;
      KeyValue.KeyOnlyKeyValue tmpNextIndexKV = new KeyValue.KeyOnlyKeyValue();
      while (true) {
        try {
          // Must initialize it with null here, because if we don't and an exception happens in
          // readBlock, then we'll release the previously assigned block twice in the finally
          // block. (See HBASE-22422)
          block = null;
          if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
            // Avoid reading the same block again, even with caching turned off.
            // This is crucial for compaction-type workload which might have
            // caching turned off. This is like a one-block cache inside the
            // scanner.
            block = currentBlock;
          } else {
            // Call HFile's caching block reader API. We always cache index
            // blocks, otherwise we might get terrible performance.
            boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
            BlockType expectedBlockType;
            if (lookupLevel < searchTreeLevel - 1) {
              expectedBlockType = BlockType.INTERMEDIATE_INDEX;
            } else if (lookupLevel == searchTreeLevel - 1) {
              expectedBlockType = BlockType.LEAF_INDEX;
            } else {
              // this also accounts for ENCODED_DATA
              expectedBlockType = BlockType.DATA;
            }
            block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
              pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
          }

          if (block == null) {
            throw new IOException("Failed to read block at offset " + currentOffset
              + ", onDiskSize=" + currentOnDiskSize);
          }

          // Found a data block, break the loop and check our level in the tree.
          if (block.getBlockType().isData()) {
            break;
          }

          // Not a data block. This must be a leaf-level or intermediate-level
          // index block. We don't allow going deeper than searchTreeLevel.
          if (++lookupLevel > searchTreeLevel) {
            throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
              + ", searchTreeLevel=" + searchTreeLevel);
          }

          // Locate the entry corresponding to the given key in the non-root
          // (leaf or intermediate-level) index block.
          ByteBuff buffer = block.getBufferWithoutHeader();
          index = HFileBlockIndex.BlockIndexReader.locateNonRootIndexEntry(buffer, key, comparator);
          if (index == -1) {
            // This has to be changed
            // For now change this to key value
            throw new IOException("The key " + CellUtil.getCellKeyAsString(key) + " is before the"
              + " first key of the non-root index block " + block);
          }

          currentOffset = buffer.getLong();
          currentOnDiskSize = buffer.getInt();

          // Only update next indexed key if there is a next indexed key in the current level
          byte[] nonRootIndexedKey =
            HFileBlockIndex.BlockIndexReader.getNonRootIndexedKey(buffer, index + 1);
          if (nonRootIndexedKey != null) {
            tmpNextIndexKV.setKey(nonRootIndexedKey, 0, nonRootIndexedKey.length);
            nextIndexedKey = tmpNextIndexKV;
          }
        } finally {
          if (block != null && !block.getBlockType().isData()) {
            // Release the block immediately if it is not the data block
            block.release();
          }
        }
      }

      if (lookupLevel != searchTreeLevel) {
        assert block.getBlockType().isData();
        // We did retrieve a data block, but it sits at an unexpected level for this index.
        // Release the block so that its ref count is decremented before throwing.
        if (block != null) {
          block.release();
        }
        throw new IOException("Reached a data block at level " + lookupLevel
          + " but the number of levels is " + searchTreeLevel);
      }

      // set the next indexed key for the current block.
      return new BlockWithScanInfo(block, nextIndexedKey);
    }
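
    // A worked example for rootBlockContainingKey below (hypothetical row keys): with root block
    // first keys [ "b", "f", "m" ], searching for "c" gives Bytes.binarySearch pos = -2, so i = 1
    // and we return 0 (the block whose first key is "b" may contain "c"); searching for "a" gives
    // pos = -1 and we return -1, i.e. the key precedes the first block; an exact hit such as "f"
    // returns its position, 1, directly.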

    @Override
    public int rootBlockContainingKey(Cell key) {
      // Here the comparator should not be null, as this search happens on the root-level block
      // keys.
      int pos = Bytes.binarySearch(blockKeys, key, comparator);
      // pos is between -(blockKeys.length + 1) and blockKeys.length - 1, see
      // binarySearch's javadoc.

      if (pos >= 0) {
        // This means this is an exact match with an element of blockKeys.
        assert pos < blockKeys.length;
        return pos;
      }

      // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i],
      // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that
      // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if
      // key < blockKeys[0], meaning the file does not contain the given key.

      int i = -pos - 1;
      assert 0 <= i && i <= blockKeys.length;
      return i - 1;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("size=" + rootCount).append("\n");
      for (int i = 0; i < rootCount; i++) {
        sb.append("key=").append((blockKeys[i])).append("\n offset=").append(blockOffsets[i])
          .append(", dataSize=" + blockDataSizes[i]).append("\n");
      }
      return sb.toString();
    }
  }
}