001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION; 021import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.BLOCK_CACHE_KEY_KEY; 022 023import io.opentelemetry.api.common.Attributes; 024import io.opentelemetry.api.trace.Span; 025import java.io.DataInput; 026import java.io.IOException; 027import java.nio.ByteBuffer; 028import java.util.ArrayList; 029import java.util.Optional; 030import java.util.function.IntConsumer; 031import org.apache.hadoop.conf.Configurable; 032import org.apache.hadoop.conf.Configuration; 033import org.apache.hadoop.fs.Path; 034import org.apache.hadoop.hbase.ByteBufferKeyOnlyKeyValue; 035import org.apache.hadoop.hbase.Cell; 036import org.apache.hadoop.hbase.CellComparator; 037import org.apache.hadoop.hbase.CellUtil; 038import org.apache.hadoop.hbase.HConstants; 039import org.apache.hadoop.hbase.KeyValue; 040import org.apache.hadoop.hbase.PrivateCellUtil; 041import org.apache.hadoop.hbase.SizeCachedByteBufferKeyValue; 042import org.apache.hadoop.hbase.SizeCachedKeyValue; 043import org.apache.hadoop.hbase.SizeCachedNoTagsByteBufferKeyValue; 044import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue; 045import org.apache.hadoop.hbase.io.HFileLink; 046import org.apache.hadoop.hbase.io.compress.Compression; 047import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; 048import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; 049import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; 050import org.apache.hadoop.hbase.nio.ByteBuff; 051import org.apache.hadoop.hbase.regionserver.KeyValueScanner; 052import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 053import org.apache.hadoop.hbase.util.ByteBufferUtils; 054import org.apache.hadoop.hbase.util.Bytes; 055import org.apache.hadoop.hbase.util.IdLock; 056import org.apache.hadoop.hbase.util.ObjectIntPair; 057import org.apache.hadoop.io.WritableUtils; 058import org.apache.yetus.audience.InterfaceAudience; 059import org.slf4j.Logger; 060import org.slf4j.LoggerFactory; 061 062/** 063 * Implementation that can handle all hfile versions of {@link HFile.Reader}. 064 */ 065@InterfaceAudience.Private 066@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 067public abstract class HFileReaderImpl implements HFile.Reader, Configurable { 068 // This class is HFileReaderV3 + HFileReaderV2 + AbstractHFileReader all squashed together into 069 // one file. Ditto for all the HFileReader.ScannerV? implementations. I was running up against 070 // the MaxInlineLevel limit because too many tiers involved reading from an hfile. Was also hard 071 // to navigate the source code when so many classes participating in read. 072 private static final Logger LOG = LoggerFactory.getLogger(HFileReaderImpl.class); 073 074 /** Data block index reader keeping the root data index in memory */ 075 protected HFileBlockIndex.CellBasedKeyBlockIndexReader dataBlockIndexReader; 076 077 /** Meta block index reader -- always single level */ 078 protected HFileBlockIndex.ByteArrayKeyBlockIndexReader metaBlockIndexReader; 079 080 protected FixedFileTrailer trailer; 081 082 private final boolean primaryReplicaReader; 083 084 /** 085 * What kind of data block encoding should be used while reading, writing, and handling cache. 086 */ 087 protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE; 088 089 /** Block cache configuration. */ 090 protected final CacheConfig cacheConf; 091 092 protected ReaderContext context; 093 094 protected final HFileInfo fileInfo; 095 096 /** Path of file */ 097 protected final Path path; 098 099 /** File name to be used for block names */ 100 protected final String name; 101 102 private Configuration conf; 103 104 protected HFileContext hfileContext; 105 106 /** Filesystem-level block reader. */ 107 protected HFileBlock.FSReader fsBlockReader; 108 109 /** 110 * A "sparse lock" implementation allowing to lock on a particular block identified by offset. The 111 * purpose of this is to avoid two clients loading the same block, and have all but one client 112 * wait to get the block from the cache. 113 */ 114 private IdLock offsetLock = new IdLock(); 115 116 /** Minimum minor version supported by this HFile format */ 117 static final int MIN_MINOR_VERSION = 0; 118 119 /** Maximum minor version supported by this HFile format */ 120 // We went to version 2 when we moved to pb'ing fileinfo and the trailer on 121 // the file. This version can read Writables version 1. 122 static final int MAX_MINOR_VERSION = 3; 123 124 /** Minor versions starting with this number have faked index key */ 125 static final int MINOR_VERSION_WITH_FAKED_KEY = 3; 126 127 /** 128 * Opens a HFile. 129 * @param context Reader context info 130 * @param fileInfo HFile info 131 * @param cacheConf Cache configuration. 132 * @param conf Configuration 133 */ 134 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "URF_UNREAD_PUBLIC_OR_PROTECTED_FIELD") 135 public HFileReaderImpl(ReaderContext context, HFileInfo fileInfo, CacheConfig cacheConf, 136 Configuration conf) throws IOException { 137 this.cacheConf = cacheConf; 138 this.context = context; 139 this.path = context.getFilePath(); 140 this.name = path.getName(); 141 this.conf = conf; 142 this.primaryReplicaReader = context.isPrimaryReplicaReader(); 143 this.fileInfo = fileInfo; 144 this.trailer = fileInfo.getTrailer(); 145 this.hfileContext = fileInfo.getHFileContext(); 146 this.fsBlockReader = 147 new HFileBlock.FSReaderImpl(context, hfileContext, cacheConf.getByteBuffAllocator(), conf); 148 this.dataBlockEncoder = HFileDataBlockEncoderImpl.createFromFileInfo(fileInfo); 149 fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf); 150 dataBlockIndexReader = fileInfo.getDataBlockIndexReader(); 151 metaBlockIndexReader = fileInfo.getMetaBlockIndexReader(); 152 } 153 154 @SuppressWarnings("serial") 155 public static class BlockIndexNotLoadedException extends IllegalStateException { 156 public BlockIndexNotLoadedException(Path path) { 157 // Add a message in case anyone relies on it as opposed to class name. 158 super(path + " block index not loaded"); 159 } 160 } 161 162 private Optional<String> toStringFirstKey() { 163 return getFirstKey().map(CellUtil::getCellKeyAsString); 164 } 165 166 private Optional<String> toStringLastKey() { 167 return getLastKey().map(CellUtil::getCellKeyAsString); 168 } 169 170 @Override 171 public String toString() { 172 return "reader=" + path.toString() 173 + (!isFileInfoLoaded() 174 ? "" 175 : ", compression=" + trailer.getCompressionCodec().getName() + ", cacheConf=" + cacheConf 176 + ", firstKey=" + toStringFirstKey() + ", lastKey=" + toStringLastKey()) 177 + ", avgKeyLen=" + fileInfo.getAvgKeyLen() + ", avgValueLen=" + fileInfo.getAvgValueLen() 178 + ", entries=" + trailer.getEntryCount() + ", length=" + context.getFileSize(); 179 } 180 181 @Override 182 public long length() { 183 return context.getFileSize(); 184 } 185 186 /** 187 * @return the first key in the file. May be null if file has no entries. Note that this is not 188 * the first row key, but rather the byte form of the first KeyValue. 189 */ 190 @Override 191 public Optional<Cell> getFirstKey() { 192 if (dataBlockIndexReader == null) { 193 throw new BlockIndexNotLoadedException(path); 194 } 195 return dataBlockIndexReader.isEmpty() 196 ? Optional.empty() 197 : Optional.of(dataBlockIndexReader.getRootBlockKey(0)); 198 } 199 200 /** 201 * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to 202 * eliminate {@link KeyValue} here. 203 * @return the first row key, or null if the file is empty. 204 */ 205 @Override 206 public Optional<byte[]> getFirstRowKey() { 207 // We have to copy the row part to form the row key alone 208 return getFirstKey().map(CellUtil::cloneRow); 209 } 210 211 /** 212 * TODO left from {@link HFile} version 1: move this to StoreFile after Ryan's patch goes in to 213 * eliminate {@link KeyValue} here. 214 * @return the last row key, or null if the file is empty. 215 */ 216 @Override 217 public Optional<byte[]> getLastRowKey() { 218 // We have to copy the row part to form the row key alone 219 return getLastKey().map(CellUtil::cloneRow); 220 } 221 222 /** Returns number of KV entries in this HFile */ 223 @Override 224 public long getEntries() { 225 return trailer.getEntryCount(); 226 } 227 228 /** Returns comparator */ 229 @Override 230 public CellComparator getComparator() { 231 return this.hfileContext.getCellComparator(); 232 } 233 234 public Compression.Algorithm getCompressionAlgorithm() { 235 return trailer.getCompressionCodec(); 236 } 237 238 /** 239 * @return the total heap size of data and meta block indexes in bytes. Does not take into account 240 * non-root blocks of a multilevel data index. 241 */ 242 @Override 243 public long indexSize() { 244 return (dataBlockIndexReader != null ? dataBlockIndexReader.heapSize() : 0) 245 + ((metaBlockIndexReader != null) ? metaBlockIndexReader.heapSize() : 0); 246 } 247 248 @Override 249 public String getName() { 250 return name; 251 } 252 253 @Override 254 public void setDataBlockEncoder(HFileDataBlockEncoder dataBlockEncoder) { 255 this.dataBlockEncoder = dataBlockEncoder; 256 this.fsBlockReader.setDataBlockEncoder(dataBlockEncoder, conf); 257 } 258 259 @Override 260 public void setDataBlockIndexReader(HFileBlockIndex.CellBasedKeyBlockIndexReader reader) { 261 this.dataBlockIndexReader = reader; 262 } 263 264 @Override 265 public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() { 266 return dataBlockIndexReader; 267 } 268 269 @Override 270 public void setMetaBlockIndexReader(HFileBlockIndex.ByteArrayKeyBlockIndexReader reader) { 271 this.metaBlockIndexReader = reader; 272 } 273 274 @Override 275 public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() { 276 return metaBlockIndexReader; 277 } 278 279 @Override 280 public FixedFileTrailer getTrailer() { 281 return trailer; 282 } 283 284 @Override 285 public ReaderContext getContext() { 286 return this.context; 287 } 288 289 @Override 290 public HFileInfo getHFileInfo() { 291 return this.fileInfo; 292 } 293 294 @Override 295 public boolean isPrimaryReplicaReader() { 296 return primaryReplicaReader; 297 } 298 299 /** 300 * An exception thrown when an operation requiring a scanner to be seeked is invoked on a scanner 301 * that is not seeked. 302 */ 303 @SuppressWarnings("serial") 304 public static class NotSeekedException extends IllegalStateException { 305 public NotSeekedException(Path path) { 306 super(path + " not seeked to a key/value"); 307 } 308 } 309 310 protected static class HFileScannerImpl implements HFileScanner { 311 private ByteBuff blockBuffer; 312 protected final boolean cacheBlocks; 313 protected final boolean pread; 314 protected final boolean isCompaction; 315 private int currKeyLen; 316 private int currValueLen; 317 private int currMemstoreTSLen; 318 private long currMemstoreTS; 319 protected final HFile.Reader reader; 320 private int currTagsLen; 321 private short rowLen; 322 // buffer backed keyonlyKV 323 private ByteBufferKeyOnlyKeyValue bufBackedKeyOnlyKv = new ByteBufferKeyOnlyKeyValue(); 324 // A pair for reusing in blockSeek() so that we don't garbage lot of objects 325 final ObjectIntPair<ByteBuffer> pair = new ObjectIntPair<>(); 326 327 /** 328 * The next indexed key is to keep track of the indexed key of the next data block. If the 329 * nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the current data block is the 330 * last data block. If the nextIndexedKey is null, it means the nextIndexedKey has not been 331 * loaded yet. 332 */ 333 protected Cell nextIndexedKey; 334 // Current block being used. NOTICE: DON't release curBlock separately except in shipped() or 335 // close() methods. Because the shipped() or close() will do the release finally, even if any 336 // exception occur the curBlock will be released by the close() method (see 337 // RegionScannerImpl#handleException). Call the releaseIfNotCurBlock() to release the 338 // unreferenced block please. 339 protected HFileBlock curBlock; 340 // Whether we returned a result for curBlock's size in recordBlockSize(). 341 // gets reset whenever curBlock is changed. 342 private boolean providedCurrentBlockSize = false; 343 // Previous blocks that were used in the course of the read 344 protected final ArrayList<HFileBlock> prevBlocks = new ArrayList<>(); 345 346 public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks, 347 final boolean pread, final boolean isCompaction) { 348 this.reader = reader; 349 this.cacheBlocks = cacheBlocks; 350 this.pread = pread; 351 this.isCompaction = isCompaction; 352 } 353 354 void updateCurrBlockRef(HFileBlock block) { 355 if (block != null && curBlock != null && block.getOffset() == curBlock.getOffset()) { 356 return; 357 } 358 if (this.curBlock != null && this.curBlock.isSharedMem()) { 359 prevBlocks.add(this.curBlock); 360 } 361 this.curBlock = block; 362 this.providedCurrentBlockSize = false; 363 } 364 365 void reset() { 366 // We don't have to keep ref to heap block 367 if (this.curBlock != null && this.curBlock.isSharedMem()) { 368 this.prevBlocks.add(this.curBlock); 369 } 370 this.curBlock = null; 371 } 372 373 private void returnBlocks(boolean returnAll) { 374 this.prevBlocks.forEach(HFileBlock::release); 375 this.prevBlocks.clear(); 376 if (returnAll && this.curBlock != null) { 377 this.curBlock.release(); 378 this.curBlock = null; 379 } 380 } 381 382 @Override 383 public boolean isSeeked() { 384 return blockBuffer != null; 385 } 386 387 @Override 388 public String toString() { 389 return "HFileScanner for reader " + String.valueOf(getReader()); 390 } 391 392 protected void assertSeeked() { 393 if (!isSeeked()) { 394 throw new NotSeekedException(reader.getPath()); 395 } 396 } 397 398 @Override 399 public HFile.Reader getReader() { 400 return reader; 401 } 402 403 // From non encoded HFiles, we always read back KeyValue or its descendant.(Note: When HFile 404 // block is in DBB, it will be OffheapKV). So all parts of the Cell is in a contiguous 405 // array/buffer. How many bytes we should wrap to make the KV is what this method returns. 406 private int getKVBufSize() { 407 int kvBufSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen; 408 if (currTagsLen > 0) { 409 kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen; 410 } 411 return kvBufSize; 412 } 413 414 @Override 415 public void close() { 416 if (!pread) { 417 // For seek + pread stream socket should be closed when the scanner is closed. HBASE-9393 418 reader.unbufferStream(); 419 } 420 this.returnBlocks(true); 421 } 422 423 @Override 424 public void recordBlockSize(IntConsumer blockSizeConsumer) { 425 if (!providedCurrentBlockSize && curBlock != null) { 426 providedCurrentBlockSize = true; 427 blockSizeConsumer.accept(curBlock.getUncompressedSizeWithoutHeader()); 428 } 429 } 430 431 // Returns the #bytes in HFile for the current cell. Used to skip these many bytes in current 432 // HFile block's buffer so as to position to the next cell. 433 private int getCurCellSerializedSize() { 434 int curCellSize = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen; 435 if (this.reader.getFileContext().isIncludesTags()) { 436 curCellSize += Bytes.SIZEOF_SHORT + currTagsLen; 437 } 438 return curCellSize; 439 } 440 441 protected void readKeyValueLen() { 442 // This is a hot method. We go out of our way to make this method short so it can be 443 // inlined and is not too big to compile. We also manage position in ByteBuffer ourselves 444 // because it is faster than going via range-checked ByteBuffer methods or going through a 445 // byte buffer array a byte at a time. 446 // Get a long at a time rather than read two individual ints. In micro-benchmarking, even 447 // with the extra bit-fiddling, this is order-of-magnitude faster than getting two ints. 448 // Trying to imitate what was done - need to profile if this is better or 449 // earlier way is better by doing mark and reset? 450 // But ensure that you read long instead of two ints 451 long ll = blockBuffer.getLongAfterPosition(0); 452 // Read top half as an int of key length and bottom int as value length 453 this.currKeyLen = (int) (ll >> Integer.SIZE); 454 this.currValueLen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); 455 checkKeyValueLen(); 456 this.rowLen = blockBuffer.getShortAfterPosition(Bytes.SIZEOF_LONG); 457 // Move position past the key and value lengths and then beyond the key and value 458 int p = (Bytes.SIZEOF_LONG + currKeyLen + currValueLen); 459 if (reader.getFileContext().isIncludesTags()) { 460 // Tags length is a short. 461 this.currTagsLen = blockBuffer.getShortAfterPosition(p); 462 checkTagsLen(); 463 p += (Bytes.SIZEOF_SHORT + currTagsLen); 464 } 465 readMvccVersion(p); 466 } 467 468 private final void checkTagsLen() { 469 if (checkLen(this.currTagsLen)) { 470 throw new IllegalStateException( 471 "Invalid currTagsLen " + this.currTagsLen + ". Block offset: " + curBlock.getOffset() 472 + ", block length: " + this.blockBuffer.limit() + ", position: " 473 + this.blockBuffer.position() + " (without header)." + " path=" + reader.getPath()); 474 } 475 } 476 477 /** 478 * Read mvcc. Does checks to see if we even need to read the mvcc at all. 479 */ 480 protected void readMvccVersion(final int offsetFromPos) { 481 // See if we even need to decode mvcc. 482 if (!this.reader.getHFileInfo().shouldIncludeMemStoreTS()) { 483 return; 484 } 485 if (!this.reader.getHFileInfo().isDecodeMemstoreTS()) { 486 currMemstoreTS = 0; 487 currMemstoreTSLen = 1; 488 return; 489 } 490 _readMvccVersion(offsetFromPos); 491 } 492 493 /** 494 * Actually do the mvcc read. Does no checks. 495 */ 496 private void _readMvccVersion(int offsetFromPos) { 497 // This is Bytes#bytesToVint inlined so can save a few instructions in this hot method; i.e. 498 // previous if one-byte vint, we'd redo the vint call to find int size. 499 // Also the method is kept small so can be inlined. 500 byte firstByte = blockBuffer.getByteAfterPosition(offsetFromPos); 501 int len = WritableUtils.decodeVIntSize(firstByte); 502 if (len == 1) { 503 this.currMemstoreTS = firstByte; 504 } else { 505 int remaining = len - 1; 506 long i = 0; 507 offsetFromPos++; 508 if (remaining >= Bytes.SIZEOF_INT) { 509 // The int read has to be converted to unsigned long so the & op 510 i = (blockBuffer.getIntAfterPosition(offsetFromPos) & 0x00000000ffffffffL); 511 remaining -= Bytes.SIZEOF_INT; 512 offsetFromPos += Bytes.SIZEOF_INT; 513 } 514 if (remaining >= Bytes.SIZEOF_SHORT) { 515 short s = blockBuffer.getShortAfterPosition(offsetFromPos); 516 i = i << 16; 517 i = i | (s & 0xFFFF); 518 remaining -= Bytes.SIZEOF_SHORT; 519 offsetFromPos += Bytes.SIZEOF_SHORT; 520 } 521 for (int idx = 0; idx < remaining; idx++) { 522 byte b = blockBuffer.getByteAfterPosition(offsetFromPos + idx); 523 i = i << 8; 524 i = i | (b & 0xFF); 525 } 526 currMemstoreTS = (WritableUtils.isNegativeVInt(firstByte) ? ~i : i); 527 } 528 this.currMemstoreTSLen = len; 529 } 530 531 /** 532 * Within a loaded block, seek looking for the last key that is smaller than (or equal to?) the 533 * key we are interested in. A note on the seekBefore: if you have seekBefore = true, AND the 534 * first key in the block = key, then you'll get thrown exceptions. The caller has to check for 535 * that case and load the previous block as appropriate. the key to find find the key before the 536 * given key in case of exact match. 537 * @return 0 in case of an exact key match, 1 in case of an inexact match, -2 in case of an 538 * inexact match and furthermore, the input key less than the first key of current 539 * block(e.g. using a faked index key) 540 */ 541 protected int blockSeek(Cell key, boolean seekBefore) { 542 int klen, vlen, tlen = 0; 543 int lastKeyValueSize = -1; 544 int offsetFromPos; 545 do { 546 offsetFromPos = 0; 547 // Better to ensure that we use the BB Utils here 548 long ll = blockBuffer.getLongAfterPosition(offsetFromPos); 549 klen = (int) (ll >> Integer.SIZE); 550 vlen = (int) (Bytes.MASK_FOR_LOWER_INT_IN_LONG ^ ll); 551 if (checkKeyLen(klen) || checkLen(vlen)) { 552 throw new IllegalStateException( 553 "Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " + curBlock.getOffset() 554 + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position() 555 + " (without header)." + " path=" + reader.getPath()); 556 } 557 offsetFromPos += Bytes.SIZEOF_LONG; 558 this.rowLen = blockBuffer.getShortAfterPosition(offsetFromPos); 559 blockBuffer.asSubByteBuffer(blockBuffer.position() + offsetFromPos, klen, pair); 560 bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), klen, rowLen); 561 int comp = 562 PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, bufBackedKeyOnlyKv); 563 offsetFromPos += klen + vlen; 564 if (this.reader.getFileContext().isIncludesTags()) { 565 // Read short as unsigned, high byte first 566 tlen = ((blockBuffer.getByteAfterPosition(offsetFromPos) & 0xff) << 8) 567 ^ (blockBuffer.getByteAfterPosition(offsetFromPos + 1) & 0xff); 568 if (checkLen(tlen)) { 569 throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: " 570 + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " 571 + blockBuffer.position() + " (without header)." + " path=" + reader.getPath()); 572 } 573 // add the two bytes read for the tags. 574 offsetFromPos += tlen + (Bytes.SIZEOF_SHORT); 575 } 576 if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) { 577 // Directly read the mvcc based on current position 578 readMvccVersion(offsetFromPos); 579 } 580 if (comp == 0) { 581 if (seekBefore) { 582 if (lastKeyValueSize < 0) { 583 throw new IllegalStateException("blockSeek with seekBefore " 584 + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key) 585 + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize=" 586 + curBlock.getOnDiskSizeWithHeader() + ", path=" + reader.getPath()); 587 } 588 blockBuffer.moveBack(lastKeyValueSize); 589 readKeyValueLen(); 590 return 1; // non exact match. 591 } 592 currKeyLen = klen; 593 currValueLen = vlen; 594 currTagsLen = tlen; 595 return 0; // indicate exact match 596 } else if (comp < 0) { 597 if (lastKeyValueSize > 0) { 598 blockBuffer.moveBack(lastKeyValueSize); 599 } 600 readKeyValueLen(); 601 if (lastKeyValueSize == -1 && blockBuffer.position() == 0) { 602 return HConstants.INDEX_KEY_MAGIC; 603 } 604 return 1; 605 } 606 // The size of this key/value tuple, including key/value length fields. 607 lastKeyValueSize = klen + vlen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE; 608 // include tag length also if tags included with KV 609 if (reader.getFileContext().isIncludesTags()) { 610 lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT; 611 } 612 blockBuffer.skip(lastKeyValueSize); 613 } while (blockBuffer.hasRemaining()); 614 615 // Seek to the last key we successfully read. This will happen if this is 616 // the last key/value pair in the file, in which case the following call 617 // to next() has to return false. 618 blockBuffer.moveBack(lastKeyValueSize); 619 readKeyValueLen(); 620 return 1; // didn't exactly find it. 621 } 622 623 @Override 624 public Cell getNextIndexedKey() { 625 return nextIndexedKey; 626 } 627 628 @Override 629 public int seekTo(Cell key) throws IOException { 630 return seekTo(key, true); 631 } 632 633 @Override 634 public int reseekTo(Cell key) throws IOException { 635 int compared; 636 if (isSeeked()) { 637 compared = compareKey(reader.getComparator(), key); 638 if (compared < 1) { 639 // If the required key is less than or equal to current key, then 640 // don't do anything. 641 return compared; 642 } else { 643 // The comparison with no_next_index_key has to be checked 644 if ( 645 this.nextIndexedKey != null && (this.nextIndexedKey 646 == KeyValueScanner.NO_NEXT_INDEXED_KEY 647 || PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), key, nextIndexedKey) 648 < 0) 649 ) { 650 // The reader shall continue to scan the current data block instead 651 // of querying the 652 // block index as long as it knows the target key is strictly 653 // smaller than 654 // the next indexed key or the current data block is the last data 655 // block. 656 return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, false); 657 } 658 } 659 } 660 // Don't rewind on a reseek operation, because reseek implies that we are 661 // always going forward in the file. 662 return seekTo(key, false); 663 } 664 665 /** 666 * An internal API function. Seek to the given key, optionally rewinding to the first key of the 667 * block before doing the seek. 668 * @param key - a cell representing the key that we need to fetch 669 * @param rewind whether to rewind to the first key of the block before doing the seek. If this 670 * is false, we are assuming we never go back, otherwise the result is undefined. 671 * @return -1 if the key is earlier than the first key of the file, 0 if we are at the given 672 * key, 1 if we are past the given key -2 if the key is earlier than the first key of 673 * the file while using a faked index key 674 */ 675 public int seekTo(Cell key, boolean rewind) throws IOException { 676 HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader(); 677 BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, curBlock, 678 cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding(), reader); 679 if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) { 680 // This happens if the key e.g. falls before the beginning of the file. 681 return -1; 682 } 683 return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(), 684 blockWithScanInfo.getNextIndexedKey(), rewind, key, false); 685 } 686 687 @Override 688 public boolean seekBefore(Cell key) throws IOException { 689 HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, curBlock, 690 cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction), reader); 691 if (seekToBlock == null) { 692 return false; 693 } 694 Cell firstKey = getFirstKeyCellInBlock(seekToBlock); 695 if (PrivateCellUtil.compareKeyIgnoresMvcc(reader.getComparator(), firstKey, key) >= 0) { 696 long previousBlockOffset = seekToBlock.getPrevBlockOffset(); 697 // The key we are interested in 698 if (previousBlockOffset == -1) { 699 // we have a 'problem', the key we want is the first of the file. 700 releaseIfNotCurBlock(seekToBlock); 701 return false; 702 } 703 704 // The first key in the current block 'seekToBlock' is greater than the given 705 // seekBefore key. We will go ahead by reading the next block that satisfies the 706 // given key. Return the current block before reading the next one. 707 releaseIfNotCurBlock(seekToBlock); 708 // It is important that we compute and pass onDiskSize to the block 709 // reader so that it does not have to read the header separately to 710 // figure out the size. Currently, we do not have a way to do this 711 // correctly in the general case however. 712 // TODO: See https://issues.apache.org/jira/browse/HBASE-14576 713 int prevBlockSize = -1; 714 seekToBlock = reader.readBlock(previousBlockOffset, prevBlockSize, cacheBlocks, pread, 715 isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 716 // TODO shortcut: seek forward in this block to the last key of the 717 // block. 718 } 719 loadBlockAndSeekToKey(seekToBlock, firstKey, true, key, true); 720 return true; 721 } 722 723 /** 724 * The curBlock will be released by shipping or close method, so only need to consider releasing 725 * the block, which was read from HFile before and not referenced by curBlock. 726 */ 727 protected void releaseIfNotCurBlock(HFileBlock block) { 728 if (curBlock != block) { 729 block.release(); 730 } 731 } 732 733 /** 734 * Scans blocks in the "scanned" section of the {@link HFile} until the next data block is 735 * found. 736 * @return the next block, or null if there are no more data blocks 737 */ 738 @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NP_NULL_ON_SOME_PATH", 739 justification = "Yeah, unnecessary null check; could do w/ clean up") 740 protected HFileBlock readNextDataBlock() throws IOException { 741 long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); 742 if (curBlock == null) { 743 return null; 744 } 745 HFileBlock block = this.curBlock; 746 do { 747 if (block.getOffset() >= lastDataBlockOffset) { 748 releaseIfNotCurBlock(block); 749 return null; 750 } 751 if (block.getOffset() < 0) { 752 releaseIfNotCurBlock(block); 753 throw new IOException("Invalid block offset=" + block + ", path=" + reader.getPath()); 754 } 755 // We are reading the next block without block type validation, because 756 // it might turn out to be a non-data block. 757 block = reader.readBlock(block.getOffset() + block.getOnDiskSizeWithHeader(), 758 block.getNextBlockOnDiskSize(), cacheBlocks, pread, isCompaction, true, null, 759 getEffectiveDataBlockEncoding()); 760 if (block != null && !block.getBlockType().isData()) { 761 // Whatever block we read we will be returning it unless 762 // it is a datablock. Just in case the blocks are non data blocks 763 block.release(); 764 } 765 } while (!block.getBlockType().isData()); 766 return block; 767 } 768 769 public DataBlockEncoding getEffectiveDataBlockEncoding() { 770 return this.reader.getEffectiveEncodingInCache(isCompaction); 771 } 772 773 @Override 774 public Cell getCell() { 775 if (!isSeeked()) { 776 return null; 777 } 778 779 Cell ret; 780 int cellBufSize = getKVBufSize(); 781 long seqId = 0L; 782 if (this.reader.getHFileInfo().shouldIncludeMemStoreTS()) { 783 seqId = currMemstoreTS; 784 } 785 if (blockBuffer.hasArray()) { 786 // TODO : reduce the varieties of KV here. Check if based on a boolean 787 // we can handle the 'no tags' case. 788 if (currTagsLen > 0) { 789 ret = new SizeCachedKeyValue(blockBuffer.array(), 790 blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen, 791 rowLen); 792 } else { 793 ret = new SizeCachedNoTagsKeyValue(blockBuffer.array(), 794 blockBuffer.arrayOffset() + blockBuffer.position(), cellBufSize, seqId, currKeyLen, 795 rowLen); 796 } 797 } else { 798 ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize); 799 if (buf.isDirect()) { 800 ret = currTagsLen > 0 801 ? new SizeCachedByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, currKeyLen, 802 rowLen) 803 : new SizeCachedNoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId, 804 currKeyLen, rowLen); 805 } else { 806 if (currTagsLen > 0) { 807 ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(), 808 cellBufSize, seqId, currKeyLen, rowLen); 809 } else { 810 ret = new SizeCachedNoTagsKeyValue(buf.array(), buf.arrayOffset() + buf.position(), 811 cellBufSize, seqId, currKeyLen, rowLen); 812 } 813 } 814 } 815 return ret; 816 } 817 818 @Override 819 public Cell getKey() { 820 assertSeeked(); 821 // Create a new object so that this getKey is cached as firstKey, lastKey 822 ObjectIntPair<ByteBuffer> keyPair = new ObjectIntPair<>(); 823 blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, keyPair); 824 ByteBuffer keyBuf = keyPair.getFirst(); 825 if (keyBuf.hasArray()) { 826 return new KeyValue.KeyOnlyKeyValue(keyBuf.array(), 827 keyBuf.arrayOffset() + keyPair.getSecond(), currKeyLen); 828 } else { 829 // Better to do a copy here instead of holding on to this BB so that 830 // we could release the blocks referring to this key. This key is specifically used 831 // in HalfStoreFileReader to get the firstkey and lastkey by creating a new scanner 832 // every time. So holding onto the BB (incase of DBB) is not advised here. 833 byte[] key = new byte[currKeyLen]; 834 ByteBufferUtils.copyFromBufferToArray(key, keyBuf, keyPair.getSecond(), 0, currKeyLen); 835 return new KeyValue.KeyOnlyKeyValue(key, 0, currKeyLen); 836 } 837 } 838 839 @Override 840 public ByteBuffer getValue() { 841 assertSeeked(); 842 // Okie to create new Pair. Not used in hot path 843 ObjectIntPair<ByteBuffer> valuePair = new ObjectIntPair<>(); 844 this.blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen, 845 currValueLen, valuePair); 846 ByteBuffer valBuf = valuePair.getFirst().duplicate(); 847 valBuf.position(valuePair.getSecond()); 848 valBuf.limit(currValueLen + valuePair.getSecond()); 849 return valBuf.slice(); 850 } 851 852 protected void setNonSeekedState() { 853 reset(); 854 blockBuffer = null; 855 currKeyLen = 0; 856 currValueLen = 0; 857 currMemstoreTS = 0; 858 currMemstoreTSLen = 0; 859 currTagsLen = 0; 860 } 861 862 /** 863 * Set the position on current backing blockBuffer. 864 */ 865 private void positionThisBlockBuffer() { 866 try { 867 blockBuffer.skip(getCurCellSerializedSize()); 868 } catch (IllegalArgumentException e) { 869 LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen 870 + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit() 871 + "; currBlock currBlockOffset = " + this.curBlock.getOffset() + "; path=" 872 + reader.getPath()); 873 throw e; 874 } 875 } 876 877 /** 878 * Set our selves up for the next 'next' invocation, set up next block. 879 * @return True is more to read else false if at the end. 880 */ 881 private boolean positionForNextBlock() throws IOException { 882 // Methods are small so they get inlined because they are 'hot'. 883 long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); 884 if (this.curBlock.getOffset() >= lastDataBlockOffset) { 885 setNonSeekedState(); 886 return false; 887 } 888 return isNextBlock(); 889 } 890 891 private boolean isNextBlock() throws IOException { 892 // Methods are small so they get inlined because they are 'hot'. 893 HFileBlock nextBlock = readNextDataBlock(); 894 if (nextBlock == null) { 895 setNonSeekedState(); 896 return false; 897 } 898 updateCurrentBlock(nextBlock); 899 return true; 900 } 901 902 private final boolean _next() throws IOException { 903 // Small method so can be inlined. It is a hot one. 904 if (blockBuffer.remaining() <= 0) { 905 return positionForNextBlock(); 906 } 907 908 // We are still in the same block. 909 readKeyValueLen(); 910 return true; 911 } 912 913 /** 914 * Go to the next key/value in the block section. Loads the next block if necessary. If 915 * successful, {@link #getKey()} and {@link #getValue()} can be called. 916 * @return true if successfully navigated to the next key/value 917 */ 918 @Override 919 public boolean next() throws IOException { 920 // This is a hot method so extreme measures taken to ensure it is small and inlineable. 921 // Checked by setting: -XX:+UnlockDiagnosticVMOptions -XX:+PrintInlining -XX:+PrintCompilation 922 assertSeeked(); 923 positionThisBlockBuffer(); 924 return _next(); 925 } 926 927 /** 928 * Positions this scanner at the start of the file. 929 * @return false if empty file; i.e. a call to next would return false and the current key and 930 * value are undefined. 931 */ 932 @Override 933 public boolean seekTo() throws IOException { 934 if (reader == null) { 935 return false; 936 } 937 938 if (reader.getTrailer().getEntryCount() == 0) { 939 // No data blocks. 940 return false; 941 } 942 943 long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset(); 944 if (curBlock != null && curBlock.getOffset() == firstDataBlockOffset) { 945 return processFirstDataBlock(); 946 } 947 948 readAndUpdateNewBlock(firstDataBlockOffset); 949 return true; 950 } 951 952 protected boolean processFirstDataBlock() throws IOException { 953 blockBuffer.rewind(); 954 readKeyValueLen(); 955 return true; 956 } 957 958 protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException { 959 HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, 960 isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); 961 if (newBlock.getOffset() < 0) { 962 releaseIfNotCurBlock(newBlock); 963 throw new IOException( 964 "Invalid offset=" + newBlock.getOffset() + ", path=" + reader.getPath()); 965 } 966 updateCurrentBlock(newBlock); 967 } 968 969 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind, 970 Cell key, boolean seekBefore) throws IOException { 971 if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) { 972 updateCurrentBlock(seekToBlock); 973 } else if (rewind) { 974 blockBuffer.rewind(); 975 } 976 // Update the nextIndexedKey 977 this.nextIndexedKey = nextIndexedKey; 978 return blockSeek(key, seekBefore); 979 } 980 981 /** Returns True if v <= 0 or v > current block buffer limit. */ 982 protected final boolean checkKeyLen(final int v) { 983 return v <= 0 || v > this.blockBuffer.limit(); 984 } 985 986 /** Returns True if v < 0 or v > current block buffer limit. */ 987 protected final boolean checkLen(final int v) { 988 return v < 0 || v > this.blockBuffer.limit(); 989 } 990 991 /** 992 * Check key and value lengths are wholesome. 993 */ 994 protected final void checkKeyValueLen() { 995 if (checkKeyLen(this.currKeyLen) || checkLen(this.currValueLen)) { 996 throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen 997 + " or currValueLen " + this.currValueLen + ". Block offset: " + this.curBlock.getOffset() 998 + ", block length: " + this.blockBuffer.limit() + ", position: " 999 + this.blockBuffer.position() + " (without header)." + ", path=" + reader.getPath()); 1000 } 1001 } 1002 1003 /** 1004 * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first 1005 * key/value pair. 1006 * @param newBlock the block read by {@link HFileReaderImpl#readBlock}, it's a totally new block 1007 * with new allocated {@link ByteBuff}, so if no further reference to this 1008 * block, we should release it carefully. 1009 */ 1010 protected void updateCurrentBlock(HFileBlock newBlock) throws IOException { 1011 try { 1012 if (newBlock.getBlockType() != BlockType.DATA) { 1013 throw new IllegalStateException( 1014 "ScannerV2 works only on data blocks, got " + newBlock.getBlockType() + "; " 1015 + "HFileName=" + reader.getPath() + ", " + "dataBlockEncoder=" 1016 + reader.getDataBlockEncoding() + ", " + "isCompaction=" + isCompaction); 1017 } 1018 updateCurrBlockRef(newBlock); 1019 blockBuffer = newBlock.getBufferWithoutHeader(); 1020 readKeyValueLen(); 1021 } finally { 1022 releaseIfNotCurBlock(newBlock); 1023 } 1024 // Reset the next indexed key 1025 this.nextIndexedKey = null; 1026 } 1027 1028 protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) { 1029 ByteBuff buffer = curBlock.getBufferWithoutHeader(); 1030 // It is safe to manipulate this buffer because we own the buffer object. 1031 buffer.rewind(); 1032 int klen = buffer.getInt(); 1033 buffer.skip(Bytes.SIZEOF_INT);// Skip value len part 1034 ByteBuffer keyBuff = buffer.asSubByteBuffer(klen); 1035 if (keyBuff.hasArray()) { 1036 return new KeyValue.KeyOnlyKeyValue(keyBuff.array(), 1037 keyBuff.arrayOffset() + keyBuff.position(), klen); 1038 } else { 1039 return new ByteBufferKeyOnlyKeyValue(keyBuff, keyBuff.position(), klen); 1040 } 1041 } 1042 1043 @Override 1044 public String getKeyString() { 1045 return CellUtil.toString(getKey(), false); 1046 } 1047 1048 @Override 1049 public String getValueString() { 1050 return ByteBufferUtils.toStringBinary(getValue()); 1051 } 1052 1053 public int compareKey(CellComparator comparator, Cell key) { 1054 blockBuffer.asSubByteBuffer(blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen, pair); 1055 this.bufBackedKeyOnlyKv.setKey(pair.getFirst(), pair.getSecond(), currKeyLen, rowLen); 1056 return PrivateCellUtil.compareKeyIgnoresMvcc(comparator, key, this.bufBackedKeyOnlyKv); 1057 } 1058 1059 @Override 1060 public void shipped() throws IOException { 1061 this.returnBlocks(false); 1062 } 1063 } 1064 1065 @Override 1066 public Path getPath() { 1067 return path; 1068 } 1069 1070 @Override 1071 public DataBlockEncoding getDataBlockEncoding() { 1072 return dataBlockEncoder.getDataBlockEncoding(); 1073 } 1074 1075 @Override 1076 public Configuration getConf() { 1077 return conf; 1078 } 1079 1080 @Override 1081 public void setConf(Configuration conf) { 1082 this.conf = conf; 1083 } 1084 1085 /** Minor versions in HFile starting with this number have hbase checksums */ 1086 public static final int MINOR_VERSION_WITH_CHECKSUM = 1; 1087 /** In HFile minor version that does not support checksums */ 1088 public static final int MINOR_VERSION_NO_CHECKSUM = 0; 1089 1090 /** HFile minor version that introduced pbuf filetrailer */ 1091 public static final int PBUF_TRAILER_MINOR_VERSION = 2; 1092 1093 /** 1094 * The size of a (key length, value length) tuple that prefixes each entry in a data block. 1095 */ 1096 public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; 1097 1098 /** 1099 * Retrieve block from cache. Validates the retrieved block's type vs {@code expectedBlockType} 1100 * and its encoding vs. {@code expectedDataBlockEncoding}. Unpacks the block as necessary. 1101 */ 1102 private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock, 1103 boolean updateCacheMetrics, BlockType expectedBlockType, 1104 DataBlockEncoding expectedDataBlockEncoding) throws IOException { 1105 // Check cache for block. If found return. 1106 BlockCache cache = cacheConf.getBlockCache().orElse(null); 1107 if (cache != null) { 1108 HFileBlock cachedBlock = 1109 (HFileBlock) cache.getBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics); 1110 if (cachedBlock != null) { 1111 if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) { 1112 HFileBlock compressedBlock = cachedBlock; 1113 cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader); 1114 // In case of compressed block after unpacking we can release the compressed block 1115 if (compressedBlock != cachedBlock) { 1116 compressedBlock.release(); 1117 } 1118 } 1119 try { 1120 validateBlockType(cachedBlock, expectedBlockType); 1121 } catch (IOException e) { 1122 returnAndEvictBlock(cache, cacheKey, cachedBlock); 1123 throw e; 1124 } 1125 1126 if (expectedDataBlockEncoding == null) { 1127 return cachedBlock; 1128 } 1129 DataBlockEncoding actualDataBlockEncoding = cachedBlock.getDataBlockEncoding(); 1130 // Block types other than data blocks always have 1131 // DataBlockEncoding.NONE. To avoid false negative cache misses, only 1132 // perform this check if cached block is a data block. 1133 if ( 1134 cachedBlock.getBlockType().isData() 1135 && !actualDataBlockEncoding.equals(expectedDataBlockEncoding) 1136 ) { 1137 // This mismatch may happen if a Scanner, which is used for say a 1138 // compaction, tries to read an encoded block from the block cache. 1139 // The reverse might happen when an EncodedScanner tries to read 1140 // un-encoded blocks which were cached earlier. 1141 // 1142 // Because returning a data block with an implicit BlockType mismatch 1143 // will cause the requesting scanner to throw a disk read should be 1144 // forced here. This will potentially cause a significant number of 1145 // cache misses, so update so we should keep track of this as it might 1146 // justify the work on a CompoundScanner. 1147 if ( 1148 !expectedDataBlockEncoding.equals(DataBlockEncoding.NONE) 1149 && !actualDataBlockEncoding.equals(DataBlockEncoding.NONE) 1150 ) { 1151 // If the block is encoded but the encoding does not match the 1152 // expected encoding it is likely the encoding was changed but the 1153 // block was not yet evicted. Evictions on file close happen async 1154 // so blocks with the old encoding still linger in cache for some 1155 // period of time. This event should be rare as it only happens on 1156 // schema definition change. 1157 LOG.info( 1158 "Evicting cached block with key {} because data block encoding mismatch; " 1159 + "expected {}, actual {}, path={}", 1160 cacheKey, actualDataBlockEncoding, expectedDataBlockEncoding, path); 1161 // This is an error scenario. so here we need to release the block. 1162 returnAndEvictBlock(cache, cacheKey, cachedBlock); 1163 } 1164 return null; 1165 } 1166 return cachedBlock; 1167 } 1168 } 1169 return null; 1170 } 1171 1172 private void returnAndEvictBlock(BlockCache cache, BlockCacheKey cacheKey, Cacheable block) { 1173 block.release(); 1174 cache.evictBlock(cacheKey); 1175 } 1176 1177 /** 1178 * @param cacheBlock Add block to cache, if found 1179 * @return block wrapped in a ByteBuffer, with header skipped 1180 */ 1181 @Override 1182 public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException { 1183 if (trailer.getMetaIndexCount() == 0) { 1184 return null; // there are no meta blocks 1185 } 1186 if (metaBlockIndexReader == null) { 1187 throw new IOException(path + " meta index not loaded"); 1188 } 1189 1190 byte[] mbname = Bytes.toBytes(metaBlockName); 1191 int block = metaBlockIndexReader.rootBlockContainingKey(mbname, 0, mbname.length); 1192 if (block == -1) { 1193 return null; 1194 } 1195 long blockSize = metaBlockIndexReader.getRootBlockDataSize(block); 1196 1197 // Per meta key from any given file, synchronize reads for said block. This 1198 // is OK to do for meta blocks because the meta block index is always 1199 // single-level. 1200 synchronized (metaBlockIndexReader.getRootBlockKey(block)) { 1201 // Check cache for block. If found return. 1202 long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block); 1203 BlockCacheKey cacheKey = 1204 new BlockCacheKey(name, metaBlockOffset, this.isPrimaryReplicaReader(), BlockType.META); 1205 1206 cacheBlock &= cacheConf.shouldCacheBlockOnRead(BlockType.META.getCategory()); 1207 HFileBlock cachedBlock = 1208 getCachedBlock(cacheKey, cacheBlock, false, true, BlockType.META, null); 1209 if (cachedBlock != null) { 1210 assert cachedBlock.isUnpacked() : "Packed block leak."; 1211 // Return a distinct 'shallow copy' of the block, 1212 // so pos does not get messed by the scanner 1213 return cachedBlock; 1214 } 1215 // Cache Miss, please load. 1216 1217 HFileBlock compressedBlock = 1218 fsBlockReader.readBlockData(metaBlockOffset, blockSize, true, false, true); 1219 HFileBlock uncompressedBlock = compressedBlock.unpack(hfileContext, fsBlockReader); 1220 if (compressedBlock != uncompressedBlock) { 1221 compressedBlock.release(); 1222 } 1223 1224 // Cache the block 1225 if (cacheBlock) { 1226 cacheConf.getBlockCache().ifPresent( 1227 cache -> cache.cacheBlock(cacheKey, uncompressedBlock, cacheConf.isInMemory())); 1228 } 1229 return uncompressedBlock; 1230 } 1231 } 1232 1233 /** 1234 * Whether we use heap or not depends on our intent to cache the block. We want to avoid 1235 * allocating to off-heap if we intend to cache into the on-heap L1 cache. Otherwise, it's more 1236 * efficient to allocate to off-heap since we can control GC ourselves for those. So our decision 1237 * here breaks down as follows: <br> 1238 * If block cache is disabled, don't use heap. If we're not using the CombinedBlockCache, use heap 1239 * unless caching is disabled for the request. Otherwise, only use heap if caching is enabled and 1240 * the expected block type is not DATA (which goes to off-heap L2 in combined cache). 1241 * @see org.apache.hadoop.hbase.io.hfile.HFileBlock.FSReader#readBlockData(long, long, boolean, 1242 * boolean, boolean) 1243 */ 1244 private boolean shouldUseHeap(BlockType expectedBlockType, boolean cacheBlock) { 1245 if (!cacheConf.getBlockCache().isPresent()) { 1246 return false; 1247 } 1248 1249 // we only cache a block if cacheBlock is true and caching-on-read is enabled in CacheConfig 1250 // we can really only check for that if have an expectedBlockType 1251 if (expectedBlockType != null) { 1252 cacheBlock &= cacheConf.shouldCacheBlockOnRead(expectedBlockType.getCategory()); 1253 } 1254 1255 if (!cacheConf.isCombinedBlockCache()) { 1256 // Block to cache in LruBlockCache must be an heap one, if caching enabled. So just allocate 1257 // block memory from heap for saving an extra off-heap to heap copying in that case. 1258 return cacheBlock; 1259 } 1260 1261 return cacheBlock && expectedBlockType != null && !expectedBlockType.isData(); 1262 } 1263 1264 @Override 1265 public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock, 1266 boolean pread, final boolean isCompaction, boolean updateCacheMetrics, 1267 BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException { 1268 return readBlock(dataBlockOffset, onDiskBlockSize, cacheBlock, pread, isCompaction, 1269 updateCacheMetrics, expectedBlockType, expectedDataBlockEncoding, false); 1270 } 1271 1272 @Override 1273 public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize, final boolean cacheBlock, 1274 boolean pread, final boolean isCompaction, boolean updateCacheMetrics, 1275 BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) 1276 throws IOException { 1277 if (dataBlockIndexReader == null) { 1278 throw new IOException(path + " block index not loaded"); 1279 } 1280 long trailerOffset = trailer.getLoadOnOpenDataOffset(); 1281 if (dataBlockOffset < 0 || dataBlockOffset >= trailerOffset) { 1282 throw new IOException("Requested block is out of range: " + dataBlockOffset 1283 + ", lastDataBlockOffset: " + trailer.getLastDataBlockOffset() 1284 + ", trailer.getLoadOnOpenDataOffset: " + trailerOffset + ", path=" + path); 1285 } 1286 // For any given block from any given file, synchronize reads for said 1287 // block. 1288 // Without a cache, this synchronizing is needless overhead, but really 1289 // the other choice is to duplicate work (which the cache would prevent you 1290 // from doing). 1291 1292 BlockCacheKey cacheKey = 1293 new BlockCacheKey(name, dataBlockOffset, this.isPrimaryReplicaReader(), expectedBlockType); 1294 Attributes attributes = Attributes.of(BLOCK_CACHE_KEY_KEY, cacheKey.toString()); 1295 1296 boolean cacheable = cacheBlock && cacheIfCompactionsOff(); 1297 1298 boolean useLock = false; 1299 IdLock.Entry lockEntry = null; 1300 final Span span = Span.current(); 1301 try { 1302 while (true) { 1303 // Check cache for block. If found return. 1304 if (cacheConf.shouldReadBlockFromCache(expectedBlockType) && !cacheOnly) { 1305 if (useLock) { 1306 lockEntry = offsetLock.getLockEntry(dataBlockOffset); 1307 } 1308 // Try and get the block from the block cache. If the useLock variable is true then this 1309 // is the second time through the loop and it should not be counted as a block cache miss. 1310 HFileBlock cachedBlock = getCachedBlock(cacheKey, cacheBlock, useLock, updateCacheMetrics, 1311 expectedBlockType, expectedDataBlockEncoding); 1312 if (cachedBlock != null) { 1313 if (LOG.isTraceEnabled()) { 1314 LOG.trace("Block for file {} is coming from Cache {}", 1315 Bytes.toString(cachedBlock.getHFileContext().getTableName()), cachedBlock); 1316 } 1317 span.addEvent("block cache hit", attributes); 1318 assert cachedBlock.isUnpacked() : "Packed block leak."; 1319 if (cachedBlock.getBlockType().isData()) { 1320 if (updateCacheMetrics) { 1321 HFile.DATABLOCK_READ_COUNT.increment(); 1322 } 1323 // Validate encoding type for data blocks. We include encoding 1324 // type in the cache key, and we expect it to match on a cache hit. 1325 if (cachedBlock.getDataBlockEncoding() != dataBlockEncoder.getDataBlockEncoding()) { 1326 // Remember to release the block when in exceptional path. 1327 cacheConf.getBlockCache().ifPresent(cache -> { 1328 returnAndEvictBlock(cache, cacheKey, cachedBlock); 1329 }); 1330 throw new IOException("Cached block under key " + cacheKey + " " 1331 + "has wrong encoding: " + cachedBlock.getDataBlockEncoding() + " (expected: " 1332 + dataBlockEncoder.getDataBlockEncoding() + "), path=" + path); 1333 } 1334 } 1335 // Cache-hit. Return! 1336 return cachedBlock; 1337 } 1338 1339 if (!useLock && cacheable && cacheConf.shouldLockOnCacheMiss(expectedBlockType)) { 1340 // check cache again with lock 1341 useLock = true; 1342 continue; 1343 } 1344 // Carry on, please load. 1345 } 1346 1347 span.addEvent("block cache miss", attributes); 1348 // Load block from filesystem. 1349 HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, pread, 1350 !isCompaction, shouldUseHeap(expectedBlockType, cacheable)); 1351 try { 1352 validateBlockType(hfileBlock, expectedBlockType); 1353 } catch (IOException e) { 1354 hfileBlock.release(); 1355 throw e; 1356 } 1357 BlockType.BlockCategory category = hfileBlock.getBlockType().getCategory(); 1358 final boolean cacheCompressed = cacheConf.shouldCacheCompressed(category); 1359 final boolean cacheOnRead = cacheConf.shouldCacheBlockOnRead(category); 1360 1361 // Don't need the unpacked block back and we're storing the block in the cache compressed 1362 if (cacheOnly && cacheCompressed && cacheOnRead) { 1363 cacheConf.getBlockCache().ifPresent(cache -> { 1364 LOG.debug("Skipping decompression of block {} in prefetch", cacheKey); 1365 // Cache the block if necessary 1366 if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { 1367 cache.cacheBlock(cacheKey, hfileBlock, cacheConf.isInMemory(), cacheOnly); 1368 } 1369 }); 1370 1371 if (updateCacheMetrics && hfileBlock.getBlockType().isData()) { 1372 HFile.DATABLOCK_READ_COUNT.increment(); 1373 } 1374 return hfileBlock; 1375 } 1376 HFileBlock unpacked = hfileBlock.unpack(hfileContext, fsBlockReader); 1377 // Cache the block if necessary 1378 cacheConf.getBlockCache().ifPresent(cache -> { 1379 if (cacheable && cacheConf.shouldCacheBlockOnRead(category)) { 1380 // Using the wait on cache during compaction and prefetching. 1381 cache.cacheBlock(cacheKey, cacheCompressed ? hfileBlock : unpacked, 1382 cacheConf.isInMemory(), cacheOnly); 1383 } 1384 }); 1385 if (unpacked != hfileBlock) { 1386 // End of life here if hfileBlock is an independent block. 1387 hfileBlock.release(); 1388 } 1389 if (updateCacheMetrics && hfileBlock.getBlockType().isData()) { 1390 HFile.DATABLOCK_READ_COUNT.increment(); 1391 } 1392 1393 return unpacked; 1394 } 1395 } finally { 1396 if (lockEntry != null) { 1397 offsetLock.releaseLockEntry(lockEntry); 1398 } 1399 } 1400 } 1401 1402 @Override 1403 public boolean hasMVCCInfo() { 1404 return fileInfo.shouldIncludeMemStoreTS() && fileInfo.isDecodeMemstoreTS(); 1405 } 1406 1407 /** 1408 * Compares the actual type of a block retrieved from cache or disk with its expected type and 1409 * throws an exception in case of a mismatch. Expected block type of {@link BlockType#DATA} is 1410 * considered to match the actual block type [@link {@link BlockType#ENCODED_DATA} as well. 1411 * @param block a block retrieved from cache or disk 1412 * @param expectedBlockType the expected block type, or null to skip the check 1413 */ 1414 private void validateBlockType(HFileBlock block, BlockType expectedBlockType) throws IOException { 1415 if (expectedBlockType == null) { 1416 return; 1417 } 1418 BlockType actualBlockType = block.getBlockType(); 1419 if (expectedBlockType.isData() && actualBlockType.isData()) { 1420 // We consider DATA to match ENCODED_DATA for the purpose of this 1421 // verification. 1422 return; 1423 } 1424 if (actualBlockType != expectedBlockType) { 1425 throw new IOException("Expected block type " + expectedBlockType + ", " + "but got " 1426 + actualBlockType + ": " + block + ", path=" + path); 1427 } 1428 } 1429 1430 /** 1431 * @return Last key as cell in the file. May be null if file has no entries. Note that this is not 1432 * the last row key, but it is the Cell representation of the last key 1433 */ 1434 @Override 1435 public Optional<Cell> getLastKey() { 1436 return dataBlockIndexReader.isEmpty() 1437 ? Optional.empty() 1438 : Optional.of(fileInfo.getLastKeyCell()); 1439 } 1440 1441 /** 1442 * @return Midkey for this file. We work with block boundaries only so returned midkey is an 1443 * approximation only. 1444 */ 1445 @Override 1446 public Optional<Cell> midKey() throws IOException { 1447 return Optional.ofNullable(dataBlockIndexReader.midkey(this)); 1448 } 1449 1450 @Override 1451 public void close() throws IOException { 1452 close(cacheConf.shouldEvictOnClose()); 1453 } 1454 1455 @Override 1456 public DataBlockEncoding getEffectiveEncodingInCache(boolean isCompaction) { 1457 return dataBlockEncoder.getEffectiveEncodingInCache(isCompaction); 1458 } 1459 1460 /** For testing */ 1461 @Override 1462 public HFileBlock.FSReader getUncachedBlockReader() { 1463 return fsBlockReader; 1464 } 1465 1466 /** 1467 * Scanner that operates on encoded data blocks. 1468 */ 1469 protected static class EncodedScanner extends HFileScannerImpl { 1470 private final HFileBlockDecodingContext decodingCtx; 1471 private final DataBlockEncoder.EncodedSeeker seeker; 1472 private final DataBlockEncoder dataBlockEncoder; 1473 1474 public EncodedScanner(HFile.Reader reader, boolean cacheBlocks, boolean pread, 1475 boolean isCompaction, HFileContext meta, Configuration conf) { 1476 super(reader, cacheBlocks, pread, isCompaction); 1477 DataBlockEncoding encoding = reader.getDataBlockEncoding(); 1478 dataBlockEncoder = encoding.getEncoder(); 1479 decodingCtx = dataBlockEncoder.newDataBlockDecodingContext(conf, meta); 1480 seeker = dataBlockEncoder.createSeeker(decodingCtx); 1481 } 1482 1483 @Override 1484 public boolean isSeeked() { 1485 return curBlock != null; 1486 } 1487 1488 @Override 1489 public void setNonSeekedState() { 1490 reset(); 1491 } 1492 1493 /** 1494 * Updates the current block to be the given {@link HFileBlock}. Seeks to the the first 1495 * key/value pair. 1496 * @param newBlock the block to make current, and read by {@link HFileReaderImpl#readBlock}, 1497 * it's a totally new block with new allocated {@link ByteBuff}, so if no 1498 * further reference to this block, we should release it carefully. 1499 */ 1500 @Override 1501 protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { 1502 try { 1503 // sanity checks 1504 if (newBlock.getBlockType() != BlockType.ENCODED_DATA) { 1505 throw new IllegalStateException("EncodedScanner works only on encoded data blocks"); 1506 } 1507 short dataBlockEncoderId = newBlock.getDataBlockEncodingId(); 1508 if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { 1509 String encoderCls = dataBlockEncoder.getClass().getName(); 1510 throw new CorruptHFileException( 1511 "Encoder " + encoderCls + " doesn't support data block encoding " 1512 + DataBlockEncoding.getNameFromId(dataBlockEncoderId) + ",path=" + reader.getPath()); 1513 } 1514 updateCurrBlockRef(newBlock); 1515 ByteBuff encodedBuffer = getEncodedBuffer(newBlock); 1516 seeker.setCurrentBuffer(encodedBuffer); 1517 } finally { 1518 releaseIfNotCurBlock(newBlock); 1519 } 1520 // Reset the next indexed key 1521 this.nextIndexedKey = null; 1522 } 1523 1524 private ByteBuff getEncodedBuffer(HFileBlock newBlock) { 1525 ByteBuff origBlock = newBlock.getBufferReadOnly(); 1526 int pos = newBlock.headerSize() + DataBlockEncoding.ID_SIZE; 1527 origBlock.position(pos); 1528 origBlock 1529 .limit(pos + newBlock.getUncompressedSizeWithoutHeader() - DataBlockEncoding.ID_SIZE); 1530 return origBlock.slice(); 1531 } 1532 1533 @Override 1534 protected boolean processFirstDataBlock() throws IOException { 1535 seeker.rewind(); 1536 return true; 1537 } 1538 1539 @Override 1540 public boolean next() throws IOException { 1541 boolean isValid = seeker.next(); 1542 if (!isValid) { 1543 HFileBlock newBlock = readNextDataBlock(); 1544 isValid = newBlock != null; 1545 if (isValid) { 1546 updateCurrentBlock(newBlock); 1547 } else { 1548 setNonSeekedState(); 1549 } 1550 } 1551 return isValid; 1552 } 1553 1554 @Override 1555 public Cell getKey() { 1556 assertValidSeek(); 1557 return seeker.getKey(); 1558 } 1559 1560 @Override 1561 public ByteBuffer getValue() { 1562 assertValidSeek(); 1563 return seeker.getValueShallowCopy(); 1564 } 1565 1566 @Override 1567 public Cell getCell() { 1568 if (this.curBlock == null) { 1569 return null; 1570 } 1571 return seeker.getCell(); 1572 } 1573 1574 @Override 1575 public String getKeyString() { 1576 return CellUtil.toString(getKey(), false); 1577 } 1578 1579 @Override 1580 public String getValueString() { 1581 ByteBuffer valueBuffer = getValue(); 1582 return ByteBufferUtils.toStringBinary(valueBuffer); 1583 } 1584 1585 private void assertValidSeek() { 1586 if (this.curBlock == null) { 1587 throw new NotSeekedException(reader.getPath()); 1588 } 1589 } 1590 1591 @Override 1592 protected Cell getFirstKeyCellInBlock(HFileBlock curBlock) { 1593 return dataBlockEncoder.getFirstKeyCellInBlock(getEncodedBuffer(curBlock)); 1594 } 1595 1596 @Override 1597 protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind, 1598 Cell key, boolean seekBefore) throws IOException { 1599 if (this.curBlock == null || this.curBlock.getOffset() != seekToBlock.getOffset()) { 1600 updateCurrentBlock(seekToBlock); 1601 } else if (rewind) { 1602 seeker.rewind(); 1603 } 1604 this.nextIndexedKey = nextIndexedKey; 1605 return seeker.seekToKeyInBlock(key, seekBefore); 1606 } 1607 1608 @Override 1609 public int compareKey(CellComparator comparator, Cell key) { 1610 return seeker.compareKey(comparator, key); 1611 } 1612 } 1613 1614 /** 1615 * Returns a buffer with the Bloom filter metadata. The caller takes ownership of the buffer. 1616 */ 1617 @Override 1618 public DataInput getGeneralBloomFilterMetadata() throws IOException { 1619 return this.getBloomFilterMetadata(BlockType.GENERAL_BLOOM_META); 1620 } 1621 1622 @Override 1623 public DataInput getDeleteBloomFilterMetadata() throws IOException { 1624 return this.getBloomFilterMetadata(BlockType.DELETE_FAMILY_BLOOM_META); 1625 } 1626 1627 private DataInput getBloomFilterMetadata(BlockType blockType) throws IOException { 1628 if ( 1629 blockType != BlockType.GENERAL_BLOOM_META && blockType != BlockType.DELETE_FAMILY_BLOOM_META 1630 ) { 1631 throw new RuntimeException( 1632 "Block Type: " + blockType.toString() + " is not supported, path=" + path); 1633 } 1634 1635 for (HFileBlock b : fileInfo.getLoadOnOpenBlocks()) { 1636 if (b.getBlockType() == blockType) { 1637 return b.getByteStream(); 1638 } 1639 } 1640 return null; 1641 } 1642 1643 public boolean isFileInfoLoaded() { 1644 return true; // We load file info in constructor in version 2. 1645 } 1646 1647 @Override 1648 public HFileContext getFileContext() { 1649 return hfileContext; 1650 } 1651 1652 /** 1653 * Returns false if block prefetching was requested for this file and has not completed, true 1654 * otherwise 1655 */ 1656 @Override 1657 public boolean prefetchComplete() { 1658 return PrefetchExecutor.isCompleted(path); 1659 } 1660 1661 /** 1662 * Returns true if block prefetching was started after waiting for specified delay, false 1663 * otherwise 1664 */ 1665 @Override 1666 public boolean prefetchStarted() { 1667 return PrefetchExecutor.isPrefetchStarted(); 1668 } 1669 1670 /** 1671 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1672 * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up 1673 * in a Scanner. Letting go of your references to the scanner is sufficient. NOTE: Do not use this 1674 * overload of getScanner for compactions. See 1675 * {@link #getScanner(Configuration, boolean, boolean, boolean)} 1676 * @param conf Store configuration. 1677 * @param cacheBlocks True if we should cache blocks read in by this scanner. 1678 * @param pread Use positional read rather than seek+read if true (pread is better for 1679 * random reads, seek+read is better scanning). 1680 * @return Scanner on this file. 1681 */ 1682 @Override 1683 public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread) { 1684 return getScanner(conf, cacheBlocks, pread, false); 1685 } 1686 1687 /** 1688 * Create a Scanner on this file. No seeks or reads are done on creation. Call 1689 * {@link HFileScanner#seekTo(Cell)} to position an start the read. There is nothing to clean up 1690 * in a Scanner. Letting go of your references to the scanner is sufficient. 1691 * @param conf Store configuration. 1692 * @param cacheBlocks True if we should cache blocks read in by this scanner. 1693 * @param pread Use positional read rather than seek+read if true (pread is better for 1694 * random reads, seek+read is better scanning). 1695 * @param isCompaction is scanner being used for a compaction? 1696 * @return Scanner on this file. 1697 */ 1698 @Override 1699 public HFileScanner getScanner(Configuration conf, boolean cacheBlocks, final boolean pread, 1700 final boolean isCompaction) { 1701 if (dataBlockEncoder.useEncodedScanner()) { 1702 return new EncodedScanner(this, cacheBlocks, pread, isCompaction, this.hfileContext, conf); 1703 } 1704 return new HFileScannerImpl(this, cacheBlocks, pread, isCompaction); 1705 } 1706 1707 public int getMajorVersion() { 1708 return 3; 1709 } 1710 1711 @Override 1712 public void unbufferStream() { 1713 fsBlockReader.unbufferStream(); 1714 } 1715 1716 protected boolean cacheIfCompactionsOff() { 1717 return (!StoreFileInfo.isReference(name) && !HFileLink.isHFileLink(name)) 1718 || !conf.getBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION, true); 1719 } 1720}