/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.ByteArrayOutputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.ByteBuffAllocator;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex.BlockIndexReader;
import org.apache.hadoop.hbase.io.hfile.NoOpIndexBlockEncoder.NoOpEncodedSeeker;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.nio.MultiByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@RunWith(Parameterized.class)
@Category({ IOTests.class, MediumTests.class })
public class TestHFileBlockIndex {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHFileBlockIndex.class);

  @Parameters
  public static Collection<Object[]> compressionAlgorithms() {
    return HBaseCommonTestingUtility.COMPRESSION_ALGORITHMS_PARAMETERIZED;
  }

  public TestHFileBlockIndex(Compression.Algorithm compr) {
    this.compr = compr;
  }

  private static final Logger LOG = LoggerFactory.getLogger(TestHFileBlockIndex.class);
  private static final Random RNG = new Random(); // This test depends on Random#setSeed
  private static final int NUM_DATA_BLOCKS = 1000;
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  private static final int SMALL_BLOCK_SIZE = 4096;
  private static final int NUM_KV = 10000;

  private static FileSystem fs;
  private Path path;
  private long rootIndexOffset;
  private int numRootEntries;
  private int numLevels;
  private static final List<byte[]> keys = new ArrayList<>();
  private final Compression.Algorithm compr;
  private byte[] firstKeyInFile;
  private Configuration conf;

  private static final int[] INDEX_CHUNK_SIZES = { 4096, 512, 384 };
  private static final int[] EXPECTED_NUM_LEVELS = { 2, 3, 4 };
  private static final int[] UNCOMPRESSED_INDEX_SIZES = { 19187, 21813, 23086 };

  private static final boolean includesMemstoreTS = true;

  static {
    assert INDEX_CHUNK_SIZES.length == EXPECTED_NUM_LEVELS.length;
    assert INDEX_CHUNK_SIZES.length == UNCOMPRESSED_INDEX_SIZES.length;
  }

  @Before
  public void setUp() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);

    fs = HFileSystem.get(conf);
  }

  @Test
  public void testBlockIndex() throws IOException {
    testBlockIndexInternals(false);
    clear();
    testBlockIndexInternals(true);
  }

  private void clear() throws IOException {
    keys.clear();
    firstKeyInFile = null;
    conf = TEST_UTIL.getConfiguration();
    RNG.setSeed(2389757);

    // This test requires at least HFile format version 2.
    conf.setInt(HFile.FORMAT_VERSION_KEY, 3);

    fs = HFileSystem.get(conf);
  }

  private void testBlockIndexInternals(boolean useTags) throws IOException {
    path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
    writeWholeIndex(useTags);
    readIndex(useTags);
  }

  /**
   * A wrapper around a block reader which only caches the results of the last operation. Not
   * thread-safe.
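   * The hit and miss counters let the index-reading test verify that the underlying reader is
   * consulted only when a seek lands in a different block than the previous one.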
   */
  private static class BlockReaderWrapper implements HFile.CachingBlockReader {

    private HFileBlock.FSReader realReader;
    private long prevOffset;
    private long prevOnDiskSize;
    private boolean prevPread;
    private HFileBlock prevBlock;

    public int hitCount = 0;
    public int missCount = 0;

    public BlockReaderWrapper(HFileBlock.FSReader realReader) {
      this.realReader = realReader;
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding) throws IOException {
      return readBlock(offset, onDiskSize, cacheBlock, pread, isCompaction, updateCacheMetrics,
        expectedBlockType, expectedDataBlockEncoding, false);
    }

    @Override
    public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread,
      boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
      DataBlockEncoding expectedDataBlockEncoding, boolean cacheOnly) throws IOException {
      if (offset == prevOffset && onDiskSize == prevOnDiskSize && pread == prevPread) {
        hitCount += 1;
        return prevBlock;
      }

      missCount += 1;
      prevBlock = realReader.readBlockData(offset, onDiskSize, pread, false, true);
      prevOffset = offset;
      prevOnDiskSize = onDiskSize;
      prevPread = pread;

      return prevBlock;
    }
  }

  private void readIndex(boolean useTags) throws IOException {
    long fileSize = fs.getFileStatus(path).getLen();
    LOG.info("Size of {}: {} compression={}", path, fileSize, compr.toString());

    FSDataInputStream istream = fs.open(path);
    HFileContext meta =
      new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS)
        .withIncludesTags(useTags).withCompression(compr).build();
    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, path).build();
    HFileBlock.FSReader blockReader =
      new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);

    BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
    HFileBlockIndex.BlockIndexReader indexReader =
      new HFileBlockIndex.CellBasedKeyBlockIndexReader(CellComparatorImpl.COMPARATOR, numLevels);

    indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, fileSize)
      .nextBlockWithBlockType(BlockType.ROOT_INDEX), numRootEntries);

    long prevOffset = -1;
    int i = 0;
    int expectedHitCount = 0;
    int expectedMissCount = 0;
    LOG.info("Total number of keys: " + keys.size());
    for (byte[] key : keys) {
      assertTrue(key != null);
      assertTrue(indexReader != null);
      KeyValue.KeyOnlyKeyValue keyOnlyKey = new KeyValue.KeyOnlyKeyValue(key, 0, key.length);
      HFileBlock b = indexReader.seekToDataBlock(keyOnlyKey, null, true, true, false, null, brw);
      if (
        PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, keyOnlyKey, firstKeyInFile, 0,
          firstKeyInFile.length) < 0
      ) {
        assertTrue(b == null);
        ++i;
        continue;
      }

      String keyStr = "key #" + i + ", " + Bytes.toStringBinary(key);

      assertTrue("seekToDataBlock failed for " + keyStr, b != null);

      if (prevOffset == b.getOffset()) {
        assertEquals(++expectedHitCount, brw.hitCount);
      } else {
        LOG.info("First key in a new block: " + keyStr + ", block offset: " + b.getOffset());
        assertTrue(b.getOffset() > prevOffset);
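        // Landing in a new block means the wrapper could not serve the read from its
        // single-entry cache, so it should have recorded exactly one more miss.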
        assertEquals(++expectedMissCount, brw.missCount);
        prevOffset = b.getOffset();
      }
      ++i;
    }

    istream.close();
  }

  private void writeWholeIndex(boolean useTags) throws IOException {
    assertEquals(0, keys.size());
    HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
      .withIncludesMvcc(includesMemstoreTS).withIncludesTags(useTags).withCompression(compr)
      .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
    HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
    FSDataOutputStream outputStream = fs.create(path);
    HFileBlockIndex.BlockIndexWriter biw =
      new HFileBlockIndex.BlockIndexWriter(hbw, null, null, null);
    for (int i = 0; i < NUM_DATA_BLOCKS; ++i) {
      hbw.startWriting(BlockType.DATA).write(Bytes.toBytes(String.valueOf(RNG.nextInt(1000))));
      long blockOffset = outputStream.getPos();
      hbw.writeHeaderAndData(outputStream);

      byte[] firstKey = null;
      byte[] family = Bytes.toBytes("f");
      byte[] qualifier = Bytes.toBytes("q");
      for (int j = 0; j < 16; ++j) {
        byte[] k = new KeyValue(RandomKeyValueUtil.randomOrderedKey(RNG, i * 16 + j), family,
          qualifier, EnvironmentEdgeManager.currentTime(), KeyValue.Type.Put).getKey();
        keys.add(k);
        if (j == 8) {
          firstKey = k;
        }
      }
      assertTrue(firstKey != null);
      if (firstKeyInFile == null) {
        firstKeyInFile = firstKey;
      }
      biw.addEntry(firstKey, blockOffset, hbw.getOnDiskSizeWithHeader());

      writeInlineBlocks(hbw, outputStream, biw, false);
    }
    writeInlineBlocks(hbw, outputStream, biw, true);
    rootIndexOffset = biw.writeIndexBlocks(outputStream);
    outputStream.close();

    numLevels = biw.getNumLevels();
    numRootEntries = biw.getNumRootEntries();

    LOG.info("Index written: numLevels=" + numLevels + ", numRootEntries=" + numRootEntries
      + ", rootIndexOffset=" + rootIndexOffset);
  }

  private void writeInlineBlocks(HFileBlock.Writer hbw, FSDataOutputStream outputStream,
    HFileBlockIndex.BlockIndexWriter biw, boolean isClosing) throws IOException {
    while (biw.shouldWriteBlock(isClosing)) {
      long offset = outputStream.getPos();
      biw.writeInlineBlock(hbw.startWriting(biw.getInlineBlockType()));
      hbw.writeHeaderAndData(outputStream);
      biw.blockWritten(offset, hbw.getOnDiskSizeWithHeader(),
        hbw.getUncompressedSizeWithoutHeader());
      LOG.info(
        "Wrote an inline index block at " + offset + ", size " + hbw.getOnDiskSizeWithHeader());
    }
  }

  private static final long getDummyFileOffset(int i) {
    return i * 185 + 379;
  }

  private static final int getDummyOnDiskSize(int i) {
    return i * i * 37 + i * 19 + 13;
  }

  @Test
  public void testSecondaryIndexBinarySearch() throws IOException {
    int numTotalKeys = 99;
    assertTrue(numTotalKeys % 2 == 1); // Ensure no one made this even.

    // We only add odd-index keys into the array that we will binary-search.
    int numSearchedKeys = (numTotalKeys - 1) / 2;

    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);

    dos.writeInt(numSearchedKeys);
    int curAllEntriesSize = 0;
    int numEntriesAdded = 0;

    // Only odd-index elements of this array are used to keep the secondary
    // index entries of the corresponding keys.
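    // The buffer assembled below mimics the non-root index block layout that
    // binarySearchNonRootIndex expects: an entry count, (n + 1) secondary-index offsets, and then
    // one (file offset, on-disk size, key) record per searched key.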
    int secondaryIndexEntries[] = new int[numTotalKeys];

    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i * 2);
      KeyValue cell = new KeyValue(k, Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("val"));
      // KeyValue cell = new KeyValue.KeyOnlyKeyValue(k, 0, k.length);
      keys.add(cell.getKey());
      String msgPrefix = "Key #" + i + " (" + Bytes.toStringBinary(k) + "): ";
      StringBuilder padding = new StringBuilder();
      while (msgPrefix.length() + padding.length() < 70)
        padding.append(' ');
      msgPrefix += padding;
      if (i % 2 == 1) {
        dos.writeInt(curAllEntriesSize);
        secondaryIndexEntries[i] = curAllEntriesSize;
        LOG.info(
          msgPrefix + "secondary index entry #" + ((i - 1) / 2) + ", offset " + curAllEntriesSize);
        curAllEntriesSize += cell.getKey().length + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
        ++numEntriesAdded;
      } else {
        secondaryIndexEntries[i] = -1;
        LOG.info(msgPrefix + "not in the searched array");
      }
    }

    // Make sure the keys are increasing.
    for (int i = 0; i < keys.size() - 1; ++i)
      assertTrue(CellComparatorImpl.COMPARATOR.compare(
        new KeyValue.KeyOnlyKeyValue(keys.get(i), 0, keys.get(i).length),
        new KeyValue.KeyOnlyKeyValue(keys.get(i + 1), 0, keys.get(i + 1).length)) < 0);

    dos.writeInt(curAllEntriesSize);
    assertEquals(numSearchedKeys, numEntriesAdded);
    int secondaryIndexOffset = dos.size();
    assertEquals(Bytes.SIZEOF_INT * (numSearchedKeys + 2), secondaryIndexOffset);

    for (int i = 1; i <= numTotalKeys - 1; i += 2) {
      assertEquals(dos.size(), secondaryIndexOffset + secondaryIndexEntries[i]);
      long dummyFileOffset = getDummyFileOffset(i);
      int dummyOnDiskSize = getDummyOnDiskSize(i);
      LOG.debug("Storing file offset=" + dummyFileOffset + " and onDiskSize=" + dummyOnDiskSize
        + " at offset " + dos.size());
      dos.writeLong(dummyFileOffset);
      dos.writeInt(dummyOnDiskSize);
      LOG.debug("Stored key " + ((i - 1) / 2) + " at offset " + dos.size());
      dos.write(keys.get(i));
    }

    dos.writeInt(curAllEntriesSize);

    ByteBuffer nonRootIndex = ByteBuffer.wrap(baos.toByteArray());
    for (int i = 0; i < numTotalKeys; ++i) {
      byte[] searchKey = keys.get(i);
      byte[] arrayHoldingKey = new byte[searchKey.length + searchKey.length / 2];

      // To make things a bit more interesting, store the key we are looking
      // for at a non-zero offset in a new array.
      System.arraycopy(searchKey, 0, arrayHoldingKey, searchKey.length / 2, searchKey.length);

      KeyValue.KeyOnlyKeyValue cell =
        new KeyValue.KeyOnlyKeyValue(arrayHoldingKey, searchKey.length / 2, searchKey.length);
      int searchResult = BlockIndexReader.binarySearchNonRootIndex(cell,
        new MultiByteBuff(nonRootIndex), CellComparatorImpl.COMPARATOR);
      String lookupFailureMsg =
        "Failed to look up key #" + i + " (" + Bytes.toStringBinary(searchKey) + ")";

      int expectedResult;
      int referenceItem;

      if (i % 2 == 1) {
        // This key is in the array we search as the element (i - 1) / 2. Make
        // sure we find it.
        expectedResult = (i - 1) / 2;
        referenceItem = i;
      } else {
        // This key is not in the array but between two elements of the array,
        // in the beginning, or in the end. The result should be the previous
        // key in the searched array, or -1 for i = 0.
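        // For an even i the closest preceding searched key is keys[i - 1], which sits at index
        // ((i - 1) - 1) / 2 = i / 2 - 1 in the searched array; for i == 0 this evaluates to -1.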
        expectedResult = i / 2 - 1;
        referenceItem = i - 1;
      }

      assertEquals(lookupFailureMsg, expectedResult, searchResult);

      // Now test that we can get the offset and the on-disk size using a
      // higher-level API function.
      boolean locateBlockResult =
        (BlockIndexReader.locateNonRootIndexEntry(new MultiByteBuff(nonRootIndex), cell,
          CellComparatorImpl.COMPARATOR) != -1);

      if (i == 0) {
        assertFalse(locateBlockResult);
      } else {
        assertTrue(locateBlockResult);
        String errorMsg = "i=" + i + ", position=" + nonRootIndex.position();
        assertEquals(errorMsg, getDummyFileOffset(referenceItem), nonRootIndex.getLong());
        assertEquals(errorMsg, getDummyOnDiskSize(referenceItem), nonRootIndex.getInt());
      }
    }

  }

  @Test
  public void testBlockIndexChunk() throws IOException {
    BlockIndexChunk c = new HFileBlockIndex.BlockIndexChunkImpl();
    HFileIndexBlockEncoder indexBlockEncoder = NoOpIndexBlockEncoder.INSTANCE;
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    int N = 1000;
    int[] numSubEntriesAt = new int[N];
    int numSubEntries = 0;
    for (int i = 0; i < N; ++i) {
      baos.reset();
      DataOutputStream dos = new DataOutputStream(baos);
      indexBlockEncoder.encode(c, false, dos);
      assertEquals(c.getNonRootSize(), dos.size());

      baos.reset();
      dos = new DataOutputStream(baos);
      indexBlockEncoder.encode(c, true, dos);
      assertEquals(c.getRootSize(), dos.size());

      byte[] k = RandomKeyValueUtil.randomOrderedKey(RNG, i);
      numSubEntries += RNG.nextInt(5) + 1;
      // Remember the cumulative sub-entry count so the verification loop below has real bounds.
      numSubEntriesAt[i] = numSubEntries;
      keys.add(k);
      c.add(k, getDummyFileOffset(i), getDummyOnDiskSize(i), numSubEntries);
    }

    // Test the ability to look up the entry that contains a particular
    // deeper-level index block's entry ("sub-entry"), assuming a global
    // 0-based ordering of sub-entries. This is needed for mid-key calculation.
    for (int i = 0; i < N; ++i) {
      for (int j = i == 0 ? 0 : numSubEntriesAt[i - 1]; j < numSubEntriesAt[i]; ++j) {
        assertEquals(i, c.getEntryBySubEntry(j));
      }
    }
  }

  /** Checks if the HeapSize calculator is within reason */
  @Test
  public void testHeapSizeForBlockIndex() throws IOException {
    Class<HFileBlockIndex.BlockIndexReader> cl = HFileBlockIndex.BlockIndexReader.class;
    long expected = ClassSize.estimateBase(cl, false);

    HFileBlockIndex.BlockIndexReader bi = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
    long actual = bi.heapSize();

    // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets,
    // int [] blockDataSizes) are all null they are not going to show up in the
    // HeapSize calculation, so need to remove those array costs from expected.
    // Already the block keys are not there in this case
    expected -= ClassSize.align(2 * ClassSize.ARRAY);

    if (expected != actual) {
      expected = ClassSize.estimateBase(cl, true);
      assertEquals(expected, actual);
    }
  }

  /**
   * Checks that computing the mid-key works when the mid-key falls on a leaf index block
   * boundary.
   */
  @Test
  public void testMidKeyOnLeafIndexBlockBoundary() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_midkey");
    int maxChunkSize = 512;
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    // Enable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, true);
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();
    // Evict all blocks that were cached-on-write by the previous invocation.
    blockCache.evictBlocksByHfileName(hfilePath.getName());
    // Write the HFile
    HFileContext meta = new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE)
      .withCompression(Algorithm.NONE).withDataBlockEncoding(DataBlockEncoding.NONE).build();
    HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
      .withFileContext(meta).create();
    Random rand = new Random(19231737);
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");
    int kvNumberToBeWritten = 16;
    // The generated hfile will contain 2 leaf-index blocks and 16 data blocks; the midkey falls
    // exactly on the boundary of the first leaf-index block.
    for (int i = 0; i < kvNumberToBeWritten; ++i) {
      byte[] row = RandomKeyValueUtil.randomOrderedFixedLengthKey(rand, i, 30);

      // Key will be interpreted by KeyValue.KEY_COMPARATOR
      KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
        RandomKeyValueUtil.randomFixedLengthValue(rand, SMALL_BLOCK_SIZE));
      writer.append(kv);
    }
    writer.close();

    // Disable hfile.block.index.cacheonwrite.
    conf.setBoolean(CacheConfig.CACHE_INDEX_BLOCKS_ON_WRITE_KEY, false);

    // Read the HFile
    HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);

    boolean hasArrayIndexOutOfBoundsException = false;
    try {
      // Get the mid-key.
      reader.midKey();
    } catch (ArrayIndexOutOfBoundsException e) {
      hasArrayIndexOutOfBoundsException = true;
    } finally {
      reader.close();
    }

    // Check that no ArrayIndexOutOfBoundsException occurred.
    assertFalse(hasArrayIndexOutOfBoundsException);
  }

  /**
   * Testing block index through the HFile writer/reader APIs. Allows testing the index block size
   * set through configuration, intermediate-level index blocks, and caching of index blocks on
   * write.
   */
  @Test
  public void testHFileWriterAndReader() throws IOException {
    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "hfile_for_block_index");
    CacheConfig cacheConf = new CacheConfig(conf, BlockCacheFactory.createBlockCache(conf));
    BlockCache blockCache = cacheConf.getBlockCache().get();

    for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; ++testI) {
      int indexBlockSize = INDEX_CHUNK_SIZES[testI];
      int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
      LOG.info("Index block size: " + indexBlockSize + ", compression: " + compr);
      // Evict all blocks that were cached-on-write by the previous invocation.
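      // Blocks from the previous iteration share the same file name, so they must not be served
      // from the cache while verifying the freshly written file.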
      blockCache.evictBlocksByHfileName(hfilePath.getName());

      conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
      Set<String> keyStrSet = new HashSet<>();
      byte[][] keys = new byte[NUM_KV][];
      byte[][] values = new byte[NUM_KV][];

      // Write the HFile
      {
        HFileContext meta =
          new HFileContextBuilder().withBlockSize(SMALL_BLOCK_SIZE).withCompression(compr).build();
        HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf).withPath(fs, hfilePath)
          .withFileContext(meta).create();
        Random rand = new Random(19231737);
        byte[] family = Bytes.toBytes("f");
        byte[] qualifier = Bytes.toBytes("q");
        for (int i = 0; i < NUM_KV; ++i) {
          byte[] row = RandomKeyValueUtil.randomOrderedKey(rand, i);

          // Key will be interpreted by KeyValue.KEY_COMPARATOR
          KeyValue kv = new KeyValue(row, family, qualifier, EnvironmentEdgeManager.currentTime(),
            RandomKeyValueUtil.randomValue(rand));
          byte[] k = kv.getKey();
          writer.append(kv);
          keys[i] = k;
          values[i] = CellUtil.cloneValue(kv);
          keyStrSet.add(Bytes.toStringBinary(k));
          if (i > 0) {
            assertTrue((PrivateCellUtil.compare(CellComparatorImpl.COMPARATOR, kv, keys[i - 1], 0,
              keys[i - 1].length)) > 0);
          }
        }

        writer.close();
      }

      // Read the HFile
      HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
      assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

      assertTrue(Bytes.equals(keys[0], ((KeyValue) reader.getFirstKey().get()).getKey()));
      assertTrue(Bytes.equals(keys[NUM_KV - 1], ((KeyValue) reader.getLastKey().get()).getKey()));
      LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1]));

      for (boolean pread : new boolean[] { false, true }) {
        HFileScanner scanner = reader.getScanner(conf, true, pread);
        for (int i = 0; i < NUM_KV; ++i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
        assertTrue(scanner.seekTo());
        for (int i = NUM_KV - 1; i >= 0; --i) {
          checkSeekTo(keys, scanner, i);
          checkKeyValue("i=" + i, keys[i], values[i],
            ByteBuffer.wrap(((KeyValue) scanner.getKey()).getKey()), scanner.getValue());
        }
      }

      // Manually compute the mid-key and validate it.
      HFile.Reader reader2 = reader;
      HFileBlock.FSReader fsReader = reader2.getUncachedBlockReader();

      HFileBlock.BlockIterator iter =
        fsReader.blockRange(0, reader.getTrailer().getLoadOnOpenDataOffset());
      HFileBlock block;
      List<byte[]> blockKeys = new ArrayList<>();
      while ((block = iter.nextBlock()) != null) {
        if (block.getBlockType() != BlockType.LEAF_INDEX) return;
        ByteBuff b = block.getBufferReadOnly();
        int n = b.getIntAfterPosition(0);
        // One int for the number of items, and n + 1 for the secondary index.
        int entriesOffset = Bytes.SIZEOF_INT * (n + 2);

        // Get all the keys from the leaf index block.
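        // Each secondary-index slot stores the entry's offset relative to the start of the entry
        // area; the key bytes follow the fixed file-offset/on-disk-size prefix
        // (SECONDARY_INDEX_ENTRY_OVERHEAD bytes) of each entry.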
        for (int i = 0; i < n; ++i) {
          int keyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 1));
          int nextKeyRelOffset = b.getIntAfterPosition(Bytes.SIZEOF_INT * (i + 2));
          int keyLen = nextKeyRelOffset - keyRelOffset;
          int keyOffset = b.arrayOffset() + entriesOffset + keyRelOffset
            + HFileBlockIndex.SECONDARY_INDEX_ENTRY_OVERHEAD;
          byte[] blockKey = Arrays.copyOfRange(b.array(), keyOffset, keyOffset + keyLen);
          String blockKeyStr = Bytes.toString(blockKey);
          blockKeys.add(blockKey);

          // If the first key of the block is not among the keys written, we
          // are not parsing the non-root index block format correctly.
          assertTrue("Invalid block key from leaf-level block: " + blockKeyStr,
            keyStrSet.contains(blockKeyStr));
        }
      }

      // Validate the mid-key.
      assertEquals(Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)),
        reader.midKey());

      assertEquals(UNCOMPRESSED_INDEX_SIZES[testI],
        reader.getTrailer().getUncompressedDataIndexSize());

      reader.close();
      reader2.close();
    }
  }

  private void checkSeekTo(byte[][] keys, HFileScanner scanner, int i) throws IOException {
    assertEquals("Failed to seek to key #" + i + " (" + Bytes.toStringBinary(keys[i]) + ")", 0,
      scanner.seekTo(KeyValueUtil.createKeyValueFromKey(keys[i])));
  }

  private void assertArrayEqualsBuffer(String msgPrefix, byte[] arr, ByteBuffer buf) {
    assertEquals(
      msgPrefix + ": expected " + Bytes.toStringBinary(arr) + ", actual "
        + Bytes.toStringBinary(buf),
      0, Bytes.compareTo(arr, 0, arr.length, buf.array(), buf.arrayOffset(), buf.limit()));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkKeyValue(String msgPrefix, byte[] expectedKey, byte[] expectedValue,
    ByteBuffer keyRead, ByteBuffer valueRead) {
    if (!msgPrefix.isEmpty()) msgPrefix += ". ";

    assertArrayEqualsBuffer(msgPrefix + "Invalid key", expectedKey, keyRead);
    assertArrayEqualsBuffer(msgPrefix + "Invalid value", expectedValue, valueRead);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeys() throws IOException {
    testIntermediateLevelIndicesWithLargeKeys(16);
  }

  @Test
  public void testIntermediateLevelIndicesWithLargeKeysWithMinNumEntries() throws IOException {
    // Because of the large row keys, we would end up with a 50-level block index if the
    // minimum-entries sanity check were not in place.
    testIntermediateLevelIndicesWithLargeKeys(2);
  }

  public void testIntermediateLevelIndicesWithLargeKeys(int minNumEntries) throws IOException {
    Path hfPath =
      new Path(TEST_UTIL.getDataTestDir(), "testIntermediateLevelIndicesWithLargeKeys.hfile");
    int maxChunkSize = 1024;
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize);
    conf.setInt(HFileBlockIndex.MIN_INDEX_NUM_ENTRIES_KEY, minNumEntries);
    HFileContext context = new HFileContextBuilder().withBlockSize(16).build();
    HFile.Writer hfw = new HFile.WriterFactory(conf, cacheConf).withFileContext(context)
      .withPath(fs, hfPath).create();
    List<byte[]> keys = new ArrayList<>();

    // This should result in leaf-level indices and a root level index
    for (int i = 0; i < 100; i++) {
      byte[] rowkey = new byte[maxChunkSize + 1];
      byte[] b = Bytes.toBytes(i);
      System.arraycopy(b, 0, rowkey, rowkey.length - b.length, b.length);
      keys.add(rowkey);
      hfw.append(CellUtil.createCell(rowkey));
    }
    hfw.close();

    HFile.Reader reader = HFile.createReader(fs, hfPath, cacheConf, true, conf);
    // Scanner doesn't do Cells yet. Fix.
    HFileScanner scanner = reader.getScanner(conf, true, true);
    for (int i = 0; i < keys.size(); ++i) {
      scanner.seekTo(CellUtil.createCell(keys.get(i)));
    }
    reader.close();
  }

  /**
   * This test is for HBASE-27940, where the midkey metadata in the root index block would always
   * be ignored by {@link BlockIndexReader#readMultiLevelIndexRoot}.
   */
  @Test
  public void testMidKeyReadSuccessfullyFromRootIndexBlock() throws IOException {
    conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, 128);
    Path hfilePath =
      new Path(TEST_UTIL.getDataTestDir(), "testMidKeyReadSuccessfullyFromRootIndexBlock");
    Compression.Algorithm compressAlgo = Compression.Algorithm.NONE;
    int entryCount = 50000;
    HFileContext context = new HFileContextBuilder().withBlockSize(4096).withIncludesTags(false)
      .withDataBlockEncoding(DataBlockEncoding.NONE).withCompression(compressAlgo).build();

    try (HFile.Writer writer = new HFile.WriterFactory(conf, new CacheConfig(conf))
      .withPath(fs, hfilePath).withFileContext(context).create()) {

      List<KeyValue> keyValues = new ArrayList<>(entryCount);
      for (int i = 0; i < entryCount; ++i) {
        byte[] keyBytes = RandomKeyValueUtil.randomOrderedKey(RNG, i);
        // A random-length random value.
        byte[] valueBytes = RandomKeyValueUtil.randomValue(RNG);
        KeyValue keyValue =
          new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, valueBytes);
        writer.append(keyValue);
        keyValues.add(keyValue);
      }
    }

    try (FSDataInputStream fsdis = fs.open(hfilePath)) {
      long fileSize = fs.getFileStatus(hfilePath).getLen();
      FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis, fileSize);

      assertEquals(3, trailer.getMajorVersion());
      assertEquals(entryCount, trailer.getEntryCount());
      HFileContext meta = new HFileContextBuilder().withCompression(compressAlgo)
        .withIncludesMvcc(false).withIncludesTags(false)
        .withDataBlockEncoding(DataBlockEncoding.NONE).withHBaseCheckSum(true).build();
      ReaderContext readerContext =
        new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(fsdis))
          .withFilePath(hfilePath).withFileSystem(fs).withFileSize(fileSize).build();
      HFileBlock.FSReader blockReader =
        new HFileBlock.FSReaderImpl(readerContext, meta, ByteBuffAllocator.HEAP, conf);

      MyEncoder encoder = new MyEncoder();
      HFileBlockIndex.CellBasedKeyBlockIndexReaderV2 dataBlockIndexReader =
        new HFileBlockIndex.CellBasedKeyBlockIndexReaderV2(trailer.createComparator(),
          trailer.getNumDataIndexLevels(), encoder);

      HFileBlock.BlockIterator blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(),
        fileSize - trailer.getTrailerSize());
      // Data index. We also read statistics about the block index written after
      // the root level.
      dataBlockIndexReader.readMultiLevelIndexRoot(
        blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount());
      NoOpEncodedSeeker noOpEncodedSeeker = (NoOpEncodedSeeker) encoder.encoderSeeker;
      // Assert we have read midkey metadata successfully.
      assertTrue(noOpEncodedSeeker.midLeafBlockOffset >= 0);
      assertTrue(noOpEncodedSeeker.midLeafBlockOnDiskSize > 0);
      assertTrue(noOpEncodedSeeker.midKeyEntry >= 0);
    }
  }

  static class MyEncoder implements HFileIndexBlockEncoder {

    EncodedSeeker encoderSeeker;

    @Override
    public void saveMetadata(Writer writer) throws IOException {
      NoOpIndexBlockEncoder.INSTANCE.saveMetadata(writer);
    }

    @Override
    public void encode(BlockIndexChunk blockIndexChunk, boolean rootIndexBlock, DataOutput out)
      throws IOException {
      NoOpIndexBlockEncoder.INSTANCE.encode(blockIndexChunk, rootIndexBlock, out);
    }

    @Override
    public IndexBlockEncoding getIndexBlockEncoding() {
      return NoOpIndexBlockEncoder.INSTANCE.getIndexBlockEncoding();
    }

    @Override
    public EncodedSeeker createSeeker() {
      encoderSeeker = NoOpIndexBlockEncoder.INSTANCE.createSeeker();
      return encoderSeeker;
    }

  }
}