/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category({ IOTests.class, MediumTests.class })
public class TestSeekBeforeWithInlineBlocks {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  // Number of key/value pairs written into each HFile under test
  private static final int NUM_KV = 10000;

  private static final int DATA_BLOCK_SIZE = 4096;
  private static final int BLOOM_BLOCK_SIZE = 1024;
  // Index chunk sizes chosen so the resulting block index has 1, 2, or 3 levels respectively
  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };

  // Fixed seed keeps the generated keys deterministic across runs
  private static final Random RAND = new Random(192537);
  private static final byte[] FAM = Bytes.toBytes("family");

  private FileSystem fs;
  private Configuration conf;

  /**
   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
   * to know the size of that data block, which it calculates using current data block offset and
   * the previous data block offset. This fails to work when there are leaf-level index blocks in
   * the scannable section of the HFile, i.e. starting in HFileV2. This test will try seekBefore()
   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed. This
   * bug also happens for inline Bloom blocks for the same reasons.
   */
  @Test
  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    // Set once up front; the value never changes across iterations, so the former
    // per-iteration re-set inside the loop was redundant and has been dropped.
    conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);

    // Try out different HFile versions to ensure reverse scan works on each version
    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS; hfileVersion
        <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {

      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
      fs = HFileSystem.get(conf);

      // Try out different bloom types because inline Bloom blocks break seekBefore()
      for (BloomType bloomType : BloomType.values()) {

        // Test out HFile block indices of various sizes/levels
        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];

          // Parameterized logging: no String.format cost when INFO is disabled
          LOG.info("Testing HFileVersion: {}, BloomType: {}, Index Levels: {}", hfileVersion,
            bloomType, expectedNumLevels);

          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);

          // Written cells, retained in write order so seekBefore(cells[i]) must land on
          // cells[i - 1]
          ExtendedCell[] cells = new ExtendedCell[NUM_KV];

          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), String.format(
            "testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s", hfileVersion, bloomType, testI));

          // Disable caching to prevent it from hiding any bugs in block seeks/reads
          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
          CacheConfig cacheConf = new CacheConfig(conf);

          // Write the HFile
          {
            HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build();

            StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
              .withFilePath(hfilePath).withFileContext(meta).withBloomType(bloomType).build();

            // try/finally so the writer is released even if an append throws
            try {
              for (int i = 0; i < NUM_KV; i++) {
                byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
                byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
                byte[] value = RandomKeyValueUtil.randomValue(RAND);
                KeyValue kv = new KeyValue(row, FAM, qual, value);

                storeFileWriter.append(kv);
                cells[i] = kv;
              }
            } finally {
              storeFileWriter.close();
            }
          }

          // Read the HFile; try-with-resources so the reader is closed even when an
          // assertion below fails (the old code leaked it in that case)
          try (HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf)) {

            // Sanity check the HFile index level
            assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());

            // Check that we can seekBefore in either direction and with both pread
            // enabled and disabled
            for (boolean pread : new boolean[] { false, true }) {
              HFileScanner scanner = reader.getScanner(conf, true, pread);
              try {
                checkNoSeekBefore(cells, scanner, 0);
                // Forward pass: seekBefore(cells[i]) must position on cells[i - 1]
                for (int i = 1; i < NUM_KV; i++) {
                  checkSeekBefore(cells, scanner, i);
                  checkCell(cells[i - 1], scanner.getCell());
                }
                assertTrue(scanner.seekTo());
                // Backward pass over the same keys
                for (int i = NUM_KV - 1; i >= 1; i--) {
                  checkSeekBefore(cells, scanner, i);
                  checkCell(cells[i - 1], scanner.getCell());
                }
                checkNoSeekBefore(cells, scanner, 0);
              } finally {
                scanner.close();
              }
            }
          }
        }
      }
    }
  }

  /** Assert that seekBefore(cells[i]) succeeds. */
  private void checkSeekBefore(ExtendedCell[] cells, HFileScanner scanner, int i)
    throws IOException {
    assertTrue(
      "Failed to seek to the key before #" + i + " (" + CellUtil.getCellKeyAsString(cells[i]) + ")",
      scanner.seekBefore(cells[i]));
  }

  /** Assert that seekBefore(cells[i]) fails, i.e. there is no key before cells[i]. */
  private void checkNoSeekBefore(ExtendedCell[] cells, HFileScanner scanner, int i)
    throws IOException {
    assertFalse("Incorrectly succeeded in seeking to before first key ("
      + CellUtil.getCellKeyAsString(cells[i]) + ")", scanner.seekBefore(cells[i]));
  }

  /** Check a key/value pair after it was read by the reader */
  private void checkCell(ExtendedCell expected, ExtendedCell actual) {
    assertTrue(String.format("Expected key %s, but was %s", CellUtil.getCellKeyAsString(expected),
      CellUtil.getCellKeyAsString(actual)), PrivateCellUtil.equals(expected, actual));
  }
}