001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io.hfile;
019
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.Random;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.CellUtil;
029import org.apache.hadoop.hbase.ExtendedCell;
030import org.apache.hadoop.hbase.HBaseClassTestRule;
031import org.apache.hadoop.hbase.HBaseTestingUtil;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.KeyValue;
034import org.apache.hadoop.hbase.PrivateCellUtil;
035import org.apache.hadoop.hbase.fs.HFileSystem;
036import org.apache.hadoop.hbase.regionserver.BloomType;
037import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
038import org.apache.hadoop.hbase.testclassification.IOTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.BloomFilterFactory;
041import org.apache.hadoop.hbase.util.BloomFilterUtil;
042import org.apache.hadoop.hbase.util.Bytes;
043import org.junit.ClassRule;
044import org.junit.Test;
045import org.junit.experimental.categories.Category;
046import org.slf4j.Logger;
047import org.slf4j.LoggerFactory;
048
049@Category({ IOTests.class, MediumTests.class })
050public class TestSeekBeforeWithInlineBlocks {
051
052  @ClassRule
053  public static final HBaseClassTestRule CLASS_RULE =
054    HBaseClassTestRule.forClass(TestSeekBeforeWithInlineBlocks.class);
055
056  private static final Logger LOG = LoggerFactory.getLogger(TestSeekBeforeWithInlineBlocks.class);
057
058  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
059
060  private static final int NUM_KV = 10000;
061
062  private static final int DATA_BLOCK_SIZE = 4096;
063  private static final int BLOOM_BLOCK_SIZE = 1024;
064  private static final int[] INDEX_CHUNK_SIZES = { 65536, 4096, 1024 };
065  private static final int[] EXPECTED_NUM_LEVELS = { 1, 2, 3 };
066
067  private static final Random RAND = new Random(192537);
068  private static final byte[] FAM = Bytes.toBytes("family");
069
070  private FileSystem fs;
071  private Configuration conf;
072
073  /**
074   * Scanner.seekBefore() could fail because when seeking to a previous HFile data block, it needs
075   * to know the size of that data block, which it calculates using current data block offset and
076   * the previous data block offset. This fails to work when there are leaf-level index blocks in
077   * the scannable section of the HFile, i.e. starting in HFileV2. This test will try seekBefore()
078   * on a flat (single-level) and multi-level (2,3) HFile and confirm this bug is now fixed. This
079   * bug also happens for inline Bloom blocks for the same reasons.
080   */
081  @Test
082  public void testMultiIndexLevelRandomHFileWithBlooms() throws IOException {
083    conf = TEST_UTIL.getConfiguration();
084    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
085
086    // Try out different HFile versions to ensure reverse scan works on each version
087    for (int hfileVersion = HFile.MIN_FORMAT_VERSION_WITH_TAGS; hfileVersion
088        <= HFile.MAX_FORMAT_VERSION; hfileVersion++) {
089
090      conf.setInt(HFile.FORMAT_VERSION_KEY, hfileVersion);
091      fs = HFileSystem.get(conf);
092
093      // Try out different bloom types because inline Bloom blocks break seekBefore()
094      for (BloomType bloomType : BloomType.values()) {
095
096        // Test out HFile block indices of various sizes/levels
097        for (int testI = 0; testI < INDEX_CHUNK_SIZES.length; testI++) {
098          int indexBlockSize = INDEX_CHUNK_SIZES[testI];
099          int expectedNumLevels = EXPECTED_NUM_LEVELS[testI];
100
101          LOG.info(String.format("Testing HFileVersion: %s, BloomType: %s, Index Levels: %s",
102            hfileVersion, bloomType, expectedNumLevels));
103
104          conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, indexBlockSize);
105          conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, BLOOM_BLOCK_SIZE);
106          conf.setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
107
108          ExtendedCell[] cells = new ExtendedCell[NUM_KV];
109
110          Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), String.format(
111            "testMultiIndexLevelRandomHFileWithBlooms-%s-%s-%s", hfileVersion, bloomType, testI));
112
113          // Disable caching to prevent it from hiding any bugs in block seeks/reads
114          conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
115          CacheConfig cacheConf = new CacheConfig(conf);
116
117          // Write the HFile
118          {
119            HFileContext meta = new HFileContextBuilder().withBlockSize(DATA_BLOCK_SIZE).build();
120
121            StoreFileWriter storeFileWriter = new StoreFileWriter.Builder(conf, cacheConf, fs)
122              .withFilePath(hfilePath).withFileContext(meta).withBloomType(bloomType).build();
123
124            for (int i = 0; i < NUM_KV; i++) {
125              byte[] row = RandomKeyValueUtil.randomOrderedKey(RAND, i);
126              byte[] qual = RandomKeyValueUtil.randomRowOrQualifier(RAND);
127              byte[] value = RandomKeyValueUtil.randomValue(RAND);
128              KeyValue kv = new KeyValue(row, FAM, qual, value);
129
130              storeFileWriter.append(kv);
131              cells[i] = kv;
132            }
133
134            storeFileWriter.close();
135          }
136
137          // Read the HFile
138          HFile.Reader reader = HFile.createReader(fs, hfilePath, cacheConf, true, conf);
139
140          // Sanity check the HFile index level
141          assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels());
142
143          // Check that we can seekBefore in either direction and with both pread
144          // enabled and disabled
145          for (boolean pread : new boolean[] { false, true }) {
146            HFileScanner scanner = reader.getScanner(conf, true, pread);
147            checkNoSeekBefore(cells, scanner, 0);
148            for (int i = 1; i < NUM_KV; i++) {
149              checkSeekBefore(cells, scanner, i);
150              checkCell(cells[i - 1], scanner.getCell());
151            }
152            assertTrue(scanner.seekTo());
153            for (int i = NUM_KV - 1; i >= 1; i--) {
154              checkSeekBefore(cells, scanner, i);
155              checkCell(cells[i - 1], scanner.getCell());
156            }
157            checkNoSeekBefore(cells, scanner, 0);
158            scanner.close();
159          }
160
161          reader.close();
162        }
163      }
164    }
165  }
166
167  private void checkSeekBefore(ExtendedCell[] cells, HFileScanner scanner, int i)
168    throws IOException {
169    assertEquals(
170      "Failed to seek to the key before #" + i + " (" + CellUtil.getCellKeyAsString(cells[i]) + ")",
171      true, scanner.seekBefore(cells[i]));
172  }
173
174  private void checkNoSeekBefore(ExtendedCell[] cells, HFileScanner scanner, int i)
175    throws IOException {
176    assertEquals("Incorrectly succeeded in seeking to before first key ("
177      + CellUtil.getCellKeyAsString(cells[i]) + ")", false, scanner.seekBefore(cells[i]));
178  }
179
180  /** Check a key/value pair after it was read by the reader */
181  private void checkCell(ExtendedCell expected, ExtendedCell actual) {
182    assertTrue(String.format("Expected key %s, but was %s", CellUtil.getCellKeyAsString(expected),
183      CellUtil.getCellKeyAsString(actual)), PrivateCellUtil.equals(expected, actual));
184  }
185}