/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testBasicHalfAndHFileLinkMapFile"));
    // The locations of the hfiles that an HFileLink refers to must be consistent with the
    // table dir created by CommonFSUtils, so create the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true);
    checkHalfHFile(regionFs, sf);
  }

  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }

  // Pick a split point (roughly halfway).
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };
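  // Worked out: (LAST_CHAR + FIRST_CHAR) / 2 == ('z' + 'a') / 2 == (122 + 97) / 2 == 109 == 'm',
  // so SPLITKEY is the two-byte row {'m', 'a'}, near the midpoint of the 'aa'..'zz' row space
  // that writeStoreFile populates.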

  /*
   * Writes KeyValue data (two-byte rows 'aa' through 'zz') to the passed writer and then closes
   * it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

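  // A note on the data shape below (an observation, not part of the original code): each round i
  // writes all 26 * 26 column values into a single one-byte row {(byte) i}, so rounds gives the
  // row count and 26 * 26 * rounds the total cell count. The data block size tests use this to
  // build a file large enough to span many blocks.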
  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testReferenceTb"));
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row, not in the middle of a row. The midkey returned by the reader may be in
    // the middle of a row, so create a new key from just its row, with an empty column and
    // timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    HRegionInfo splitHri = new HRegionInfo(hri.getTable(), null, midRow);
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true);
    HStoreFile refHsf = new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    HFileScanner s = refHsf.getReader().getScanner(false, false);
    Cell kv = null;
    for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
      ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
      kv = KeyValueUtil.createKeyValueFromKey(bb);
      if (first) {
        assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
          midRow.length));
        first = false;
      }
    }
    assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
      finalRow.length));
  }

  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();

    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

    // Verify after instantiating scanner refCount is increased
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify after closing scanner refCount is decreased
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

  @Test
  public void testHFileLink() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testHFileLinkTb"));
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
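    // Counting scheme: seekTo() positions the scanner on the first cell (counted by starting at
    // 1) and each next() visits one more, so a full scan of the 26 * 26 rows written by
    // writeStoreFile should yield exactly (LAST_CHAR - FIRST_CHAR + 1)^2 cells.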
    int count = 1;
    HFileScanner s = hsf.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  /**
   * This test creates an hfile and then the dir structures and files needed to verify that
   * references to hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // adding legal table name chars to verify regex handles it.
    HRegionInfo hri = new HRegionInfo(TableName.valueOf("_original-evil-name"));
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    HRegionInfo splitHriA = new HRegionInfo(hri.getTable(), null, SPLITKEY);
    HRegionInfo splitHriB = new HRegionInfo(hri.getTable(), SPLITKEY, null);
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true);
    f.initReader();
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true); // top
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false); // bottom
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. There is code in StoreFile that handles this case.

    // Try to open store file from link
    HStoreFile hsfA = new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 1;
    HFileScanner s = hsfA.getReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
      count++;
    }
    assertTrue(count > 0); // read some rows here

    // Try to open store file from link
    HStoreFile hsfB = new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    HFileScanner sB = hsfB.getReader().getScanner(false, false);
    sB.seekTo();

    // count++ because seekTo() already positioned the scanner on a cell that the
    // while loop below will not count
    count++;
    while (sB.next()) {
      count++;
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f)
    throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. test using the midRow as the splitKey, this test will generate two Reference files
    // in the children
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
    HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTable(), null, midRow);
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true);
    // Create bottom split.
    HRegionInfo bottomHri = new HRegionInfo(regionFs.getRegionInfo().getTable(), midRow, null);
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test the top half first, then the bottom.
      boolean first = true;
      ByteBuffer key = null;
      HFileScanner topScanner = top.getScanner(false, false);
      while (
        (!topScanner.isSeeked() && topScanner.seekTo())
          || (topScanner.isSeeked() && topScanner.next())
      ) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

        if (
          (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
            key.arrayOffset(), key.limit())) > 0
        ) {
          fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
        }
        if (first) {
          first = false;
          LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      HFileScanner bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
        assertTrue(key.compareTo(bbMidkeyBytes) < 0);
      }
      if (previous != null) {
        LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. test using a midkey which will generate one Reference file and one HFileLink file.
      // First, use a key that is less than the first key. Ensure splits behave properly.
      byte[] badmidkey = Bytes.toBytes("  .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);

      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
      first = true;
      topScanner = top.getScanner(false, false);
      KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
      while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
        keyOnlyKV.setKey(key.array(), 0 + key.arrayOffset(), key.limit());
        assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
          badmidkey, 0, badmidkey.length) >= 0);
        if (first) {
          first = false;
          KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First top when key < bottom: " + keyKV);
          String tmp =
            Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last top when key < bottom: " + keyKV);
      String tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is greater than the last key in the file ('|||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false);
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      bottomScanner = bottom.getScanner(false, false);
      while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
        key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
        if (first) {
          first = false;
          keyKV = KeyValueUtil.createKeyValueFromKey(key);
          LOG.info("First bottom when key > top: " + keyKV);
          tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
          for (int i = 0; i < tmp.length(); i++) {
            assertTrue(tmp.charAt(i) == 'a');
          }
        }
      }
      keyKV = KeyValueUtil.createKeyValueFromKey(key);
      LOG.info("Last bottom when key > top: " + keyKV);
      tmp = Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
      for (int i = 0; i < tmp.length(); i++) {
        assertTrue(tmp.charAt(i) == 'z');
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

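  // Note on the formatter below: "%010d" renders row indexes as zero-padded 10-digit strings
  // ("0000000000", "0000000001", ...), so the lexicographic byte order of the row keys matches
  // their numeric order, which the bloom filter tests rely on for row lookups.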
  private static final String localFormatter = "%010d";

  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
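    // Bound check, worked out: only the 1000 odd-indexed (absent) rows can be false positives,
    // but the bound below is computed over all 2000 lookups with a 2x slack factor, so with the
    // err of 0.01 that callers configure this allows up to 40 false positives.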
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }

  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // add delete family
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do a reseek with an empty KV to position at the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // The line below deserves commentary: it is the expected number of bloom false positives.
    // Column-level (ROWCOL): rowCount*2*colCount inserts.
    // Row-level (ROW): only rowCount*2 inserts, but failures are magnified by the
    // 2nd for loop over every column (2*colCount).
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };
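    // Plugging in the values above (an illustration, not part of the original test): with
    // rowCount=50, colCount=10 and err=0.01, expErr is {10.0, 20.0}, and the final assertion
    // below allows falsePos < 2 * expErr[x], i.e. fewer than 20 (ROWCOL) or 40 (ROW).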

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = new StoreFileInfo(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println("  False negatives: " + falseNeg);
      System.out.println("  False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on given parameters
   * @return the rows key-value list
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // lets make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }

  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    HStoreFile hsf =
      new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    LOG.debug(hsf.getPath().toString());

    // Read this file, we should see 3 misses
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    hsf = new HStoreFile(this.fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);

    // Read this file, we should see 3 hits
    hsf.initReader();
    reader = hsf.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerOne = hsf.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    StoreFileReader readerTwo = hsf.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    Cell kv1 = null;
    Cell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOff, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Evictions
    // caused by HFile invalidation are not counted along with normal evictions.
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsf = new HStoreFile(this.fs, pathCowOn, conf, cacheConf, BloomType.NONE, true);
    hsf.initReader();
    reader = hsf.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

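  // A short note on the helper below (added commentary; the mocked master plumbing is only what
  // commitDaughterRegion needs): it writes a Reference file for one half of sf into the daughter
  // region hri, commits the daughter region, and returns the committed path, or null when the
  // split key falls outside the file's key range and no reference is created for that half.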
  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef)
    throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }

  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }

  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
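    // Walk the data blocks from the first to the last data block offset, checking that every
    // block other than the last one lands within 5% over the configured BLOCKSIZE_SMALL (the
    // trailing block may be short, so the offset <= max guard inside the loop skips it).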
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
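    // The validation lambda receives (uncompressedBlockSize, 1-based blockCount). With the
    // compression rate predicator, once a compression ratio has been observed, blocks may grow
    // well past BLOCKSIZE_SMALL before being closed; here blocks 3 through 10 are expected to
    // reach at least 10x the configured block size, and the file should close in 12 blocks.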
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
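    // With the uncompressed-size predicator, blocks close once the uncompressed payload reaches
    // the configured block size, so every block stays under 10x BLOCKSIZE_SMALL and the
    // 26 * 26 * 200 cells written by writeLargeStoreFile span 200 blocks.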
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }

  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    HStoreFile storeFile =
      new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE, true);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
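    // Walk every data block, feeding the validation lambda the block's uncompressed size (without
    // header) and the running 1-based block count, then check the total block count at the end.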
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }

}