/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
    // The locations of the hfiles that HFileLinks refer to must be consistent with the
    // table dir created by CommonFSUtils, so make the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true, sft);
    checkHalfHFile(regionFs, sf, sft);
  }

  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }

  // Pick a split point (roughly halfway): ('a' + 'z') / 2 == 'm', i.e. the row "ma".
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };

  /*
   * Writes HStoreKey and ImmutableBytes data to passed writer and then closes it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
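    // A Reference file written by the split below stores just the split row and a
    // top/bottom flag; reads resolve through it to this backing hfile, so the parent
    // file must outlive the reference.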
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row, not in middle of row. Midkey returned by reader
    // may be in middle of row. Create new one with empty column and
    // timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true, sft);
    HStoreFile refHsf =
      new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true, sft);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    try (HFileScanner s = refHsf.getReader().getScanner(false, false, false)) {
      Cell kv = null;
      for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
        ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
        kv = KeyValueUtil.createKeyValueFromKey(bb);
        if (first) {
          assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
            midRow.length));
          first = false;
        }
      }
      assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
        finalRow.length));
    }
  }

  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
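    // Constructing a StoreFileScanner below bumps the file's read refCount; closing
    // the scanner must drop it again, which is what the two asserts check.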
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

    // Verify that instantiating the scanner increased the refCount
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify that closing the scanner decreased the refCount
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

  @Test
  public void testHFileLink() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    // Force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    final RegionInfo dstHri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    HRegionFileSystem dstRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, dstHri.getTable()), dstHri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath =
      new Path(regionFs.getTableDir(), new Path(dstHri.getRegionNameAsString(), TEST_FAMILY));
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link

    // This should be the SFT for the destination link file path. It is not being used
    // right now, but with the next patch the file link creation logic will also move
    // to the SFT interface.
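    // Opening the link through the tracker should resolve it back to the original
    // region's hfile; storeFileInfo.isLink() below confirms the path was recognized
    // as an HFileLink (the link name encodes roughly <table>=<encoded-region>-<hfile>).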
    StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(dstHri.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(dstRegionFs).build());
    StoreFileInfo storeFileInfo = sft.getStoreFileInfo(linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
    int count = 0;
    try (StoreFileScanner scanner = hsf.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  @Test
  public void testsample() {
    // Sanity check of Path concatenation; prints "/r1/c1/f1".
    Path p1 = new Path("/r1/c1");
    Path p2 = new Path("f1");
    System.out.println(new Path(p1, p2).toString());
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references to
   * hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // Force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // Adding legal table name chars to verify the regex handles it.
    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // Create a link to the store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
    RegionInfo splitHriB =
      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();

    StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(hriClone.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(cloneRegionFs).build());

    HRegionFileSystem splitRegionAFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, splitHriA.getTable()), splitHriA);
    StoreFileTracker sftA = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(splitHriA.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(splitRegionAFs).build());
    HRegionFileSystem splitRegionBFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, splitHriB.getTable()), splitHriB);
    StoreFileTracker sftB = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(splitHriB.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(splitRegionBFs).build());
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true, sft);
    f.initReader();
    // top
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true, sft);
    // bottom
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false, sft);
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. The code in StoreFile handles this case.

    // Try to open store file from link
    HStoreFile hsfA =
      new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true, sftA);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 0;
    try (StoreFileScanner scanner = hsfA.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
      assertTrue(count > 0); // read some rows here
    }

    // Try to open store file from link
    HStoreFile hsfB =
      new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true, sftB);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    try (StoreFileScanner scanner = hsfB.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }

    // Read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f,
    StoreFileTracker sft) throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. Test using the midRow as the splitKey; this will generate two Reference files
    // in the children
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
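    // A "top" reference covers the keys at or above the split row and a "bottom"
    // reference the keys below it; between them the two halves must cover the whole
    // backing file.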
    RegionInfo topHri =
      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true, sft);
    // Create bottom split.
    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
      .setStartKey(SPLITKEY).build();
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false, sft);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF =
      new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two half readers and assert that between them they can read the full
      // backing file, one from the top and the other from the bottom.
      // Test reading from the top first.
      boolean first = true;
      ByteBuffer key = null;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        while (
          (!topScanner.isSeeked() && topScanner.seekTo())
            || (topScanner.isSeeked() && topScanner.next())
        ) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

          if (
            (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
              key.arrayOffset(), key.limit())) > 0
          ) {
            fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
          }
          if (first) {
            first = false;
            LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
          }
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
          }
          assertTrue(key.compareTo(bbMidkeyBytes) < 0);
        }
        if (previous != null) {
          LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. Test using a midkey which will generate one Reference file and one HFileLink file.
      // First, do a key that is < than first key. Ensure splits behave
      // properly.
      byte[] badmidkey = Bytes.toBytes(" .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft);

      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
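      // badmidkey sorts before every key in the file, so the "top" half is really the
      // whole file: scanning it should run from the 'aa' row through the 'zz' row.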
      first = true;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
        while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
          keyOnlyKV.setKey(key.array(), key.arrayOffset(), key.limit());
          assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
            badmidkey, 0, badmidkey.length) >= 0);
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First top when key < bottom: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last top when key < bottom: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > than last key in file ('|||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft);
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First bottom when key > top: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last bottom when key > top: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

  private static final String localFormatter = "%010d";

  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // Check the false positives rate: only even rows were written, so every hit on an
    // odd row is a bloom false positive.
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // Write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }

  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // Write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
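    // Note no general bloom type is set on this writer: the delete family bloom
    // filter is maintained by default for every store file, and it is the one probed
    // below via passesDeleteFamilyBloomFilter.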
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // Add delete family cells
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // Check the false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // Write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
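    // HBASE-8012: a reseek issued before any seek must position the scanner at the
    // start of the file rather than fail.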
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do a reseek with an empty KV to position at the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // Run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // The line below deserves commentary: it is the expected number of bloom false positives.
    //   Column-level (ROWCOL): rowCount * 2 * colCount negative lookups.
    //   Row-level (ROW): only rowCount * 2 row lookups, but each row-level failure is
    //   magnified by the inner column loop (2 * colCount).
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // Write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
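      // Only even-numbered rows and qualifiers are written (two versions each); the
      // odd ones probed later should miss, so any hit on them counts as a bloom
      // false positive.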
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // Check the false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println(" False negatives: " + falseNeg);
      System.out.println(" False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }

  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on given parameters
   * @return the list of KeyValues for the rows
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }

  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
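    // Written timestamps span [1, 20], so the reader's timestamp range tracking
    // should accept scans overlapping that window and prune ones like [27, 50).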
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // Let's make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertFalse(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // Should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }

  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
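    // The plan: write one file with cache-on-write off (first read should miss all
    // three blocks), one with it on (first read should hit), then compare eviction
    // behaviour when the readers are closed.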
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsfCowOff = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    LOG.debug(hsfCowOff.getPath().toString());

    // Read this file, we should see 3 misses
    hsfCowOff.initReader();
    StoreFileReader reader = hsfCowOff.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsfCowOn = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);

    // Read this file, we should see 3 hits
    hsfCowOn.initReader();
    reader = hsfCowOn.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Let's read back the two files to ensure the blocks exactly match
    hsfCowOff.initReader();
    StoreFileReader readerOne = hsfCowOff.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsfCowOn.initReader();
    StoreFileReader readerTwo = hsfCowOn.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    ExtendedCell kv1 = null;
    ExtendedCell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsfCowOff.initReader();
    reader = hsfCowOff.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsfCowOn.initReader();
    reader = hsfCowOn.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef,
    StoreFileTracker sft) throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null, sft);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }

  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
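      // (that byte is the "+ 1" below: memstore timestamps are appended to each cell
      // as a vlong, and the value 0 encodes in a single byte)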
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }

  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
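    // With the encoded blocksize ratio set to 1 above, the writer closes a block once
    // its encoded size reaches the configured block size, so on-disk data blocks
    // should come out within a few percent of BLOCKSIZE_SMALL (5% margin below).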
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }

  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
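    // The configured predicator decides when a block counts as full: the
    // compression-rate predicator lets blocks grow in line with the observed
    // compression ratio (few, large uncompressed blocks), while the uncompressed
    // predicator caps them at the plain block size (many small blocks).
    // expectedBlockCount and the validation function encode those expectations.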
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }

}