/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.BLOCK_COMPRESSED_SIZE_PREDICATOR;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.OptionalLong;
import java.util.TreeSet;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.PreviousBlockCompressionRatePredicator;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.io.hfile.UncompressedBlockSizePredicator;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.Mockito;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Test HStoreFile
 */
@Category({ RegionServerTests.class, MediumTests.class })
public class TestHStoreFile {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestHStoreFile.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHStoreFile.class);
  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  private CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
  private static Path ROOT_DIR = TEST_UTIL.getDataTestDir("TestStoreFile");
  private static final ChecksumType CKTYPE = ChecksumType.CRC32C;
  private static final int CKBYTES = 512;
  private static String TEST_FAMILY = "cf";
  private static final char FIRST_CHAR = 'a';
  private static final char LAST_CHAR = 'z';

  @Rule
  public TestName name = new TestName();

  private Configuration conf;
  private Path testDir;
  private FileSystem fs;

  @Before
  public void setUp() throws IOException {
    conf = TEST_UTIL.getConfiguration();
    testDir = TEST_UTIL.getDataTestDir(name.getMethodName());
    fs = testDir.getFileSystem(conf);
  }

  @AfterClass
  public static void tearDownAfterClass() {
    TEST_UTIL.cleanupTestDir();
  }
  /**
   * Write a file and then assert that we can read from top and bottom halves using two
   * HalfMapFiles, as well as one HalfMapFile and one HFileLink file.
   */
  @Test
  public void testBasicHalfAndHFileLinkMapFile() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testBasicHalfAndHFileLinkMapFile")).build();
    // The locations of the hfiles that an HFileLink refers to must be consistent with the
    // table dir laid out by CommonFSUtils, so create the region directory under
    // CommonFSUtils.getTableDir here.
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      CommonFSUtils.getTableDir(CommonFSUtils.getRootDir(conf), hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(2 * 1024).build();
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path sfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile sf = new HStoreFile(this.fs, sfPath, conf, cacheConf, BloomType.NONE, true, sft);
    checkHalfHFile(regionFs, sf, sft);
  }

  private void writeStoreFile(final StoreFileWriter writer) throws IOException {
    writeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()));
  }
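  /*
   * A note on SPLITKEY below: writeStoreFile fills files with the two-character rows "aa".."zz",
   * and (LAST_CHAR + FIRST_CHAR) / 2 = ('a' + 'z') / 2 = (97 + 122) / 2 = 109 = 'm', so the
   * split key is the row "ma", roughly the midpoint of that key space.
   */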
  // pick a split point (roughly halfway)
  byte[] SPLITKEY = new byte[] { (LAST_CHAR + FIRST_CHAR) / 2, FIRST_CHAR };

  /*
   * Writes HStoreKey and ImmutableBytes data to the passed writer and then closes it.
   */
  public static void writeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier)
    throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
        for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
          byte[] b = new byte[] { (byte) d, (byte) e };
          writer.append(new KeyValue(b, fam, qualifier, now, b));
        }
      }
    } finally {
      writer.close();
    }
  }

  public static void writeLargeStoreFile(final StoreFileWriter writer, byte[] fam, byte[] qualifier,
    int rounds) throws IOException {
    long now = EnvironmentEdgeManager.currentTime();
    try {
      for (int i = 0; i < rounds; i++) {
        for (char d = FIRST_CHAR; d <= LAST_CHAR; d++) {
          for (char e = FIRST_CHAR; e <= LAST_CHAR; e++) {
            byte[] b = new byte[] { (byte) d, (byte) e };
            byte[] key = new byte[] { (byte) i };
            writer.append(new KeyValue(key, fam, qualifier, now, b));
          }
        }
      }
    } finally {
      writer.close();
    }
  }

  /**
   * Test that our mechanism of writing store files in one region to reference store files in other
   * regions works.
   */
  @Test
  public void testReference() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testReferenceTb")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile hsf = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft);
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    // Split on a row, not in middle of row. Midkey returned by reader
    // may be in middle of row. Create new one with empty column and
    // timestamp.
    byte[] midRow = CellUtil.cloneRow(reader.midKey().get());
    byte[] finalRow = CellUtil.cloneRow(reader.getLastKey().get());
    hsf.closeStoreFile(true);

    // Make a reference
    RegionInfo splitHri = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(midRow).build();
    Path refPath = splitStoreFile(regionFs, splitHri, TEST_FAMILY, hsf, midRow, true, sft);
    HStoreFile refHsf =
      new HStoreFile(this.fs, refPath, conf, cacheConf, BloomType.NONE, true, sft);
    refHsf.initReader();
    // Now confirm that I can read from the reference and that it only gets
    // keys from top half of the file.
    try (HFileScanner s = refHsf.getReader().getScanner(false, false, false)) {
      Cell kv = null;
      for (boolean first = true; (!s.isSeeked() && s.seekTo()) || s.next();) {
        ByteBuffer bb = ByteBuffer.wrap(((KeyValue) s.getKey()).getKey());
        kv = KeyValueUtil.createKeyValueFromKey(bb);
        if (first) {
          assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), midRow, 0,
            midRow.length));
          first = false;
        }
      }
      assertTrue(Bytes.equals(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength(), finalRow, 0,
        finalRow.length));
    }
  }
  @Test
  public void testStoreFileReference() throws Exception {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testStoreFileReference")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(conf, fs,
      new Path(testDir, hri.getTable().getNameAsString()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path hsfPath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    writer.close();
    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(regionFs.getRegionDir(), TEST_FAMILY))
        .withRegionFileSystem(regionFs).build());
    HStoreFile file = new HStoreFile(this.fs, hsfPath, conf, cacheConf, BloomType.NONE, true, sft);
    file.initReader();
    StoreFileReader r = file.getReader();
    assertNotNull(r);
    StoreFileScanner scanner =
      new StoreFileScanner(r, mock(HFileScanner.class), false, false, 0, 0, false, false);

    // Verify that refCount is increased after instantiating the scanner
    assertTrue("Verify file is being referenced", file.isReferencedInReads());
    scanner.close();
    // Verify that refCount is decreased after closing the scanner
    assertFalse("Verify file is not being referenced", file.isReferencedInReads());
  }

  @Test
  public void testEmptyStoreFileRestrictKeyRanges() throws Exception {
    StoreFileReader reader = mock(StoreFileReader.class);
    HStore store = mock(HStore.class);
    byte[] cf = Bytes.toBytes("ty");
    ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.of(cf);
    when(store.getColumnFamilyDescriptor()).thenReturn(cfd);
    try (StoreFileScanner scanner =
      new StoreFileScanner(reader, mock(HFileScanner.class), false, false, 0, 0, true, false)) {
      Scan scan = new Scan();
      scan.setColumnFamilyTimeRange(cf, 0, 1);
      assertFalse(scanner.shouldUseScanner(scan, store, 0));
    }
  }

  @Test
  public void testHFileLink() throws IOException {
    final RegionInfo hri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
    final RegionInfo dstHri =
      RegionInfoBuilder.newBuilder(TableName.valueOf("testHFileLinkTb")).build();
    HRegionFileSystem dstRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, dstHri.getTable()), dstHri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();

    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);

    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath =
      new Path(regionFs.getTableDir(), new Path(dstHri.getRegionNameAsString(), TEST_FAMILY));
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // Try to open store file from link

    // This should be the SFT for the destination link file path. It is not being used right
    // now, but in an upcoming patch the file link creation logic will also move to the SFT
    // interface.
    StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, false,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(dstHri.getRegionNameAsString(), TEST_FAMILY))
        .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY))
        .withRegionFileSystem(dstRegionFs).build());
    sft.createHFileLink(hri.getTable(), hri.getEncodedName(), storeFilePath.getName(), true);
    StoreFileInfo storeFileInfo = sft.getStoreFileInfo(linkFilePath, true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    assertTrue(storeFileInfo.isLink());
    hsf.initReader();

    // Now confirm that I can read from the link
    int count = 0;
    try (StoreFileScanner scanner = hsf.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }

  @Test
  public void testsample() {
    Path p1 = new Path("/r1/c1");
    Path p2 = new Path("f1");
    // Resolving a relative path against a parent should simply append it.
    assertEquals("/r1/c1/f1", new Path(p1, p2).toString());
  }

  /**
   * This test creates an hfile and then the dir structures and files to verify that references to
   * hfilelinks (created by snapshot clones) can be properly interpreted.
   */
  @Test
  public void testReferenceToHFileLink() throws IOException {
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    CommonFSUtils.setRootDir(testConf, testDir);

    // adding legal table name chars to verify regex handles it.
    RegionInfo hri = RegionInfoBuilder.newBuilder(TableName.valueOf("_original-evil-name")).build();
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it. <root>/<tablename>/<rgn>/<cf>/<file>
    StoreFileWriter writer = new StoreFileWriter.Builder(testConf, cacheConf, this.fs)
      .withFilePath(regionFs.createTempName()).withFileContext(meta).build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());

    // create link to store file. <root>/clone/region/<cf>/<hfile>-<region>-<table>
    RegionInfo hriClone = RegionInfoBuilder.newBuilder(TableName.valueOf("clone")).build();
    HRegionFileSystem cloneRegionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, hri.getTable()), hriClone);
    Path dstPath = cloneRegionFs.getStoreDir(TEST_FAMILY);
    Path linkFilePath =
      new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));

    // create splits of the link.
    // <root>/clone/splitA/<cf>/<reftohfilelink>,
    // <root>/clone/splitB/<cf>/<reftohfilelink>
    RegionInfo splitHriA = RegionInfoBuilder.newBuilder(hri.getTable()).setEndKey(SPLITKEY).build();
    RegionInfo splitHriB =
      RegionInfoBuilder.newBuilder(hri.getTable()).setStartKey(SPLITKEY).build();

    StoreFileTracker sft = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(hriClone.getRegionNameAsString(), TEST_FAMILY))
        .withColumnFamilyDescriptor(ColumnFamilyDescriptorBuilder.of(TEST_FAMILY))
        .withRegionFileSystem(cloneRegionFs).build());
    sft.createHFileLink(hri.getTable(), hri.getEncodedName(), storeFilePath.getName(), true);

    HRegionFileSystem splitRegionAFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, splitHriA.getTable()), splitHriA);
    StoreFileTracker sftA = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(splitHriA.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(splitRegionAFs).build());
    HRegionFileSystem splitRegionBFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
      CommonFSUtils.getTableDir(testDir, splitHriB.getTable()), splitHriB);
    StoreFileTracker sftB = StoreFileTrackerFactory.create(testConf, true,
      StoreContext.getBuilder()
        .withFamilyStoreDirectoryPath(new Path(splitHriB.getRegionNameAsString(), TEST_FAMILY))
        .withRegionFileSystem(splitRegionBFs).build());
    HStoreFile f = new HStoreFile(fs, linkFilePath, testConf, cacheConf, BloomType.NONE, true, sft);
    f.initReader();
    // top
    Path pathA = splitStoreFile(cloneRegionFs, splitHriA, TEST_FAMILY, f, SPLITKEY, true, sft);
    // bottom
    Path pathB = splitStoreFile(cloneRegionFs, splitHriB, TEST_FAMILY, f, SPLITKEY, false, sft);
    f.closeStoreFile(true);
    // OK test the thing
    CommonFSUtils.logFileSystemState(fs, testDir, LOG);

    // There is a case where a file with the hfilelink pattern is actually a daughter
    // reference to an hfile link. There is code in StoreFile that handles this case.

    // Try to open store file from link
    HStoreFile hsfA =
      new HStoreFile(this.fs, pathA, testConf, cacheConf, BloomType.NONE, true, sftA);
    hsfA.initReader();

    // Now confirm that I can read from the ref to link
    int count = 0;
    try (StoreFileScanner scanner = hsfA.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
      assertTrue(count > 0); // read some rows here
    }

    // Try to open store file from link
    HStoreFile hsfB =
      new HStoreFile(this.fs, pathB, testConf, cacheConf, BloomType.NONE, true, sftB);
    hsfB.initReader();

    // Now confirm that I can read from the ref to link
    try (StoreFileScanner scanner = hsfB.getPreadScanner(false, Long.MAX_VALUE, 0, false)) {
      scanner.seek(KeyValue.LOWESTKEY);
      while (scanner.next() != null) {
        count++;
      }
    }

    // read the rest of the rows
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
  }
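  /*
   * checkHalfHFile (below) splits a store file three ways and checks the daughters:
   * 1. splitting at the midkey yields top and bottom Reference files whose scanners return
   *    only keys >= midkey (top) or < midkey (bottom);
   * 2. a split key below the first key yields only a top half covering the whole file;
   * 3. a split key above the last key yields only a bottom half covering the whole file.
   */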
  private void checkHalfHFile(final HRegionFileSystem regionFs, final HStoreFile f,
    StoreFileTracker sft) throws IOException {
    f.initReader();
    Cell midkey = f.getReader().midKey().get();
    KeyValue midKV = (KeyValue) midkey;
    // 1. Test using the midRow as the splitKey; this will generate two Reference files
    // in the children.
    byte[] midRow = CellUtil.cloneRow(midKV);
    // Create top split.
    RegionInfo topHri =
      RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable()).setEndKey(SPLITKEY).build();
    Path topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, midRow, true, sft);
    // Create bottom split.
    RegionInfo bottomHri = RegionInfoBuilder.newBuilder(regionFs.getRegionInfo().getTable())
      .setStartKey(SPLITKEY).build();
    Path bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, midRow, false, sft);
    // Make readers on top and bottom.
    HStoreFile topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft);
    topF.initReader();
    StoreFileReader top = topF.getReader();
    HStoreFile bottomF =
      new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft);
    bottomF.initReader();
    StoreFileReader bottom = bottomF.getReader();
    ByteBuffer previous = null;
    LOG.info("Midkey: " + midKV.toString());
    ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey());
    try {
      // Now make two HalfMapFiles and assert they can read the full backing
      // file, one from the top and the other from the bottom.
      // Test the top half first, then the bottom half.
      boolean first = true;
      ByteBuffer key = null;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        while (
          (!topScanner.isSeeked() && topScanner.seekTo())
            || (topScanner.isSeeked() && topScanner.next())
        ) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());

          if (
            (PrivateCellUtil.compare(topScanner.getReader().getComparator(), midKV, key.array(),
              key.arrayOffset(), key.limit())) > 0
          ) {
            fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + midkey);
          }
          if (first) {
            first = false;
            LOG.info("First in top: " + Bytes.toString(Bytes.toBytes(key)));
          }
        }
      }
      LOG.info("Last in top: " + Bytes.toString(Bytes.toBytes(key)));

      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          previous = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            LOG.info("First in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
          }
          assertTrue(key.compareTo(bbMidkeyBytes) < 0);
        }
        if (previous != null) {
          LOG.info("Last in bottom: " + Bytes.toString(Bytes.toBytes(previous)));
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // 2. Test using a midkey which will generate one Reference file and one HFileLink file.
      // First, do a key that is < than first key. Ensure splits behave
      // properly.
      byte[] badmidkey = Bytes.toBytes(" .");
      assertTrue(fs.exists(f.getPath()));
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft);

      assertNull(bottomPath);

      topF = new HStoreFile(this.fs, topPath, conf, cacheConf, BloomType.NONE, true, sft);
      topF.initReader();
      top = topF.getReader();
      // Now read from the top.
      first = true;
      try (HFileScanner topScanner = top.getScanner(false, false, false)) {
        KeyValue.KeyOnlyKeyValue keyOnlyKV = new KeyValue.KeyOnlyKeyValue();
        while ((!topScanner.isSeeked() && topScanner.seekTo()) || topScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) topScanner.getKey()).getKey());
          keyOnlyKV.setKey(key.array(), key.arrayOffset(), key.limit());
          assertTrue(PrivateCellUtil.compare(topScanner.getReader().getComparator(), keyOnlyKV,
            badmidkey, 0, badmidkey.length) >= 0);
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First top when key < bottom: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last top when key < bottom: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
      // Remove references.
      regionFs.cleanupDaughterRegion(topHri);
      regionFs.cleanupDaughterRegion(bottomHri);

      // Test when badkey is > than last key in file ('||' > 'zz').
      badmidkey = Bytes.toBytes("|||");
      topPath = splitStoreFile(regionFs, topHri, TEST_FAMILY, f, badmidkey, true, sft);
      bottomPath = splitStoreFile(regionFs, bottomHri, TEST_FAMILY, f, badmidkey, false, sft);
      assertNull(topPath);

      bottomF = new HStoreFile(this.fs, bottomPath, conf, cacheConf, BloomType.NONE, true, sft);
      bottomF.initReader();
      bottom = bottomF.getReader();
      first = true;
      try (HFileScanner bottomScanner = bottom.getScanner(false, false, false)) {
        while ((!bottomScanner.isSeeked() && bottomScanner.seekTo()) || bottomScanner.next()) {
          key = ByteBuffer.wrap(((KeyValue) bottomScanner.getKey()).getKey());
          if (first) {
            first = false;
            KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
            LOG.info("First bottom when key > top: " + keyKV);
            String tmp =
              Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
            for (int i = 0; i < tmp.length(); i++) {
              assertTrue(tmp.charAt(i) == 'a');
            }
          }
        }
        KeyValue keyKV = KeyValueUtil.createKeyValueFromKey(key);
        LOG.info("Last bottom when key > top: " + keyKV);
        String tmp =
          Bytes.toString(keyKV.getRowArray(), keyKV.getRowOffset(), keyKV.getRowLength());
        for (int i = 0; i < tmp.length(); i++) {
          assertTrue(tmp.charAt(i) == 'z');
        }
      }
    } finally {
      if (top != null) {
        top.close(true); // evict since we are about to delete the file
      }
      if (bottom != null) {
        bottom.close(true); // evict since we are about to delete the file
      }
      fs.delete(f.getPath(), true);
    }
  }

  private static StoreFileScanner getStoreFileScanner(StoreFileReader reader, boolean cacheBlocks,
    boolean pread) {
    return reader.getStoreFileScanner(cacheBlocks, pread, false, 0, 0, false);
  }

  private static final String localFormatter = "%010d";
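  /*
   * A note on the accounting in bloomWriteRead (below): only the even-numbered of 2000 row keys
   * are written, then all 2000 are probed. A miss on a written even row is a false negative,
   * which a bloom filter must never produce; a hit on one of the 1000 absent odd rows is a
   * false positive. With error rate err, roughly 1000 * err false positives are expected, and
   * the assertion allows up to 2 * 2000 * err to keep the test stable.
   */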
  private void bloomWriteRead(StoreFileWriter writer, FileSystem fs) throws Exception {
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);
    Path f = writer.getPath();
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
      columns.add(Bytes.toBytes("family:col"));

      Scan scan = new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
      scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes("family:col"));
      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }

  private static final int BLOCKSIZE_SMALL = 8192;

  @Test
  public void testBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);
    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withBloomType(BloomType.ROW).withMaxKeyCount(2000).withFileContext(meta).build();
    bloomWriteRead(writer, fs);
  }
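  /*
   * The delete family bloom filter exercised below tracks which rows carry DeleteFamily
   * markers. Only even-numbered rows get a DeleteFamily cell, so the reader should count
   * exactly 1000 of them, and probes for the absent odd rows exercise the same
   * false-positive bound as above.
   */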
  @Test
  public void testDeleteFamilyBloomFilter() throws Exception {
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, (float) 0.01);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);
    float err = conf.getFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, 0);

    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
      .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withMaxKeyCount(2000).withFileContext(meta).build();

    // add delete family
    long now = EnvironmentEdgeManager.currentTime();
    for (int i = 0; i < 2000; i += 2) {
      String row = String.format(localFormatter, i);
      KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("col"),
        now, KeyValue.Type.DeleteFamily, Bytes.toBytes("value"));
      writer.append(kv);
    }
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
    reader.loadFileInfo();
    reader.loadBloomfilter();

    // check false positives rate
    int falsePos = 0;
    int falseNeg = 0;
    for (int i = 0; i < 2000; i++) {
      String row = String.format(localFormatter, i);
      byte[] rowKey = Bytes.toBytes(row);
      boolean exists = reader.passesDeleteFamilyBloomFilter(rowKey, 0, rowKey.length);
      if (i % 2 == 0) {
        if (!exists) {
          falseNeg++;
        }
      } else {
        if (exists) {
          falsePos++;
        }
      }
    }
    assertEquals(1000, reader.getDeleteFamilyCnt());
    reader.close(true); // evict because we are about to delete the file
    fs.delete(f, true);
    assertEquals("False negatives: " + falseNeg, 0, falseNeg);
    int maxFalsePos = (int) (2 * 2000 * err);
    assertTrue("Too many false positives: " + falsePos + " (err=" + err + ", expected no more than "
      + maxFalsePos + ")", falsePos <= maxFalsePos);
  }
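  /*
   * Summarizing from the assertions below (see HBASE-8012 for the original report): a reseek
   * issued before the scanner has ever been positioned must behave like an initial seek and
   * land at the start of the file rather than returning nothing.
   */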
  /**
   * Test for HBASE-8012
   */
  @Test
  public void testReseek() throws Exception {
    // write the file
    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
      .withFileContext(meta).build();

    writeStoreFile(writer);
    writer.close();

    ReaderContext context = new ReaderContextBuilder().withFileSystemAndPath(fs, f).build();
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
    storeFileInfo.initHFileInfo(context);
    StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
    storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());

    // Now do reseek with empty KV to position to the beginning of the file

    KeyValue k = KeyValueUtil.createFirstOnRow(HConstants.EMPTY_BYTE_ARRAY);
    StoreFileScanner s = getStoreFileScanner(reader, false, false);
    s.reseek(k);

    assertNotNull("Initial reseek should position at the beginning of the file", s.peek());
  }

  @Test
  public void testBloomTypes() throws Exception {
    float err = (float) 0.01;
    FileSystem fs = FileSystem.getLocal(conf);
    conf.setFloat(BloomFilterFactory.IO_STOREFILE_BLOOM_ERROR_RATE, err);
    conf.setBoolean(BloomFilterFactory.IO_STOREFILE_BLOOM_ENABLED, true);

    int rowCount = 50;
    int colCount = 10;
    int versions = 2;

    // run once using columns and once using rows
    BloomType[] bt = { BloomType.ROWCOL, BloomType.ROW };
    int[] expKeys = { rowCount * colCount, rowCount };
    // The line below deserves commentary: it is the expected number of bloom false positives.
    // ROWCOL: rowCount*2*colCount distinct absent row/column probes.
    // ROW: only rowCount*2 distinct rows are probed, but each row-level failure is magnified
    // by the inner column loop (2*colCount).
    float[] expErr = { 2 * rowCount * colCount * err, 2 * rowCount * 2 * colCount * err };

    if (!fs.exists(ROOT_DIR)) {
      fs.mkdirs(ROOT_DIR);
    }
    for (int x : new int[] { 0, 1 }) {
      // write the file
      Path f = StoreFileWriter.getUniqueFile(fs, ROOT_DIR);

      HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL)
        .withChecksumType(CKTYPE).withBytesPerCheckSum(CKBYTES).build();
      // Make a store file and write data to it.
      StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs).withFilePath(f)
        .withBloomType(bt[x]).withMaxKeyCount(expKeys[x]).withFileContext(meta).build();

      long now = EnvironmentEdgeManager.currentTime();
      for (int i = 0; i < rowCount * 2; i += 2) { // rows
        for (int j = 0; j < colCount * 2; j += 2) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          for (int k = 0; k < versions; ++k) { // versions
            KeyValue kv = new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"),
              Bytes.toBytes("col" + col), now - k, Bytes.toBytes(-1L));
            writer.append(kv);
          }
        }
      }
      writer.close();

      ReaderContext context =
        new ReaderContextBuilder().withFilePath(f).withFileSize(fs.getFileStatus(f).getLen())
          .withFileSystem(fs).withInputStreamWrapper(new FSDataInputStreamWrapper(fs, f)).build();
      StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, f, true);
      storeFileInfo.initHFileInfo(context);
      StoreFileReader reader = storeFileInfo.createReader(context, cacheConf);
      storeFileInfo.getHFileInfo().initMetaAndIndex(reader.getHFileReader());
      reader.loadFileInfo();
      reader.loadBloomfilter();
      StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
      assertEquals(expKeys[x], reader.getGeneralBloomFilter().getKeyCount());

      HStore store = mock(HStore.class);
      when(store.getColumnFamilyDescriptor())
        .thenReturn(ColumnFamilyDescriptorBuilder.of("family"));
      // check false positives rate
      int falsePos = 0;
      int falseNeg = 0;
      for (int i = 0; i < rowCount * 2; ++i) { // rows
        for (int j = 0; j < colCount * 2; ++j) { // column qualifiers
          String row = String.format(localFormatter, i);
          String col = String.format(localFormatter, j);
          TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
          columns.add(Bytes.toBytes("col" + col));

          Scan scan =
            new Scan().withStartRow(Bytes.toBytes(row)).withStopRow(Bytes.toBytes(row), true);
          scan.addColumn(Bytes.toBytes("family"), Bytes.toBytes(("col" + col)));

          boolean exists = scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);
          boolean shouldRowExist = i % 2 == 0;
          boolean shouldColExist = j % 2 == 0;
          shouldColExist = shouldColExist || bt[x] == BloomType.ROW;
          if (shouldRowExist && shouldColExist) {
            if (!exists) {
              falseNeg++;
            }
          } else {
            if (exists) {
              falsePos++;
            }
          }
        }
      }
      reader.close(true); // evict because we are about to delete the file
      fs.delete(f, true);
      System.out.println(bt[x].toString());
      System.out.println(" False negatives: " + falseNeg);
      System.out.println(" False positives: " + falsePos);
      assertEquals(0, falseNeg);
      assertTrue(falsePos < 2 * expErr[x]);
    }
  }

  @Test
  public void testSeqIdComparator() {
    assertOrdering(StoreFileComparators.SEQ_ID, mockStoreFile(true, 100, 1000, -1, "/foo/123"),
      mockStoreFile(true, 100, 1000, -1, "/foo/124"), mockStoreFile(true, 99, 1000, -1, "/foo/126"),
      mockStoreFile(true, 98, 2000, -1, "/foo/126"), mockStoreFile(false, 3453, -1, 1, "/foo/1"),
      mockStoreFile(false, 2, -1, 3, "/foo/2"), mockStoreFile(false, 1000, -1, 5, "/foo/2"),
      mockStoreFile(false, 76, -1, 5, "/foo/3"));
  }
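  /*
   * Judging from the fixtures above, SEQ_ID orders store files by max sequence id first, then
   * by bulk-load timestamp, then by path name; StoreFileComparators.SEQ_ID is the
   * authoritative definition.
   */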
  /**
   * Assert that the given comparator orders the given storefiles in the same way that they're
   * passed.
   */
  private void assertOrdering(Comparator<? super HStoreFile> comparator, HStoreFile... sfs) {
    ArrayList<HStoreFile> sorted = Lists.newArrayList(sfs);
    Collections.shuffle(sorted);
    Collections.sort(sorted, comparator);
    LOG.debug("sfs: " + Joiner.on(",").join(sfs));
    LOG.debug("sorted: " + Joiner.on(",").join(sorted));
    assertTrue(Iterables.elementsEqual(Arrays.asList(sfs), sorted));
  }

  /**
   * Create a mock StoreFile with the given attributes.
   */
  private HStoreFile mockStoreFile(boolean bulkLoad, long size, long bulkTimestamp, long seqId,
    String path) {
    HStoreFile mock = Mockito.mock(HStoreFile.class);
    StoreFileReader reader = Mockito.mock(StoreFileReader.class);

    Mockito.doReturn(size).when(reader).length();

    Mockito.doReturn(reader).when(mock).getReader();
    Mockito.doReturn(bulkLoad).when(mock).isBulkLoadResult();
    Mockito.doReturn(OptionalLong.of(bulkTimestamp)).when(mock).getBulkLoadTimestamp();
    Mockito.doReturn(seqId).when(mock).getMaxSequenceId();
    Mockito.doReturn(new Path(path)).when(mock).getPath();
    String name = "mock storefile, bulkLoad=" + bulkLoad + " bulkTimestamp=" + bulkTimestamp
      + " seqId=" + seqId + " path=" + path;
    Mockito.doReturn(name).when(mock).toString();
    return mock;
  }

  /**
   * Generate a list of KeyValues for testing based on given parameters
   * @return the rows' key-value list
   */
  List<KeyValue> getKeyValueSet(long[] timestamps, int numRows, byte[] qualifier, byte[] family) {
    List<KeyValue> kvList = new ArrayList<>();
    for (int i = 1; i <= numRows; i++) {
      byte[] b = Bytes.toBytes(i);
      LOG.info(Bytes.toString(b));
      for (long timestamp : timestamps) {
        kvList.add(new KeyValue(b, family, qualifier, timestamp, b));
      }
    }
    return kvList;
  }
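  /*
   * The multiple-timestamps test below leans on the writer tracking the min/max timestamps of
   * appended cells: a scan whose time range cannot overlap the file's recorded range (e.g.
   * [27, 50) against data written at timestamps 1..20) lets shouldUseScanner() skip the file,
   * unless a column family time range overrides it.
   */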
  /**
   * Test to ensure correctness when using StoreFile with multiple timestamps
   */
  @Test
  public void testMultipleTimestamps() throws IOException {
    byte[] family = Bytes.toBytes("familyname");
    byte[] qualifier = Bytes.toBytes("qualifier");
    int numRows = 10;
    long[] timestamps = new long[] { 20, 10, 5, 1 };
    Scan scan = new Scan();

    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path storedir = new Path(new Path(testDir, "7e0102"), Bytes.toString(family));
    Path dir = new Path(storedir, "1234567890");
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withOutputDir(dir).withFileContext(meta).build();

    List<KeyValue> kvList = getKeyValueSet(timestamps, numRows, qualifier, family);

    for (KeyValue kv : kvList) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsf = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    HStore store = mock(HStore.class);
    when(store.getColumnFamilyDescriptor()).thenReturn(ColumnFamilyDescriptorBuilder.of(family));
    hsf.initReader();
    StoreFileReader reader = hsf.getReader();
    StoreFileScanner scanner = getStoreFileScanner(reader, false, false);
    TreeSet<byte[]> columns = new TreeSet<>(Bytes.BYTES_COMPARATOR);
    columns.add(qualifier);

    scan.setTimeRange(20, 100);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(1, 2);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    scan.setTimeRange(8, 10);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // let's make sure it still works with column family time ranges
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // This test relies on the timestamp range optimization
    scan = new Scan();
    scan.setTimeRange(27, 50);
    assertTrue(!scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));

    // should still use the scanner because we override the family time range
    scan = new Scan();
    scan.setTimeRange(27, 50);
    scan.setColumnFamilyTimeRange(family, 7, 50);
    assertTrue(scanner.shouldUseScanner(scan, store, Long.MIN_VALUE));
  }
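  /*
   * The cache-on-write test below writes two three-block files, one with cache-on-write off
   * (the first read must miss three times) and one with it on (the first read must hit three
   * times), then verifies that closing a reader with evict-on-close enabled invalidates the
   * blocks without bumping the regular eviction stat.
   */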
  @Test
  public void testCacheOnWriteEvictOnClose() throws Exception {
    Configuration conf = this.conf;

    // Find a home for our files (regiondir ("7e0102") and familyname).
    Path baseDir = new Path(new Path(testDir, "7e0102"), "twoCOWEOC");

    // Grab the block cache and get the initial hit/miss counts
    BlockCache bc = BlockCacheFactory.createBlockCache(conf);
    assertNotNull(bc);
    CacheStats cs = bc.getStats();
    long startHit = cs.getHitCount();
    long startMiss = cs.getMissCount();
    long startEvicted = cs.getEvictedCount();

    // Let's write a StoreFile with three blocks, with cache on write off
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, false);
    CacheConfig cacheConf = new CacheConfig(conf, bc);
    Path pathCowOff = new Path(baseDir, "123456789");
    StoreFileWriter writer = writeStoreFile(conf, cacheConf, pathCowOff, 3);
    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsfCowOff = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    LOG.debug(hsfCowOff.getPath().toString());

    // Read this file, we should see 3 misses
    hsfCowOff.initReader();
    StoreFileReader reader = hsfCowOff.getReader();
    reader.loadFileInfo();
    StoreFileScanner scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss + 3, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startMiss += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());

    // Now write a StoreFile with three blocks, with cache on write on
    conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);
    cacheConf = new CacheConfig(conf, bc);
    Path pathCowOn = new Path(baseDir, "123456788");
    writer = writeStoreFile(conf, cacheConf, pathCowOn, 3);
    storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile hsfCowOn = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);

    // Read this file, we should see 3 hits
    hsfCowOn.initReader();
    reader = hsfCowOn.getReader();
    scanner = getStoreFileScanner(reader, true, true);
    scanner.seek(KeyValue.LOWESTKEY);
    while (scanner.next() != null) {
      continue;
    }
    assertEquals(startHit + 3, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 3;
    scanner.close();
    reader.close(cacheConf.shouldEvictOnClose());
    // Let's read back the two files to ensure the blocks exactly match
    hsfCowOff.initReader();
    StoreFileReader readerOne = hsfCowOff.getReader();
    readerOne.loadFileInfo();
    StoreFileScanner scannerOne = getStoreFileScanner(readerOne, true, true);
    scannerOne.seek(KeyValue.LOWESTKEY);
    hsfCowOn.initReader();
    StoreFileReader readerTwo = hsfCowOn.getReader();
    readerTwo.loadFileInfo();
    StoreFileScanner scannerTwo = getStoreFileScanner(readerTwo, true, true);
    scannerTwo.seek(KeyValue.LOWESTKEY);
    ExtendedCell kv1 = null;
    ExtendedCell kv2 = null;
    while ((kv1 = scannerOne.next()) != null) {
      kv2 = scannerTwo.next();
      assertTrue(kv1.equals(kv2));
      KeyValue keyv1 = KeyValueUtil.ensureKeyValue(kv1);
      KeyValue keyv2 = KeyValueUtil.ensureKeyValue(kv2);
      assertTrue(Bytes.compareTo(keyv1.getBuffer(), keyv1.getKeyOffset(), keyv1.getKeyLength(),
        keyv2.getBuffer(), keyv2.getKeyOffset(), keyv2.getKeyLength()) == 0);
      assertTrue(Bytes.compareTo(kv1.getValueArray(), kv1.getValueOffset(), kv1.getValueLength(),
        kv2.getValueArray(), kv2.getValueOffset(), kv2.getValueLength()) == 0);
    }
    assertNull(scannerTwo.next());
    assertEquals(startHit + 6, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
    startHit += 6;
    scannerOne.close();
    readerOne.close(cacheConf.shouldEvictOnClose());
    scannerTwo.close();
    readerTwo.close(cacheConf.shouldEvictOnClose());

    // Let's close the first file with evict on close turned on
    conf.setBoolean("hbase.rs.evictblocksonclose", true);
    cacheConf = new CacheConfig(conf, bc);
    hsfCowOff.initReader();
    reader = hsfCowOff.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We should have 3 new evictions but the evict count stat should not change. Eviction because
    // of HFile invalidation is not counted along with normal evictions
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());

    // Let's close the second file with evict on close turned off
    conf.setBoolean("hbase.rs.evictblocksonclose", false);
    cacheConf = new CacheConfig(conf, bc);
    hsfCowOn.initReader();
    reader = hsfCowOn.getReader();
    reader.close(cacheConf.shouldEvictOnClose());

    // We expect no changes
    assertEquals(startHit, cs.getHitCount());
    assertEquals(startMiss, cs.getMissCount());
    assertEquals(startEvicted, cs.getEvictedCount());
  }

  private Path splitStoreFile(final HRegionFileSystem regionFs, final RegionInfo hri,
    final String family, final HStoreFile sf, final byte[] splitKey, boolean isTopRef,
    StoreFileTracker sft) throws IOException {
    Path path = regionFs.splitStoreFile(hri, family, sf, splitKey, isTopRef, null, sft);
    if (null == path) {
      return null;
    }
    List<Path> splitFiles = new ArrayList<>();
    splitFiles.add(path);
    MasterProcedureEnv mockEnv = mock(MasterProcedureEnv.class);
    MasterServices mockServices = mock(MasterServices.class);
    when(mockEnv.getMasterServices()).thenReturn(mockServices);
    when(mockEnv.getMasterConfiguration()).thenReturn(new Configuration());
    TableDescriptors mockTblDescs = mock(TableDescriptors.class);
    when(mockServices.getTableDescriptors()).thenReturn(mockTblDescs);
    TableDescriptor mockTblDesc = TableDescriptorBuilder.newBuilder(hri.getTable())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(family)).build();
    when(mockTblDescs.get(any())).thenReturn(mockTblDesc);
    Path regionDir = regionFs.commitDaughterRegion(hri, splitFiles, mockEnv);
    return new Path(new Path(regionDir, family), path.getName());
  }
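  /*
   * Sizing note for the helper below: with blockSize = totalSize / numBlocks and roughly five
   * KVs per block, a block boundary is crossed about every five appends; the last KV is
   * dropped before writing so the writer does not open an extra block beyond the requested
   * count.
   */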
  private StoreFileWriter writeStoreFile(Configuration conf, CacheConfig cacheConf, Path path,
    int numBlocks) throws IOException {
    // Let's put ~5 small KVs in each block, so let's make 5*numBlocks KVs
    int numKVs = 5 * numBlocks;
    List<KeyValue> kvs = new ArrayList<>(numKVs);
    byte[] b = Bytes.toBytes("x");
    int totalSize = 0;
    for (int i = numKVs; i > 0; i--) {
      KeyValue kv = new KeyValue(b, b, b, i, b);
      kvs.add(kv);
      // kv has memstoreTS 0, which takes 1 byte to store.
      totalSize += kv.getLength() + 1;
    }
    int blockSize = totalSize / numBlocks;
    HFileContext meta = new HFileContextBuilder().withBlockSize(blockSize).withChecksumType(CKTYPE)
      .withBytesPerCheckSum(CKBYTES).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    // We'll write N-1 KVs to ensure we don't write an extra block
    kvs.remove(kvs.size() - 1);
    for (KeyValue kv : kvs) {
      writer.append(kv);
    }
    writer.appendMetadata(0, false);
    writer.close();
    return writer;
  }

  /**
   * Check if data block encoding information is saved correctly in HFile's file info.
   */
  @Test
  public void testDataBlockEncodingMetaData() throws IOException {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writer.close();

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
  }
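  /*
   * "hbase.writer.unified.encoded.blocksize.ratio" below asks the writer to bound the encoded
   * (rather than raw) data size per block; with a ratio of 1 every on-disk block should land
   * within a few percent of BLOCKSIZE_SMALL, which is what the loop at the end asserts.
   */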
  @Test
  public void testDataBlockSizeEncoded() throws Exception {
    // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname.
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");

    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;

    conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);

    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeStoreFile(writer);

    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    StoreFileReader reader = storeFile.getReader();

    Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
    byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
    assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));

    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);

    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
      if (offset <= max) {
        assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
      }
    }
  }

  @Test
  public void testDataBlockSizeCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR,
      PreviousBlockCompressionRatePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(12,
      (s, c) -> (c > 2 && c < 11) ? s >= BLOCKSIZE_SMALL * 10 : true);
  }

  @Test
  public void testDataBlockSizeUnCompressed() throws Exception {
    conf.set(BLOCK_COMPRESSED_SIZE_PREDICATOR, UncompressedBlockSizePredicator.class.getName());
    testDataBlockSizeWithCompressionRatePredicator(200, (s, c) -> s < BLOCKSIZE_SMALL * 10);
  }
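  /*
   * The helper below contrasts the two predicators configured above: with
   * PreviousBlockCompressionRatePredicator, block boundaries are predicted from the
   * compression rate seen so far, so GZ-compressed blocks may grow to about 10x the configured
   * size and the 200-round file collapses into ~12 blocks; UncompressedBlockSizePredicator
   * keeps the pre-compression bound, yielding roughly one block per round (200).
   */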
  private void testDataBlockSizeWithCompressionRatePredicator(int expectedBlockCount,
    BiFunction<Integer, Integer, Boolean> validation) throws Exception {
    Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
    Path path = new Path(dir, "1234567890");
    DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
    cacheConf = new CacheConfig(conf);
    HFileContext meta =
      new HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
        .withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo)
        .withCompression(Compression.Algorithm.GZ).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
      .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
    writeLargeStoreFile(writer, Bytes.toBytes(name.getMethodName()),
      Bytes.toBytes(name.getMethodName()), 200);
    writer.close();
    StoreFileInfo storeFileInfo =
      StoreFileInfo.createStoreFileInfoForHFile(conf, fs, writer.getPath(), true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    HFile.Reader fReader =
      HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true, conf);
    FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, writer.getPath());
    long fileSize = fs.getFileStatus(writer.getPath()).getLen();
    FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
    long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset();
    HFileBlock block;
    int blockCount = 0;
    while (offset <= max) {
      block = fReader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false,
        /* isCompaction */ false, /* updateCacheMetrics */ false, null, null);
      offset += block.getOnDiskSizeWithHeader();
      blockCount++;
      assertTrue(validation.apply(block.getUncompressedSizeWithoutHeader(), blockCount));
    }
    assertEquals(expectedBlockCount, blockCount);
  }
}