001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertFalse; 023import static org.junit.Assert.assertNotEquals; 024import static org.junit.Assert.assertNotNull; 025import static org.junit.Assert.assertNotSame; 026import static org.junit.Assert.assertNull; 027import static org.junit.Assert.assertTrue; 028import static org.junit.Assert.fail; 029 030import java.io.IOException; 031import java.lang.reflect.Field; 032import java.util.ArrayList; 033import java.util.Collection; 034import java.util.List; 035import java.util.Map; 036import java.util.Optional; 037import java.util.concurrent.CountDownLatch; 038import java.util.concurrent.ExecutionException; 039import java.util.concurrent.TimeUnit; 040import java.util.concurrent.TimeoutException; 041import java.util.concurrent.atomic.AtomicBoolean; 042import org.apache.hadoop.conf.Configuration; 043import org.apache.hadoop.fs.FileSystem; 044import org.apache.hadoop.fs.Path; 045import org.apache.hadoop.hbase.CellComparator; 046import org.apache.hadoop.hbase.Coprocessor; 047import org.apache.hadoop.hbase.CoprocessorEnvironment; 048import org.apache.hadoop.hbase.DoNotRetryIOException; 049import org.apache.hadoop.hbase.HBaseClassTestRule; 050import org.apache.hadoop.hbase.HBaseTestingUtil; 051import org.apache.hadoop.hbase.HConstants; 052import org.apache.hadoop.hbase.MasterNotRunningException; 053import org.apache.hadoop.hbase.PrivateCellUtil; 054import org.apache.hadoop.hbase.ServerName; 055import org.apache.hadoop.hbase.SingleProcessHBaseCluster; 056import org.apache.hadoop.hbase.StartTestingClusterOption; 057import org.apache.hadoop.hbase.TableName; 058import org.apache.hadoop.hbase.ZooKeeperConnectionException; 059import org.apache.hadoop.hbase.client.Admin; 060import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 061import org.apache.hadoop.hbase.client.Consistency; 062import org.apache.hadoop.hbase.client.Delete; 063import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 064import org.apache.hadoop.hbase.client.Get; 065import org.apache.hadoop.hbase.client.Mutation; 066import org.apache.hadoop.hbase.client.Put; 067import org.apache.hadoop.hbase.client.RegionInfo; 068import org.apache.hadoop.hbase.client.Result; 069import org.apache.hadoop.hbase.client.ResultScanner; 070import org.apache.hadoop.hbase.client.Scan; 071import org.apache.hadoop.hbase.client.Table; 072import org.apache.hadoop.hbase.client.TableDescriptor; 073import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 074import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro; 075import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor; 076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; 077import org.apache.hadoop.hbase.coprocessor.MasterObserver; 078import org.apache.hadoop.hbase.coprocessor.ObserverContext; 079import org.apache.hadoop.hbase.io.HFileLink; 080import org.apache.hadoop.hbase.io.Reference; 081import org.apache.hadoop.hbase.master.HMaster; 082import org.apache.hadoop.hbase.master.MasterRpcServices; 083import org.apache.hadoop.hbase.master.RegionState; 084import org.apache.hadoop.hbase.master.RegionState.State; 085import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 086import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; 087import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 088import org.apache.hadoop.hbase.master.assignment.RegionStates; 089import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 090import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 091import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; 092import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 093import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 094import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; 095import org.apache.hadoop.hbase.testclassification.LargeTests; 096import org.apache.hadoop.hbase.testclassification.RegionServerTests; 097import org.apache.hadoop.hbase.util.Bytes; 098import org.apache.hadoop.hbase.util.CommonFSUtils; 099import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 100import org.apache.hadoop.hbase.util.FSUtils; 101import org.apache.hadoop.hbase.util.FutureUtils; 102import org.apache.hadoop.hbase.util.HBaseFsck; 103import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 104import org.apache.hadoop.hbase.util.Threads; 105import org.apache.zookeeper.KeeperException; 106import org.apache.zookeeper.KeeperException.NodeExistsException; 107import org.junit.After; 108import org.junit.AfterClass; 109import org.junit.Assert; 110import org.junit.Before; 111import org.junit.BeforeClass; 112import org.junit.ClassRule; 113import org.junit.Rule; 114import org.junit.Test; 115import org.junit.experimental.categories.Category; 116import org.junit.rules.TestName; 117import org.mockito.Mockito; 118import org.slf4j.Logger; 119import org.slf4j.LoggerFactory; 120 121import org.apache.hbase.thirdparty.com.google.common.io.Closeables; 122import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; 123import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; 124 125import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 127import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; 128import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; 129 130/** 131 * The below tests are testing split region against a running cluster 132 */ 133@Category({ RegionServerTests.class, LargeTests.class }) 134public class TestSplitTransactionOnCluster { 135 136 @ClassRule 137 public static final HBaseClassTestRule CLASS_RULE = 138 HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class); 139 140 private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class); 141 private Admin admin = null; 142 private SingleProcessHBaseCluster cluster = null; 143 private static final int NB_SERVERS = 3; 144 145 static final HBaseTestingUtil TESTING_UTIL = new HBaseTestingUtil(); 146 147 @Rule 148 public TestName name = new TestName(); 149 150 @BeforeClass 151 public static void before() throws Exception { 152 TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000); 153 StartTestingClusterOption option = StartTestingClusterOption.builder() 154 .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build(); 155 TESTING_UTIL.startMiniCluster(option); 156 } 157 158 @AfterClass 159 public static void after() throws Exception { 160 TESTING_UTIL.shutdownMiniCluster(); 161 } 162 163 @Before 164 public void setup() throws IOException { 165 TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS); 166 this.admin = TESTING_UTIL.getAdmin(); 167 this.cluster = TESTING_UTIL.getMiniHBaseCluster(); 168 } 169 170 @After 171 public void tearDown() throws Exception { 172 this.admin.close(); 173 for (TableDescriptor htd : this.admin.listTableDescriptors()) { 174 LOG.info("Tear down, remove table=" + htd.getTableName()); 175 TESTING_UTIL.deleteTable(htd.getTableName()); 176 } 177 } 178 179 private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions) 180 throws IOException, InterruptedException { 181 assertEquals(1, regions.size()); 182 RegionInfo hri = regions.get(0).getRegionInfo(); 183 AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); 184 return hri; 185 } 186 187 private void requestSplitRegion(final HRegionServer rsServer, final Region region, 188 final byte[] midKey) throws IOException { 189 long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0); 190 // wait for the split to complete or get interrupted. If the split completes successfully, 191 // the procedure will return true; if the split fails, the procedure would throw exception. 192 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 193 } 194 195 @Test 196 public void testRITStateForRollback() throws Exception { 197 final TableName tableName = TableName.valueOf(name.getMethodName()); 198 final HMaster master = cluster.getMaster(); 199 try { 200 // Create table then get the single region for our new table. 201 Table t = createTableAndWait(tableName, Bytes.toBytes("cf")); 202 final List<HRegion> regions = cluster.getRegions(tableName); 203 final RegionInfo hri = getAndCheckSingleTableRegion(regions); 204 insertData(tableName, admin, t); 205 t.close(); 206 207 // Turn off balancer so it doesn't cut in and mess up our placements. 208 this.admin.balancerSwitch(false, true); 209 // Turn off the meta scanner so it don't remove parent on us. 210 master.setCatalogJanitorEnabled(false); 211 212 // find a splittable region 213 final HRegion region = findSplittableRegion(regions); 214 assertTrue("not able to find a splittable region", region != null); 215 216 // install master co-processor to fail splits 217 master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class, 218 Coprocessor.PRIORITY_USER, master.getConfiguration()); 219 220 // split async 221 this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 }); 222 223 // we have to wait until the SPLITTING state is seen by the master 224 FailingSplitMasterObserver observer = 225 master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class); 226 assertNotNull(observer); 227 observer.latch.await(); 228 229 LOG.info("Waiting for region to come out of RIT"); 230 while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) { 231 Threads.sleep(100); 232 } 233 assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)); 234 } finally { 235 admin.balancerSwitch(true, false); 236 master.setCatalogJanitorEnabled(true); 237 abortAndWaitForMaster(); 238 TESTING_UTIL.deleteTable(tableName); 239 } 240 } 241 242 @Test 243 public void testSplitFailedCompactionAndSplit() throws Exception { 244 final TableName tableName = TableName.valueOf(name.getMethodName()); 245 // Create table then get the single region for our new table. 246 byte[] cf = Bytes.toBytes("cf"); 247 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 248 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 249 admin.createTable(htd); 250 251 for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) { 252 Thread.sleep(100); 253 } 254 assertEquals(1, cluster.getRegions(tableName).size()); 255 256 HRegion region = cluster.getRegions(tableName).get(0); 257 HStore store = region.getStore(cf); 258 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 259 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 260 261 Table t = TESTING_UTIL.getConnection().getTable(tableName); 262 // insert data 263 insertData(tableName, admin, t); 264 insertData(tableName, admin, t); 265 266 int fileNum = store.getStorefiles().size(); 267 // 0, Compaction Request 268 store.triggerMajorCompaction(); 269 Optional<CompactionContext> cc = store.requestCompaction(); 270 assertTrue(cc.isPresent()); 271 // 1, A timeout split 272 // 1.1 close region 273 assertEquals(2, region.close(false).get(cf).size()); 274 // 1.2 rollback and Region initialize again 275 region.initialize(); 276 277 // 2, Run Compaction cc 278 assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE)); 279 assertTrue(fileNum > store.getStorefiles().size()); 280 281 // 3, Split 282 requestSplitRegion(regionServer, region, Bytes.toBytes("row3")); 283 assertEquals(2, cluster.getRegions(tableName).size()); 284 } 285 286 @Test 287 public void testSplitCompactWithPriority() throws Exception { 288 final TableName tableName = TableName.valueOf(name.getMethodName()); 289 // Create table then get the single region for our new table. 290 byte[] cf = Bytes.toBytes("cf"); 291 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 292 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build(); 293 admin.createTable(htd); 294 295 assertNotEquals("Unable to retrieve regions of the table", -1, 296 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 297 298 HRegion region = cluster.getRegions(tableName).get(0); 299 HStore store = region.getStore(cf); 300 int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 301 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 302 303 Table table = TESTING_UTIL.getConnection().getTable(tableName); 304 // insert data 305 insertData(tableName, admin, table); 306 insertData(tableName, admin, table, 20); 307 insertData(tableName, admin, table, 40); 308 309 // Compaction Request 310 store.triggerMajorCompaction(); 311 Optional<CompactionContext> compactionContext = store.requestCompaction(); 312 assertTrue(compactionContext.isPresent()); 313 assertFalse(compactionContext.get().getRequest().isAfterSplit()); 314 assertEquals(compactionContext.get().getRequest().getPriority(), 13); 315 316 // Split 317 long procId = 318 cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0); 319 320 // wait for the split to complete or get interrupted. If the split completes successfully, 321 // the procedure will return true; if the split fails, the procedure would throw exception. 322 ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId); 323 Thread.sleep(3000); 324 assertNotEquals("Table is not split properly?", -1, 325 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 326 // we have 2 daughter regions 327 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 328 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 329 HStore hStore1 = hRegion1.getStore(cf); 330 HStore hStore2 = hRegion2.getStore(cf); 331 332 // For hStore1 && hStore2, set mock reference to one of the storeFiles 333 StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo(); 334 StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo(); 335 Field field = StoreFileInfo.class.getDeclaredField("reference"); 336 field.setAccessible(true); 337 field.set(storeFileInfo1, Mockito.mock(Reference.class)); 338 field.set(storeFileInfo2, Mockito.mock(Reference.class)); 339 hStore1.triggerMajorCompaction(); 340 hStore2.triggerMajorCompaction(); 341 342 compactionContext = hStore1.requestCompaction(); 343 assertTrue(compactionContext.isPresent()); 344 // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true && 345 // highest priority for hStore1's compactionContext 346 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 347 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000); 348 349 compactionContext = 350 hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null); 351 assertTrue(compactionContext.isPresent()); 352 // compaction request contains higher priority than default priority of daughter region 353 // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to 354 // be accepted. 355 assertTrue(compactionContext.get().getRequest().isAfterSplit()); 356 assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10); 357 admin.disableTable(tableName); 358 admin.deleteTable(tableName); 359 } 360 361 @Test 362 public void testContinuousSplitUsingLinkFile() throws Exception { 363 final TableName tableName = TableName.valueOf(name.getMethodName()); 364 // Create table then get the single region for our new table. 365 byte[] cf = Bytes.toBytes("cf"); 366 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName) 367 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)); 368 String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName(); 369 builder.setValue(SPLIT_POLICY, splitPolicy); 370 371 admin.createTable(builder.build()); 372 admin.compactionSwitch(false, new ArrayList<>()); 373 374 assertNotEquals("Unable to retrieve regions of the table", -1, 375 TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1)); 376 Table table = TESTING_UTIL.getConnection().getTable(tableName); 377 // insert data 378 insertData(tableName, admin, table, 10); 379 insertData(tableName, admin, table, 20); 380 insertData(tableName, admin, table, 40); 381 int rowCount = 3 * 4; 382 Scan scan = new Scan(); 383 scanValidate(scan, rowCount, table); 384 385 // Split 386 admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(), 387 Bytes.toBytes("row14")); 388 // wait for the split to complete or get interrupted. If the split completes successfully, 389 // the procedure will return true; if the split fails, the procedure would throw exception. 390 Thread.sleep(3000); 391 assertNotEquals("Table is not split properly?", -1, 392 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2)); 393 // we have 2 daughter regions 394 HRegion hRegion1 = cluster.getRegions(tableName).get(0); 395 HRegion hRegion2 = cluster.getRegions(tableName).get(1); 396 HStore hStore1 = hRegion1.getStore(cf); 397 HStore hStore2 = hRegion2.getStore(cf); 398 // the sum of store files of the two children should be equal to their parent 399 assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount()); 400 // both the two children should have link files 401 for (StoreFile sf : hStore1.getStorefiles()) { 402 assertTrue(HFileLink.isHFileLink(sf.getPath())); 403 } 404 for (StoreFile sf : hStore2.getStorefiles()) { 405 assertTrue(HFileLink.isHFileLink(sf.getPath())); 406 } 407 // validate children data 408 scan = new Scan(); 409 scanValidate(scan, rowCount, table); 410 411 // Continuous Split 412 findRegionToSplit(tableName, "row24"); 413 Thread.sleep(3000); 414 assertNotEquals("Table is not split properly?", -1, 415 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3)); 416 // now table has 3 region, each region should have one link file 417 for (HRegion newRegion : cluster.getRegions(tableName)) { 418 assertEquals(1, newRegion.getStore(cf).getStorefilesCount()); 419 assertTrue( 420 HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath())); 421 } 422 423 scan = new Scan(); 424 scanValidate(scan, rowCount, table); 425 426 // Continuous Split, random split HFileLink, generate Reference files. 427 // After this, can not continuous split, because there are reference files. 428 findRegionToSplit(tableName, "row11"); 429 Thread.sleep(3000); 430 assertNotEquals("Table is not split properly?", -1, 431 TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4)); 432 433 scan = new Scan(); 434 scanValidate(scan, rowCount, table); 435 } 436 437 private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception { 438 HRegion toSplit = null; 439 byte[] toSplitKey = Bytes.toBytes(splitRowKey); 440 for (HRegion rg : cluster.getRegions(tableName)) { 441 LOG.debug( 442 "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()=" 443 + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey); 444 if ( 445 (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare( 446 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()), 447 PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0) 448 && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare( 449 PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()), 450 PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0) 451 ) { 452 toSplit = rg; 453 } 454 } 455 assertNotNull(toSplit); 456 admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey); 457 } 458 459 private static void scanValidate(Scan scan, int expectedRowCount, Table table) 460 throws IOException { 461 ResultScanner scanner = table.getScanner(scan); 462 int rows = 0; 463 for (Result result : scanner) { 464 rows++; 465 } 466 scanner.close(); 467 assertEquals(expectedRowCount, rows); 468 } 469 470 public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver { 471 volatile CountDownLatch latch; 472 473 @Override 474 public void start(CoprocessorEnvironment e) throws IOException { 475 latch = new CountDownLatch(1); 476 } 477 478 @Override 479 public Optional<MasterObserver> getMasterObserver() { 480 return Optional.of(this); 481 } 482 483 @Override 484 public void preSplitRegionBeforeMETAAction( 485 final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey, 486 final List<Mutation> metaEntries) throws IOException { 487 latch.countDown(); 488 throw new IOException("Causing rollback of region split"); 489 } 490 } 491 492 @Test 493 public void testSplitRollbackOnRegionClosing() throws Exception { 494 final TableName tableName = TableName.valueOf(name.getMethodName()); 495 496 // Create table then get the single region for our new table. 497 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 498 List<HRegion> regions = cluster.getRegions(tableName); 499 RegionInfo hri = getAndCheckSingleTableRegion(regions); 500 501 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 502 503 RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates(); 504 505 // Turn off balancer so it doesn't cut in and mess up our placements. 506 this.admin.balancerSwitch(false, true); 507 // Turn off the meta scanner so it don't remove parent on us. 508 cluster.getMaster().setCatalogJanitorEnabled(false); 509 try { 510 // Add a bit of load up into the table so splittable. 511 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 512 // Get region pre-split. 513 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 514 printOutRegions(server, "Initial regions: "); 515 int regionCount = cluster.getRegions(hri.getTable()).size(); 516 regionStates.updateRegionState(hri, RegionState.State.CLOSING); 517 518 // Now try splitting.... should fail. And each should successfully 519 // rollback. 520 // We don't roll back here anymore. Instead we fail-fast on construction of the 521 // split transaction. Catch the exception instead. 522 try { 523 FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName())); 524 fail(); 525 } catch (DoNotRetryRegionException e) { 526 // Expected 527 } 528 // Wait around a while and assert count of regions remains constant. 529 for (int i = 0; i < 10; i++) { 530 Thread.sleep(100); 531 assertEquals(regionCount, cluster.getRegions(hri.getTable()).size()); 532 } 533 regionStates.updateRegionState(hri, State.OPEN); 534 // Now try splitting and it should work. 535 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 536 // Get daughters 537 checkAndGetDaughters(tableName); 538 // OK, so split happened after we cleared the blocking node. 539 } finally { 540 admin.balancerSwitch(true, false); 541 cluster.getMaster().setCatalogJanitorEnabled(true); 542 t.close(); 543 } 544 } 545 546 /** 547 * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't 548 * find the immediate daughter of an offlined parent. 549 */ 550 @Test 551 public void testShutdownFixupWhenDaughterHasSplit() throws Exception { 552 final TableName tableName = TableName.valueOf(name.getMethodName()); 553 554 // Create table then get the single region for our new table. 555 Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 556 List<HRegion> regions = cluster.getRegions(tableName); 557 RegionInfo hri = getAndCheckSingleTableRegion(regions); 558 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 559 560 // Turn off balancer so it doesn't cut in and mess up our placements. 561 this.admin.balancerSwitch(false, true); 562 // Turn off the meta scanner so it don't remove parent on us. 563 cluster.getMaster().setCatalogJanitorEnabled(false); 564 try { 565 // Add a bit of load up into the table so splittable. 566 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); 567 // Get region pre-split. 568 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 569 printOutRegions(server, "Initial regions: "); 570 // Now split. 571 admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 572 // Get daughters 573 List<HRegion> daughters = checkAndGetDaughters(tableName); 574 // Now split one of the daughters. 575 HRegion daughterRegion = daughters.get(0); 576 RegionInfo daughter = daughterRegion.getRegionInfo(); 577 LOG.info("Daughter we are going to split: " + daughter); 578 clearReferences(daughterRegion); 579 LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences()); 580 admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES); 581 // Get list of daughters 582 daughters = cluster.getRegions(tableName); 583 for (HRegion d : daughters) { 584 LOG.info("Regions before crash: " + d); 585 } 586 // Now crash the server 587 cluster.abortRegionServer(tableRegionIndex); 588 waitUntilRegionServerDead(); 589 awaitDaughters(tableName, daughters.size()); 590 // Assert daughters are online and ONLY the original daughters -- that 591 // fixup didn't insert one during server shutdown recover. 592 regions = cluster.getRegions(tableName); 593 for (HRegion d : daughters) { 594 LOG.info("Regions after crash: " + d); 595 } 596 if (daughters.size() != regions.size()) { 597 LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size()); 598 } 599 assertEquals(daughters.size(), regions.size()); 600 for (HRegion r : regions) { 601 LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r)); 602 assertTrue("Missing region post crash " + r, daughters.contains(r)); 603 } 604 } finally { 605 LOG.info("EXITING"); 606 admin.balancerSwitch(true, false); 607 cluster.getMaster().setCatalogJanitorEnabled(true); 608 t.close(); 609 } 610 } 611 612 private void clearReferences(HRegion region) throws IOException { 613 // Presumption. 614 assertEquals(1, region.getStores().size()); 615 HStore store = region.getStores().get(0); 616 while (store.hasReferences()) { 617 while (store.storeEngine.getCompactor().isCompacting()) { 618 Threads.sleep(100); 619 } 620 // Run new compaction. Shoudn't be any others running. 621 region.compact(true); 622 store.closeAndArchiveCompactedFiles(); 623 } 624 } 625 626 @Test 627 public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception { 628 TableName userTableName = TableName.valueOf(name.getMethodName()); 629 TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName) 630 .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build(); 631 admin.createTable(htd); 632 Table table = TESTING_UTIL.getConnection().getTable(userTableName); 633 try { 634 for (int i = 0; i <= 5; i++) { 635 String row = "row" + i; 636 Put p = new Put(Bytes.toBytes(row)); 637 String val = "Val" + i; 638 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val)); 639 table.put(p); 640 admin.flush(userTableName); 641 Delete d = new Delete(Bytes.toBytes(row)); 642 // Do a normal delete 643 table.delete(d); 644 admin.flush(userTableName); 645 } 646 admin.majorCompact(userTableName); 647 List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 648 .getRegionsOfTable(userTableName); 649 assertEquals(1, regionsOfTable.size()); 650 RegionInfo hRegionInfo = regionsOfTable.get(0); 651 Put p = new Put(Bytes.toBytes("row6")); 652 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 653 table.put(p); 654 p = new Put(Bytes.toBytes("row7")); 655 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 656 table.put(p); 657 p = new Put(Bytes.toBytes("row8")); 658 p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val")); 659 table.put(p); 660 admin.flush(userTableName); 661 admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7")); 662 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 663 .getRegionsOfTable(userTableName); 664 665 while (regionsOfTable.size() != 2) { 666 Thread.sleep(1000); 667 regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates() 668 .getRegionsOfTable(userTableName); 669 LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": " 670 + regionsOfTable); 671 672 } 673 Assert.assertEquals(2, regionsOfTable.size()); 674 675 Scan s = new Scan(); 676 ResultScanner scanner = table.getScanner(s); 677 int mainTableCount = 0; 678 for (Result rr = scanner.next(); rr != null; rr = scanner.next()) { 679 mainTableCount++; 680 } 681 Assert.assertEquals(3, mainTableCount); 682 } finally { 683 table.close(); 684 } 685 } 686 687 /** 688 * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could 689 * remove the parent region the master is killed and restarted. 690 */ 691 @Test 692 public void testMasterRestartAtRegionSplitPendingCatalogJanitor() 693 throws IOException, InterruptedException, NodeExistsException, KeeperException, 694 ServiceException, ExecutionException, TimeoutException { 695 final TableName tableName = TableName.valueOf(name.getMethodName()); 696 // Create table then get the single region for our new table. 697 try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) { 698 List<HRegion> regions = cluster.getRegions(tableName); 699 RegionInfo hri = getAndCheckSingleTableRegion(regions); 700 701 int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri); 702 703 // Turn off balancer so it doesn't cut in and mess up our placements. 704 this.admin.balancerSwitch(false, true); 705 // Turn off the meta scanner so it don't remove parent on us. 706 cluster.getMaster().setCatalogJanitorEnabled(false); 707 // Add a bit of load up into the table so splittable. 708 TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false); 709 // Get region pre-split. 710 HRegionServer server = cluster.getRegionServer(tableRegionIndex); 711 printOutRegions(server, "Initial regions: "); 712 // Call split. 713 this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES); 714 List<HRegion> daughters = checkAndGetDaughters(tableName); 715 716 // Before cleanup, get a new master. 717 HMaster master = abortAndWaitForMaster(); 718 // Now call compact on the daughters and clean up any references. 719 for (HRegion daughter : daughters) { 720 clearReferences(daughter); 721 assertFalse(daughter.hasReferences()); 722 } 723 // BUT calling compact on the daughters is not enough. The CatalogJanitor looks 724 // in the filesystem, and the filesystem content is not same as what the Region 725 // is reading from. Compacted-away files are picked up later by the compacted 726 // file discharger process. It runs infrequently. Make it run so CatalogJanitor 727 // doens't find any references. 728 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 729 boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false); 730 rst.getRegionServer().compactedFileDischarger.run(); 731 rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting); 732 } 733 cluster.getMaster().setCatalogJanitorEnabled(true); 734 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 735 LOG.info("Starting run of CatalogJanitor"); 736 cluster.getMaster().getCatalogJanitor().run(); 737 ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor()); 738 RegionStates regionStates = master.getAssignmentManager().getRegionStates(); 739 ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri); 740 assertEquals(null, regionServerOfRegion); 741 } finally { 742 TESTING_UTIL.getAdmin().balancerSwitch(true, false); 743 cluster.getMaster().setCatalogJanitorEnabled(true); 744 } 745 } 746 747 @Test 748 public void testSplitWithRegionReplicas() throws Exception { 749 final TableName tableName = TableName.valueOf(name.getMethodName()); 750 TableDescriptor htd = TESTING_UTIL 751 .createModifyableTableDescriptor(TableName.valueOf(name.getMethodName()), 752 ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER, 753 ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED) 754 .setRegionReplication(2).setCoprocessor(SlowMeCopro.class.getName()).build(); 755 // Create table then get the single region for our new table. 756 Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null); 757 List<HRegion> oldRegions; 758 do { 759 oldRegions = cluster.getRegions(tableName); 760 Thread.sleep(10); 761 } while (oldRegions.size() != 2); 762 for (HRegion h : oldRegions) 763 LOG.debug("OLDREGION " + h.getRegionInfo()); 764 try { 765 int regionServerIndex = 766 cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName()); 767 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 768 insertData(tableName, admin, t); 769 // Turn off balancer so it doesn't cut in and mess up our placements. 770 admin.balancerSwitch(false, true); 771 // Turn off the meta scanner so it don't remove parent on us. 772 cluster.getMaster().setCatalogJanitorEnabled(false); 773 boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 774 assertEquals("The specified table should be present.", true, tableExists); 775 final HRegion region = findSplittableRegion(oldRegions); 776 regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName()); 777 regionServer = cluster.getRegionServer(regionServerIndex); 778 assertTrue("not able to find a splittable region", region != null); 779 try { 780 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 781 } catch (IOException e) { 782 e.printStackTrace(); 783 fail("Split execution should have succeeded with no exceptions thrown " + e); 784 } 785 // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName); 786 List<HRegion> newRegions; 787 do { 788 newRegions = cluster.getRegions(tableName); 789 for (HRegion h : newRegions) 790 LOG.debug("NEWREGION " + h.getRegionInfo()); 791 Thread.sleep(1000); 792 } while ( 793 (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1))) 794 || newRegions.size() != 4 795 ); 796 tableExists = TESTING_UTIL.getAdmin().tableExists(tableName); 797 assertEquals("The specified table should be present.", true, tableExists); 798 // exists works on stale and we see the put after the flush 799 byte[] b1 = Bytes.toBytes("row1"); 800 Get g = new Get(b1); 801 g.setConsistency(Consistency.STRONG); 802 // The following GET will make a trip to the meta to get the new location of the 1st daughter 803 // In the process it will also get the location of the replica of the daughter (initially 804 // pointing to the parent's replica) 805 Result r = t.get(g); 806 Assert.assertFalse(r.isStale()); 807 LOG.info("exists stale after flush done"); 808 809 SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1)); 810 g = new Get(b1); 811 g.setConsistency(Consistency.TIMELINE); 812 // This will succeed because in the previous GET we get the location of the replica 813 r = t.get(g); 814 Assert.assertTrue(r.isStale()); 815 SlowMeCopro.getPrimaryCdl().get().countDown(); 816 } finally { 817 SlowMeCopro.getPrimaryCdl().get().countDown(); 818 admin.balancerSwitch(true, false); 819 cluster.getMaster().setCatalogJanitorEnabled(true); 820 t.close(); 821 } 822 } 823 824 private void insertData(final TableName tableName, Admin admin, Table t) throws IOException { 825 insertData(tableName, admin, t, 1); 826 } 827 828 private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException { 829 Put p = new Put(Bytes.toBytes("row" + i)); 830 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1")); 831 t.put(p); 832 p = new Put(Bytes.toBytes("row" + (i + 1))); 833 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2")); 834 t.put(p); 835 p = new Put(Bytes.toBytes("row" + (i + 2))); 836 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3")); 837 t.put(p); 838 p = new Put(Bytes.toBytes("row" + (i + 3))); 839 p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4")); 840 t.put(p); 841 admin.flush(tableName); 842 } 843 844 /** 845 * If a table has regions that have no store files in a region, they should split successfully 846 * into two regions with no store files. 847 */ 848 @Test 849 public void testSplitRegionWithNoStoreFiles() throws Exception { 850 final TableName tableName = TableName.valueOf(name.getMethodName()); 851 // Create table then get the single region for our new table. 852 createTableAndWait(tableName, HConstants.CATALOG_FAMILY); 853 List<HRegion> regions = cluster.getRegions(tableName); 854 RegionInfo hri = getAndCheckSingleTableRegion(regions); 855 ensureTableRegionNotOnSameServerAsMeta(admin, hri); 856 int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName()); 857 HRegionServer regionServer = cluster.getRegionServer(regionServerIndex); 858 // Turn off balancer so it doesn't cut in and mess up our placements. 859 this.admin.balancerSwitch(false, true); 860 // Turn off the meta scanner so it don't remove parent on us. 861 cluster.getMaster().setCatalogJanitorEnabled(false); 862 try { 863 // Precondition: we created a table with no data, no store files. 864 printOutRegions(regionServer, "Initial regions: "); 865 Configuration conf = cluster.getConfiguration(); 866 HBaseFsck.debugLsr(conf, new Path("/")); 867 Path rootDir = CommonFSUtils.getRootDir(conf); 868 FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem(); 869 Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 870 assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size()); 871 872 // find a splittable region. Refresh the regions list 873 regions = cluster.getRegions(tableName); 874 final HRegion region = findSplittableRegion(regions); 875 assertTrue("not able to find a splittable region", region != null); 876 877 // Now split. 878 try { 879 requestSplitRegion(regionServer, region, Bytes.toBytes("row2")); 880 } catch (IOException e) { 881 fail("Split execution should have succeeded with no exceptions thrown"); 882 } 883 884 // Postcondition: split the table with no store files into two regions, but still have no 885 // store files 886 List<HRegion> daughters = cluster.getRegions(tableName); 887 assertEquals(2, daughters.size()); 888 889 // check dirs 890 HBaseFsck.debugLsr(conf, new Path("/")); 891 Map<String, Path> storefilesAfter = 892 FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName); 893 assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0, 894 storefilesAfter.size()); 895 896 hri = region.getRegionInfo(); // split parent 897 AssignmentManager am = cluster.getMaster().getAssignmentManager(); 898 RegionStates regionStates = am.getRegionStates(); 899 long start = EnvironmentEdgeManager.currentTime(); 900 while (!regionStates.isRegionInState(hri, State.SPLIT)) { 901 LOG.debug("Waiting for SPLIT state on: " + hri); 902 assertFalse("Timed out in waiting split parent to be in state SPLIT", 903 EnvironmentEdgeManager.currentTime() - start > 60000); 904 Thread.sleep(500); 905 } 906 assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN)); 907 assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN)); 908 909 // We should not be able to assign it again 910 try { 911 am.assign(hri); 912 } catch (DoNotRetryIOException e) { 913 // Expected 914 } 915 assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri)); 916 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 917 918 // We should not be able to unassign it either 919 try { 920 am.unassign(hri); 921 fail("Should have thrown exception"); 922 } catch (DoNotRetryIOException e) { 923 // Expected 924 } 925 assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri)); 926 assertTrue(regionStates.isRegionInState(hri, State.SPLIT)); 927 } finally { 928 admin.balancerSwitch(true, false); 929 cluster.getMaster().setCatalogJanitorEnabled(true); 930 } 931 } 932 933 @Test 934 public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception { 935 final TableName tableName = TableName.valueOf(name.getMethodName()); 936 try { 937 byte[] cf = Bytes.toBytes("f"); 938 byte[] cf1 = Bytes.toBytes("i_f"); 939 TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) 940 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)) 941 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1)) 942 .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build(); 943 admin.createTable(htd); 944 List<HRegion> regions = awaitTableRegions(tableName); 945 HRegion region = regions.get(0); 946 for (int i = 3; i < 9; i++) { 947 Put p = new Put(Bytes.toBytes("row" + i)); 948 p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 949 p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i)); 950 region.put(p); 951 } 952 region.flush(true); 953 HStore store = region.getStore(cf); 954 Collection<HStoreFile> storefiles = store.getStorefiles(); 955 assertEquals(1, storefiles.size()); 956 assertFalse(region.hasReferences()); 957 HRegionFileSystem hfs = region.getRegionFileSystem(); 958 StoreFileTracker sft = StoreFileTrackerFactory.create(TESTING_UTIL.getConfiguration(), true, 959 store.getStoreContext()); 960 Path referencePath = hfs.splitStoreFile(region.getRegionInfo(), "f", 961 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft); 962 assertNull(referencePath); 963 referencePath = hfs.splitStoreFile(region.getRegionInfo(), "i_f", 964 storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft); 965 assertNotNull(referencePath); 966 } finally { 967 TESTING_UTIL.deleteTable(tableName); 968 } 969 } 970 971 private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException { 972 for (int i = 0; i < 5; ++i) { 973 for (HRegion r : regions) { 974 if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) { 975 return (r); 976 } 977 } 978 Thread.sleep(100); 979 } 980 return null; 981 } 982 983 private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException { 984 List<HRegion> daughters = null; 985 // try up to 10s 986 for (int i = 0; i < 100; i++) { 987 daughters = cluster.getRegions(tableName); 988 if (daughters.size() >= 2) { 989 break; 990 } 991 Thread.sleep(100); 992 } 993 assertTrue(daughters.size() >= 2); 994 return daughters; 995 } 996 997 private HMaster abortAndWaitForMaster() throws IOException, InterruptedException { 998 cluster.abortMaster(0); 999 cluster.waitOnMaster(0); 1000 HMaster master = cluster.startMaster().getMaster(); 1001 cluster.waitForActiveAndReadyMaster(); 1002 // reset the connections 1003 Closeables.close(admin, true); 1004 TESTING_UTIL.invalidateConnection(); 1005 admin = TESTING_UTIL.getAdmin(); 1006 return master; 1007 } 1008 1009 /** 1010 * Ensure single table region is not on same server as the single hbase:meta table region. 1011 * @return Index of the server hosting the single table region 1012 */ 1013 private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri) 1014 throws IOException, MasterNotRunningException, ZooKeeperConnectionException, 1015 InterruptedException { 1016 // Now make sure that the table region is not on same server as that hosting 1017 // hbase:meta We don't want hbase:meta replay polluting our test when we later crash 1018 // the table region serving server. 1019 int metaServerIndex = cluster.getServerWithMeta(); 1020 HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex); 1021 int tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1022 assertTrue(tableRegionIndex != -1); 1023 HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex); 1024 LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other=" 1025 + tableRegionServer.getServerName()); 1026 if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) { 1027 HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer); 1028 assertNotNull(hrs); 1029 assertNotNull(hri); 1030 LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName() 1031 + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex); 1032 admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName()); 1033 } 1034 // Wait till table region is up on the server that is NOT carrying hbase:meta. 1035 for (int i = 0; i < 100; i++) { 1036 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1037 if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break; 1038 LOG.debug("Waiting on region move off the hbase:meta server; current index " 1039 + tableRegionIndex + " and metaServerIndex=" + metaServerIndex); 1040 Thread.sleep(100); 1041 } 1042 assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex, 1043 tableRegionIndex != -1 && tableRegionIndex != metaServerIndex); 1044 // Verify for sure table region is not on same server as hbase:meta 1045 tableRegionIndex = cluster.getServerWith(hri.getRegionName()); 1046 assertTrue(tableRegionIndex != -1); 1047 assertNotSame(metaServerIndex, tableRegionIndex); 1048 return tableRegionIndex; 1049 } 1050 1051 /** 1052 * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers 1053 * since crashed servers occupy an index. 1054 * @return A regionserver that is not <code>notThisOne</code> or null if none found 1055 */ 1056 private HRegionServer getOtherRegionServer(final SingleProcessHBaseCluster cluster, 1057 final HRegionServer notThisOne) { 1058 for (RegionServerThread rst : cluster.getRegionServerThreads()) { 1059 HRegionServer hrs = rst.getRegionServer(); 1060 if (hrs.getServerName().equals(notThisOne.getServerName())) continue; 1061 if (hrs.isStopping() || hrs.isStopped()) continue; 1062 return hrs; 1063 } 1064 return null; 1065 } 1066 1067 private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException { 1068 List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); 1069 for (RegionInfo region : regions) { 1070 LOG.info(prefix + region.getRegionNameAsString()); 1071 } 1072 } 1073 1074 private void waitUntilRegionServerDead() throws InterruptedException, IOException { 1075 // Wait until the master processes the RS shutdown 1076 for (int i = 1077 0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1078 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) { 1079 LOG.info("Waiting on server to go down"); 1080 Thread.sleep(100); 1081 } 1082 assertFalse("Waited too long for RS to die", 1083 cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS 1084 || cluster.getLiveRegionServerThreads().size() > NB_SERVERS); 1085 } 1086 1087 private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException { 1088 // Wait till regions are back on line again. 1089 for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) { 1090 LOG.info("Waiting for repair to happen"); 1091 Thread.sleep(1000); 1092 } 1093 if (cluster.getRegions(tableName).size() < numDaughters) { 1094 fail("Waiting too long for daughter regions"); 1095 } 1096 } 1097 1098 private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException { 1099 List<HRegion> regions = null; 1100 for (int i = 0; i < 100; i++) { 1101 regions = cluster.getRegions(tableName); 1102 if (regions.size() > 0) break; 1103 Thread.sleep(100); 1104 } 1105 return regions; 1106 } 1107 1108 private Table createTableAndWait(TableName tableName, byte[] cf) 1109 throws IOException, InterruptedException { 1110 Table t = TESTING_UTIL.createTable(tableName, cf); 1111 awaitTableRegions(tableName); 1112 assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0); 1113 return t; 1114 } 1115 1116 // Make it public so that JVMClusterUtil can access it. 1117 public static class MyMaster extends HMaster { 1118 public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException { 1119 super(conf); 1120 } 1121 1122 @Override 1123 protected MasterRpcServices createRpcServices() throws IOException { 1124 return new MyMasterRpcServices(this); 1125 } 1126 } 1127 1128 static class MyMasterRpcServices extends MasterRpcServices { 1129 static AtomicBoolean enabled = new AtomicBoolean(false); 1130 1131 private HMaster myMaster; 1132 1133 public MyMasterRpcServices(HMaster master) throws IOException { 1134 super(master); 1135 myMaster = master; 1136 } 1137 1138 @Override 1139 public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c, 1140 ReportRegionStateTransitionRequest req) throws ServiceException { 1141 ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req); 1142 if ( 1143 enabled.get() 1144 && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT) 1145 && !resp.hasErrorMessage() 1146 ) { 1147 RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates(); 1148 for (RegionStateNode regionState : regionStates.getRegionsInTransition()) { 1149 /* 1150 * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew()) 1151 * { regionStates.deleteRegion(regionState.getRegion()); } 1152 */ 1153 } 1154 } 1155 return resp; 1156 } 1157 } 1158 1159 static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy { 1160 1161 @Override 1162 protected boolean shouldSplit() { 1163 return true; 1164 } 1165 1166 @Override 1167 public boolean skipStoreFileRangeCheck(String familyName) { 1168 if (familyName.startsWith("i_")) { 1169 return true; 1170 } else { 1171 return false; 1172 } 1173 } 1174 } 1175}