001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertTrue; 023 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.List; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.hbase.HBaseClassTestRule; 029import org.apache.hadoop.hbase.HBaseTestingUtil; 030import org.apache.hadoop.hbase.HConstants; 031import org.apache.hadoop.hbase.MetaTableAccessor; 032import org.apache.hadoop.hbase.TableName; 033import org.apache.hadoop.hbase.client.Admin; 034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 035import org.apache.hadoop.hbase.client.Put; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.client.SnapshotDescription; 038import org.apache.hadoop.hbase.client.SnapshotType; 039import org.apache.hadoop.hbase.client.Table; 040import org.apache.hadoop.hbase.client.TableDescriptor; 041import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 042import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; 043import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 044import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; 045import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure; 046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; 047import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 048import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 049import org.apache.hadoop.hbase.regionserver.HRegion; 050import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 051import org.apache.hadoop.hbase.testclassification.LargeTests; 052import org.apache.hadoop.hbase.testclassification.MasterTests; 053import org.apache.hadoop.hbase.util.Bytes; 054import org.apache.hadoop.hbase.util.Threads; 055import org.junit.After; 056import org.junit.AfterClass; 057import org.junit.Before; 058import org.junit.BeforeClass; 059import org.junit.ClassRule; 060import org.junit.Rule; 061import org.junit.Test; 062import org.junit.experimental.categories.Category; 063import org.junit.rules.TestName; 064import org.slf4j.Logger; 065import org.slf4j.LoggerFactory; 066 067import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 068import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 069 070@Category({ MasterTests.class, LargeTests.class }) 071public class TestMergeTableRegionsProcedure { 072 073 @ClassRule 074 public static final HBaseClassTestRule CLASS_RULE = 075 HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class); 076 077 private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class); 078 @Rule 079 public final TestName name = new TestName(); 080 081 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 082 083 private static final int initialRegionCount = 4; 084 private final static byte[] FAMILY = Bytes.toBytes("FAMILY"); 085 private static Admin admin; 086 087 private ProcedureMetrics mergeProcMetrics; 088 private ProcedureMetrics assignProcMetrics; 089 private ProcedureMetrics unassignProcMetrics; 090 private long mergeSubmittedCount = 0; 091 private long mergeFailedCount = 0; 092 private long assignSubmittedCount = 0; 093 private long assignFailedCount = 0; 094 private long unassignSubmittedCount = 0; 095 private long unassignFailedCount = 0; 096 097 private static void setupConf(Configuration conf) { 098 // Reduce the maximum attempts to speed up the test 099 conf.setInt("hbase.assignment.maximum.attempts", 3); 100 conf.setInt("hbase.master.maximum.ping.server.attempts", 3); 101 conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1); 102 conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); 103 } 104 105 @BeforeClass 106 public static void setupCluster() throws Exception { 107 setupConf(UTIL.getConfiguration()); 108 UTIL.startMiniCluster(1); 109 admin = UTIL.getAdmin(); 110 } 111 112 @AfterClass 113 public static void cleanupTest() throws Exception { 114 UTIL.shutdownMiniCluster(); 115 } 116 117 @Before 118 public void setup() throws Exception { 119 resetProcExecutorTestingKillFlag(); 120 MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster()); 121 MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster()); 122 // Turn off balancer so it doesn't cut in and mess up our placements. 123 admin.balancerSwitch(false, true); 124 // Turn off the meta scanner so it don't remove parent on us. 125 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false); 126 resetProcExecutorTestingKillFlag(); 127 AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); 128 mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics(); 129 assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics(); 130 unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics(); 131 } 132 133 @After 134 public void tearDown() throws Exception { 135 resetProcExecutorTestingKillFlag(); 136 for (TableDescriptor htd : admin.listTableDescriptors()) { 137 LOG.info("Tear down, remove table=" + htd.getTableName()); 138 UTIL.deleteTable(htd.getTableName()); 139 } 140 } 141 142 private void resetProcExecutorTestingKillFlag() { 143 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 144 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 145 assertTrue("expected executor to be running", procExec.isRunning()); 146 } 147 148 private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException { 149 List<Put> puts = new ArrayList<>(); 150 for (RegionInfo ri : ris) { 151 Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0 152 ? new byte[] { 'a' } 153 : ri.getStartKey()); 154 put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY, 155 HConstants.CATALOG_FAMILY); 156 puts.add(put); 157 } 158 t.put(puts); 159 return puts.size(); 160 } 161 162 /** 163 * This tests two region merges 164 */ 165 @Test 166 public void testMergeTwoRegions() throws Exception { 167 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 168 UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] { 169 new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } }); 170 testMerge(tableName, 2); 171 } 172 173 private void testMerge(TableName tableName, int mergeCount) throws IOException { 174 List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName); 175 int originalRegionCount = ris.size(); 176 assertTrue(originalRegionCount > mergeCount); 177 RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {}); 178 int countOfRowsLoaded = 0; 179 try (Table table = UTIL.getConnection().getTable(tableName)) { 180 countOfRowsLoaded = loadARowPerRegion(table, ris); 181 } 182 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 183 184 // collect AM metrics before test 185 collectAssignmentManagerMetrics(); 186 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 187 MergeTableRegionsProcedure proc = 188 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true); 189 long procId = procExec.submitProcedure(proc); 190 ProcedureTestingUtility.waitProcedure(procExec, procId); 191 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 192 MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection()); 193 assertEquals(originalRegionCount - mergeCount + 1, 194 MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size()); 195 196 assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount()); 197 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 198 assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount()); 199 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 200 assertEquals(unassignSubmittedCount + mergeCount, 201 unassignProcMetrics.getSubmittedCounter().getCount()); 202 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 203 204 // Need to get the references cleaned out. Close of region will move them 205 // to archive so disable and reopen just to get rid of references to later 206 // when the catalogjanitor runs, it can do merged region cleanup. 207 admin.disableTable(tableName); 208 admin.enableTable(tableName); 209 210 // Can I purge the merged regions from hbase:meta? Check that all went 211 // well by looking at the merged row up in hbase:meta. It should have no 212 // more mention of the merged regions; they are purged as last step in 213 // the merged regions cleanup. 214 UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true); 215 UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow(); 216 RegionInfo mergedRegion = proc.getMergedRegion(); 217 RegionStateStore regionStateStore = 218 UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore(); 219 while (ris != null && ris.get(0) != null && ris.get(1) != null) { 220 ris = regionStateStore.getMergeRegions(mergedRegion); 221 LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris); 222 Threads.sleep(1000); 223 } 224 assertEquals(countOfRowsLoaded, UTIL.countRows(tableName)); 225 } 226 227 /** 228 * This tests ten region merges in one go. 229 */ 230 @Test 231 public void testMergeTenRegions() throws Exception { 232 final TableName tableName = TableName.valueOf(this.name.getMethodName()); 233 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 234 UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY); 235 testMerge(tableName, 10); 236 } 237 238 /** 239 * This tests two concurrent region merges 240 */ 241 @Test 242 public void testMergeRegionsConcurrently() throws Exception { 243 final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently"); 244 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 245 246 List<RegionInfo> tableRegions = createTable(tableName); 247 248 RegionInfo[] regionsToMerge1 = new RegionInfo[2]; 249 RegionInfo[] regionsToMerge2 = new RegionInfo[2]; 250 regionsToMerge1[0] = tableRegions.get(0); 251 regionsToMerge1[1] = tableRegions.get(1); 252 regionsToMerge2[0] = tableRegions.get(2); 253 regionsToMerge2[1] = tableRegions.get(3); 254 255 // collect AM metrics before test 256 collectAssignmentManagerMetrics(); 257 258 long procId1 = procExec.submitProcedure( 259 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true)); 260 long procId2 = procExec.submitProcedure( 261 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true)); 262 ProcedureTestingUtility.waitProcedure(procExec, procId1); 263 ProcedureTestingUtility.waitProcedure(procExec, procId2); 264 ProcedureTestingUtility.assertProcNotFailed(procExec, procId1); 265 ProcedureTestingUtility.assertProcNotFailed(procExec, procId2); 266 assertRegionCount(tableName, initialRegionCount - 2); 267 268 assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount()); 269 assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount()); 270 assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount()); 271 assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount()); 272 assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount()); 273 assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount()); 274 } 275 276 @Test 277 public void testRecoveryAndDoubleExecution() throws Exception { 278 final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution"); 279 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 280 281 List<RegionInfo> tableRegions = createTable(tableName); 282 283 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 284 ProcedureTestingUtility.setKillIfHasParent(procExec, false); 285 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 286 287 RegionInfo[] regionsToMerge = new RegionInfo[2]; 288 regionsToMerge[0] = tableRegions.get(0); 289 regionsToMerge[1] = tableRegions.get(1); 290 291 long procId = procExec.submitProcedure( 292 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 293 294 // Restart the executor and execute the step twice 295 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); 296 ProcedureTestingUtility.assertProcNotFailed(procExec, procId); 297 298 assertRegionCount(tableName, initialRegionCount - 1); 299 } 300 301 @Test 302 public void testRollbackAndDoubleExecution() throws Exception { 303 final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution"); 304 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 305 306 List<RegionInfo> tableRegions = createTable(tableName); 307 308 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 309 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 310 311 RegionInfo[] regionsToMerge = new RegionInfo[2]; 312 regionsToMerge[0] = tableRegions.get(0); 313 regionsToMerge[1] = tableRegions.get(1); 314 315 long procId = procExec.submitProcedure( 316 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 317 318 // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback 319 // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is 320 // hardcoded, so you have to look at this test at least once when you add a new step. 321 int lastStep = 8; 322 MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true); 323 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 324 UTIL.waitUntilAllRegionsAssigned(tableName); 325 List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName); 326 assertEquals(initialRegionCount, regions.size()); 327 } 328 329 @Test 330 public void testMergeWithoutPONR() throws Exception { 331 final TableName tableName = TableName.valueOf("testMergeWithoutPONR"); 332 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 333 334 List<RegionInfo> tableRegions = createTable(tableName); 335 336 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 337 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); 338 339 RegionInfo[] regionsToMerge = new RegionInfo[2]; 340 regionsToMerge[0] = tableRegions.get(0); 341 regionsToMerge[1] = tableRegions.get(1); 342 343 long procId = procExec.submitProcedure( 344 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 345 346 // Execute until step 9 of split procedure 347 // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META 348 MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false); 349 350 // Unset Toggle Kill and make ProcExec work correctly 351 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false); 352 MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec); 353 ProcedureTestingUtility.waitProcedure(procExec, procId); 354 355 assertRegionCount(tableName, initialRegionCount - 1); 356 } 357 358 @Test 359 public void testMergingRegionWhileTakingSnapshot() throws Exception { 360 final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot"); 361 final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor(); 362 363 List<RegionInfo> tableRegions = createTable(tableName); 364 365 ProcedureTestingUtility.waitNoProcedureRunning(procExec); 366 367 SnapshotDescription snapshot = 368 new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH); 369 SnapshotProtos.SnapshotDescription snapshotProto = 370 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot); 371 snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto, 372 UTIL.getHBaseCluster().getMaster().getConfiguration()); 373 long snapshotProcId = procExec.submitProcedure( 374 new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto)); 375 UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto, 376 snapshotProcId); 377 378 RegionInfo[] regionsToMerge = new RegionInfo[2]; 379 regionsToMerge[0] = tableRegions.get(0); 380 regionsToMerge[1] = tableRegions.get(1); 381 382 long mergeProcId = procExec.submitProcedure( 383 new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true)); 384 385 ProcedureTestingUtility 386 .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId); 387 ProcedureTestingUtility.waitProcedure( 388 UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId); 389 390 assertProcFailed(procExec, mergeProcId); 391 assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size()); 392 } 393 394 private List<RegionInfo> createTable(final TableName tableName) throws Exception { 395 TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName) 396 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); 397 byte[][] splitRows = new byte[initialRegionCount - 1][]; 398 for (int i = 0; i < splitRows.length; ++i) { 399 splitRows[i] = Bytes.toBytes(String.format("%d", i)); 400 } 401 admin.createTable(desc, splitRows); 402 return assertRegionCount(tableName, initialRegionCount); 403 } 404 405 public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions) 406 throws Exception { 407 UTIL.waitUntilNoRegionsInTransition(); 408 List<RegionInfo> tableRegions = admin.getRegions(tableName); 409 assertEquals(nregions, tableRegions.size()); 410 return tableRegions; 411 } 412 413 private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() { 414 return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(); 415 } 416 417 private void collectAssignmentManagerMetrics() { 418 mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount(); 419 mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount(); 420 421 assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount(); 422 assignFailedCount = assignProcMetrics.getFailedCounter().getCount(); 423 unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount(); 424 unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount(); 425 } 426}