/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.janitor;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.CatalogFamilyFormat;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;

/**
 * Tests for {@link MetaFixer}. Each test damages the region layout of a table (deleting regions
 * to make holes, or adding regions to make overlaps), has the {@link CatalogJanitor} scan and
 * report on it, runs the fixer against that report, and then checks what a fresh janitor report
 * says. All tests share one mini cluster started in {@link #setupBeforeClass()}.
 */
@Category({ MasterTests.class, LargeTests.class })
public class TestMetaFixer {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaFixer.class);
  @Rule
  public TestName name = new TestName();

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  /** Starts the mini cluster shared by every test in this class. */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster();
  }

  /** Shuts the shared mini cluster down. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Removes {@code ri} from both the region state store and the in-memory region states of the
   * assignment manager.
   * @param services master whose assignment manager is manipulated
   * @param ri       region to remove
   * @throws IOException if the delete from the region state store fails
   */
  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
    services.getAssignmentManager().getRegionStateStore().deleteRegion(ri);
    // Delete it from Master context too else it sticks around.
    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
  }

  /**
   * Polls {@code executor} every 100ms until it reports the procedure {@code pid} as finished.
   * @param executor procedure executor the procedure was submitted to
   * @param pid      id of the procedure to wait for
   */
  private static void waitUntilFinished(ProcedureExecutor<MasterProcedureEnv> executor,
    long pid) {
    while (!executor.isFinished(pid)) {
      Threads.sleep(100);
    }
  }

  /**
   * Creates a multi-region table with {@code replicaCount} replicas, deletes three regions (and
   * their replicas), and verifies that {@link MetaFixer#fixHoles(CatalogJanitorReport)} brings the
   * janitor report back to empty and the region count back to its original value.
   * @param tn           name of the table to create
   * @param replicaCount number of region replicas the table is created with
   */
  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
    throws Exception {
    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
    services.getCatalogJanitor().scan();
    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    int originalCount = ris.size();
    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount
    // regions.
    for (int i = 0; i < replicaCount; i++) {
      deleteRegion(services, ris.get(3 * replicaCount + i));
      deleteRegion(services, ris.get(i));
      deleteRegion(services, ris.get(ris.size() - 1 - i));
    }
    assertEquals(initialSize - 3 * replicaCount,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(report.toString(), 3, report.getHoles().size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.toString(), report.isEmpty());
    assertEquals(initialSize,
      services.getAssignmentManager().getRegionStates().getRegionStates().size());

    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
    // watchdog TestRule terminates the test.
    HBaseTestingUtil.await(50,
      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);

    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(originalCount, ris.size());
  }

  /** Hole plugging on a table with a single replica per region. */
  @Test
  public void testPlugsHoles() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testPlugsHolesWithReadReplicaInternal(tn, 1);
  }

  /** Hole plugging on a table with three replicas per region. */
  @Test
  public void testPlugsHolesWithReadReplica() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testPlugsHolesWithReadReplicaInternal(tn, 3);
  }

  /**
   * Just make sure running fixMeta does right thing for the case of a single-region Table where the
   * region gets dropped. There is nothing much we can do. We can't restore what we don't know about
   * (at least from a read of hbase:meta).
   */
  @Test
  public void testOneRegionTable() throws IOException {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    deleteRegion(services, ris.get(0));
    services.getCatalogJanitor().scan();
    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.isEmpty());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixHoles(report);
    // NOTE(review): no scan is triggered between fixHoles and this read, so unless the janitor
    // ran in the background this is presumably the same report that was handed to fixHoles --
    // confirm whether a rescan was intended here.
    report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertEquals(0, ris.size());
  }

  /**
   * Manufactures an overlap: builds a region in {@code a}'s table running from {@code a}'s start
   * key to {@code b}'s end key, writes it to hbase:meta, assigns it and waits for the assign
   * procedure.
   * @param services master used for the meta write and the assignment
   * @param a        region supplying the table and the start key
   * @param b        region supplying the end key
   * @return the newly created overlapping region
   * @throws IOException if writing to meta or submitting the assign fails
   */
  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
    throws IOException {
    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
      .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
    MetaTableAccessor.putsToMetaTable(services.getConnection(),
      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
        EnvironmentEdgeManager.currentTime())));
    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
    long assign = services.getAssignmentManager().assign(overlapRegion);
    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
    return overlapRegion;
  }

  /**
   * Shared setup for the overlap tests: creates and loads a multi-region table, checks the janitor
   * report starts out empty, then adds three overlapping regions.
   * @param tn name of the table to create
   */
  private void testOverlapCommon(final TableName tn) throws Exception {
    // The Table handle is only needed for loading data; close it as soon as that is done.
    try (Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY)) {
      TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
    }
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make three overlapping regions: ris[1]..ris[3], ris[2]..ris[3] and ris[2]..ris[4].
    makeOverlap(services, ris.get(1), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(3));
    makeOverlap(services, ris.get(2), ris.get(4));
  }

  /**
   * Fixes the overlaps made by {@link #testOverlapCommon(TableName)}, resubmits the
   * {@link GCMultipleMergedRegionsProcedure} for every merged region, and verifies that no orphan
   * regions are reported on the filesystem and that the janitor report ends up empty.
   */
  @Test
  public void testOverlap() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    testOverlapCommon(tn);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    HbckChore hbckChore = services.getHbckChore();

    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    CatalogJanitorReport report = cj.getLastReport();
    assertEquals(6, report.getOverlaps().size());
    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    HBaseTestingUtil.await(10, () -> {
      try {
        if (cj.scan() > 0) {
          // It submits GC once, then it will immediately kick off another GC to test if
          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
          // a hole.
          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
            List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
            if (parents != null) {
              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
              pe.submitProcedure(
                new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents));
            }
          }
          return true;
        }
        return false;
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });

    // Wait until all GCs settled down
    HBaseTestingUtil.await(10, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No orphan regions on FS
    hbckChore.choreForTesting();
    HbckReport hbckReport = hbckChore.getLastReport();
    assertNotNull(hbckReport);
    assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());

    // The janitor report must be empty again after the fix and the duplicate GC.
    cj.scan();
    final CatalogJanitorReport postReport = cj.getLastReport();
    assertTrue(postReport.isEmpty());
  }

  /**
   * Creates one overlap in each of two tables and verifies that a single
   * {@link MetaFixer#fixOverlaps(CatalogJanitorReport)} call leaves the janitor report empty.
   */
  @Test
  public void testMultipleTableOverlaps() throws Exception {
    TableName t1 = TableName.valueOf("t1");
    TableName t2 = TableName.valueOf("t2");
    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
    TEST_UTIL.waitTableAvailable(t2);

    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());

    // Make a simple overlap for t1
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
    makeOverlap(services, ris.get(1), ris.get(2));
    // Make a simple overlap for t2
    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
    makeOverlap(services, ris.get(1), ris.get(2));

    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());

    MetaFixer fixer = new MetaFixer(services);
    List<Long> longs = fixer.fixOverlaps(report);
    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);

    // After fix, verify no overlaps are left.
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
  }

  /**
   * Runs the overlap fix with {@code hbase.master.metafixer.max.merge.count} set to 5, so the
   * overlaps are split across two merge requests and a second
   * {@link MetaFixer#fixOverlaps(CatalogJanitorReport)} run is needed to clear the report. The
   * configuration override is removed and the table deleted in the {@code finally} block.
   */
  @Test
  public void testOverlapWithSmallMergeCount() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    try {
      testOverlapCommon(tn);
      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
      CatalogJanitor cj = services.getCatalogJanitor();
      cj.scan();
      CatalogJanitorReport report = cj.getLastReport();
      assertEquals(6, report.getOverlaps().size());
      assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size());

      // The max merge count is set to 5 so overlap regions are divided into
      // two merge requests.
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
        .setInt("hbase.master.metafixer.max.merge.count", 5);

      // Get overlap regions
      HashSet<String> overlapRegions = new HashSet<>();
      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
        overlapRegions.add(pair.getFirst().getRegionNameAsString());
        overlapRegions.add(pair.getSecond().getRegionNameAsString());
      }

      MetaFixer fixer = new MetaFixer(services);
      fixer.fixOverlaps(report);
      AssignmentManager am = services.getAssignmentManager();

      HBaseTestingUtil.await(200, () -> {
        try {
          cj.scan();
          final CatalogJanitorReport postReport = cj.getLastReport();
          RegionStates regionStates = am.getRegionStates();
          RegionStateStore regionStateStore = am.getRegionStateStore();
          // Make sure that two merged regions are opened and GCs are done.
          if (postReport.getOverlaps().size() == 1) {
            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
            if (
              (!overlapRegions.contains(pair.getFirst().getRegionNameAsString())
                && regionStates.getRegionState(pair.getFirst()).isOpened())
                && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString())
                  && regionStates.getRegionState(pair.getSecond()).isOpened())
            ) {
              // Make sure GC is done.
              List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst());
              List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond());

              return (firstParents == null || firstParents.isEmpty())
                && (secondParents == null || secondParents.isEmpty());
            }
          }
          return false;
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // Second run of fixOverlap should fix all.
      report = cj.getLastReport();
      fixer.fixOverlaps(report);

      HBaseTestingUtil.await(20, () -> {
        try {
          // Make sure it GC only once.
          return (cj.scan() > 0);
        } catch (Exception e) {
          throw new RuntimeException(e);
        }
      });

      // The janitor report must be empty after the second fix run.
      cj.scan();
      final CatalogJanitorReport postReport = cj.getLastReport();
      assertTrue(postReport.isEmpty());

    } finally {
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
        .unset("hbase.master.metafixer.max.merge.count");

      TEST_UTIL.deleteTable(tn);
    }
  }

  /**
   * This test covers the case that one of merged parent regions is a merged child region that has
   * not been GCed but there is no reference files anymore. In this case, it will kick off a GC
   * procedure, but no merge will happen.
   */
  @Test
  public void testMergeWithMergedChildRegion() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
    CatalogJanitor cj = services.getCatalogJanitor();
    cj.scan();
    CatalogJanitorReport report = cj.getLastReport();
    assertTrue(report.isEmpty());
    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));

    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    // Mark it as a merged child region.
    RegionInfo fakedParentRegion =
      RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build();

    Put putOfMerged =
      MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP);
    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp())
      .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build());

    // Write the faked merge qualifier and release the meta Table handle right away.
    try (Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection())) {
      meta.put(putOfMerged);
    }

    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtil.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // No merge is done, overlap is still there.
    cj.scan();
    report = cj.getLastReport();
    assertEquals(2, report.getOverlaps().size());

    fixer.fixOverlaps(report);

    // Wait until all procedures settled down
    HBaseTestingUtil.await(200, () -> {
      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
    });

    // Merge is done and no more overlaps
    cj.scan();
    report = cj.getLastReport();
    assertEquals(0, report.getOverlaps().size());
  }

  /**
   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we
   * can fix this condition. HBASE-24247
   */
  @Test
  public void testOverlapWithMergeOfNonContiguous() throws Exception {
    TableName tn = TableName.valueOf(this.name.getMethodName());
    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
    assertTrue(ris.size() > 5);
    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
    services.getCatalogJanitor().scan();
    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
    assertTrue(report.isEmpty());
    // Make one big overlap running from the start of ris[1] to the end of ris[5].
    makeOverlap(services, ris.get(1), ris.get(5));
    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
    RegionInfo deletedRegion = ris.get(3);
    ProcedureExecutor<MasterProcedureEnv> executor = services.getMasterProcedureExecutor();
    long pid = services.getAssignmentManager().unassign(deletedRegion);
    waitUntilFinished(executor, pid);
    GCRegionProcedure procedure = new GCRegionProcedure(executor.getEnvironment(), deletedRegion);
    pid = executor.submitProcedure(procedure);
    waitUntilFinished(executor, pid);
    services.getCatalogJanitor().scan();
    report = services.getCatalogJanitor().getLastReport();
    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
    MetaFixer fixer = new MetaFixer(services);
    fixer.fixOverlaps(report);
    HBaseTestingUtil.await(10, () -> {
      try {
        services.getCatalogJanitor().scan();
        final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport();
        return postReport.isEmpty();
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
    });
  }
}