/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.io.Closeables;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;

/**
 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
 * afterward. Add any testing of HBaseHbck functionality here.
 * <p>
 * The class is parameterized on {@link #async}: every test runs once against the {@link Hbck}
 * obtained from the synchronous test utility and once against the one obtained from the shared
 * {@link AsyncConnection}.
 */
@RunWith(Parameterized.class)
@Category({ LargeTests.class, ClientTests.class })
public class TestHbck {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @Rule
  public TestName name = new TestName();

  /** When {@code true}, {@link #getHbck()} hands out the Hbck of the async connection. */
  @SuppressWarnings("checkstyle:VisibilityModifier")
  @Parameter
  public boolean async;

  /** Table shared by all tests; named after this class. */
  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());

  /** Procedure executor of the mini cluster's master, captured once in class setup. */
  private static ProcedureExecutor<MasterProcedureEnv> procExec;

  /** Async connection opened in class setup and closed in class teardown. */
  private static AsyncConnection ASYNC_CONN;

  /**
   * Supplies the two parameterizations of this test: {@code async=false} and {@code async=true}.
   */
  @Parameters(name = "{index}: async={0}")
  public static List<Object[]> params() {
    return Arrays.asList(new Object[] { false }, new Object[] { true });
  }

  /**
   * Returns the {@link Hbck} instance matching the current {@link #async} parameter.
   */
  private Hbck getHbck() throws Exception {
    if (async) {
      return ASYNC_CONN.getHbck().get();
    } else {
      return TEST_UTIL.getHbck();
    }
  }

  /**
   * Starts a mini cluster with 3 as the start argument, creates the multi-region test table,
   * opens the async connection and loads the two always-failing master observers declared at the
   * bottom of this class.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
    TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") });
    final HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
    procExec = master.getMasterProcedureExecutor();
    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
    master.getMasterCoprocessorHost().load(FailingMergeAfterMetaUpdatedMasterObserver.class,
      Coprocessor.PRIORITY_USER, master.getConfiguration());
    master.getMasterCoprocessorHost().load(FailingSplitAfterMetaUpdatedMasterObserver.class,
      Coprocessor.PRIORITY_USER, master.getConfiguration());
  }

  /**
   * Closes the async connection (swallowing any IOException from close) and shuts the mini
   * cluster down.
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    Closeables.close(ASYNC_CONN, true);
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Runs before every test; asks the test utility to ensure region servers are available
   * (argument 3), since some tests schedule a crash procedure against one of them.
   */
  @Before
  public void setUp() throws IOException {
    TEST_UTIL.ensureSomeRegionServersAvailable(3);
  }

  /**
   * Table procedure against {@link #TABLE_NAME} whose {@code execute} always throws
   * {@link ProcedureSuspendedException}. Used as the target of the bypass test.
   */
  public static class SuspendProcedure extends
    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
    public SuspendProcedure() {
      super();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
      // Always suspend the procedure
      throw new ProcedureSuspendedException();
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.READ;
    }
  }

  /**
   * Submits a {@link SuspendProcedure}, bypasses it through Hbck and waits for the procedure to
   * report both success and bypass.
   */
  @Test
  public void testBypassProcedure() throws Exception {
    // SuspendProcedure
    final SuspendProcedure proc = new SuspendProcedure();
    long procId = procExec.submitProcedure(proc);
    // NOTE(review): fixed pause, presumably to let the executor pick the procedure up before
    // the bypass request is sent -- confirm whether a condition-based wait is possible here.
    Thread.sleep(500);

    // bypass the procedure
    List<Long> pids = Arrays.<Long> asList(procId);
    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
    assertTrue("Failed to by pass procedure!", results.get(0));
    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
    LOG.info("{} finished", proc);
  }

  /**
   * Sets the table state to DISABLED and then back to ENABLED, asserting that the second call
   * reports DISABLED as the previous state.
   */
  @Test
  public void testSetTableStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    // set table state to DISABLED
    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
    // Method {@link Hbck#setTableStateInMeta()} returns previous state, which in this case
    // will be DISABLED
    TableState prevState =
      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
    assertTrue("Incorrect previous state! expected=DISABLED, found=" + prevState.getState(),
      prevState.isDisabled());
  }

  /**
   * Requests state CLOSED for every region of the test table and asserts that (a) the response
   * carries each region's prior state and (b) the AssignmentManager afterwards reports the
   * requested state. The prior states are written back at the end.
   */
  @Test
  public void testSetRegionStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    Admin admin = TEST_UTIL.getAdmin();
    TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME);
    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    // Both maps are keyed by encoded region name.
    final Map<String, RegionState.State> beforeStates = new HashMap<>();
    final Map<String, RegionState.State> requestStates = new HashMap<>();
    regions.forEach(r -> {
      RegionState beforeState = am.getRegionStates().getRegionState(r);
      beforeStates.put(r.getEncodedName(), beforeState.getState());
      LOG.debug("Before test: {} ; {}", r, beforeState.getState());
      requestStates.put(r.getEncodedName(), RegionState.State.CLOSED);
    });
    final Callable<Void> doTest = () -> {
      // run the entire test with the ProcedureExecution environment paused. This prevents
      // background operations from modifying AM internal state between the assertions this test
      // relies upon.
      Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates);
      result.forEach((k, v) -> {
        RegionState.State beforeState = beforeStates.get(k);
        assertEquals("response state should match before state; " + k, beforeState, v);
      });
      regions.forEach(r -> {
        RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName());
        RegionState.State expectedState = requestStates.get(r.getEncodedName());
        LOG.debug("After test: {}, {}", r, afterState);
        assertEquals("state in AM should match requested state ; " + r, expectedState,
          afterState.getState());
      });
      return null;
    };
    ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true);
    // restore the table as we found it -- fragile? (not reached if the restart above throws)
    hbck.setRegionStateInMeta(beforeStates);
  }

  /**
   * Exercises {@code Hbck#unassigns} and {@code Hbck#assigns} on the default replicas of the
   * test table: a first call is expected to do the work, a repeated call is expected to return
   * {@link Procedure#NO_PROC_ID} for every region, and an unassign with the override flag is
   * expected to return real procedure ids. Finally a list of bogus region names is passed to
   * {@code assigns} and must also yield {@link Procedure#NO_PROC_ID} only.
   */
  @Test
  public void testAssigns() throws Exception {
    Hbck hbck = getHbck();
    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
      List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream()
        .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID)
        .collect(Collectors.toList());
      for (RegionInfo ri : regions) {
        LOG.info("RS: {}", am.getRegionStates().getRegionState(ri.getEncodedName()));
      }
      // Every Hbck call below takes the same list of encoded names; build it once.
      final List<String> encodedNames =
        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList());

      List<Long> pids = hbck.unassigns(encodedNames);
      waitOnPids(pids);
      // Rerun the unassign. Should fail for all Regions since they already unassigned; failed
      // unassign will manifest as all pids being -1 (ever since HBASE-24885).
      pids = hbck.unassigns(encodedNames);
      waitOnPids(pids);
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      // If we pass override, then we should be able to unassign EVEN THOUGH Regions already
      // unassigned.... makes for a mess but operator might want to do this at an extreme when
      // doing fixup of broke cluster.
      pids = hbck.unassigns(encodedNames, true);
      waitOnPids(pids);
      for (long pid : pids) {
        assertNotEquals(Procedure.NO_PROC_ID, pid);
      }
      // Clean-up by bypassing all the unassigns we just made so tests can continue.
      hbck.bypassProcedure(pids, 10000, true, true);
      for (RegionInfo ri : regions) {
        RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isClosed());
      }
      pids = hbck.assigns(encodedNames);
      waitOnPids(pids);
      // Rerun the assign. Should fail for all Regions since they already assigned; failed
      // assign will manifest as all pids being -1 (ever since HBASE-24885).
      pids = hbck.assigns(encodedNames);
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      for (RegionInfo ri : regions) {
        RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isOpened());
      }
      // What happens if crappy region list passed?
      pids = hbck.assigns(Arrays.asList("a", "some rubbish name"));
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
    }
  }

  /**
   * Schedules a ServerCrashProcedure for the server hosting the table's first region and asserts
   * a real procedure id comes back; scheduling again for the same server must return a single
   * {@link Procedure#NO_PROC_ID}.
   */
  @Test
  public void testScheduleSCP() throws Exception {
    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) {
      TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true);
    }
    ServerName serverName = testRs.getServerName();
    Hbck hbck = getHbck();
    List<Long> pids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    assertEquals(1, pids.size());
    assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0));
    LOG.debug("SCP pid is {}", pids.get(0));

    List<Long> newPids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    // Check the size of the *second* response before indexing into it.
    assertEquals(1, newPids.size());
    assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0));
    waitOnPids(pids);
  }

  /**
   * Triggers the HbckChore through Hbck (retrying for up to five minutes until the call returns
   * {@code true}) and asserts the chore's last report ends after the report that existed before
   * the call, or after the epoch if there was none.
   */
  @Test
  public void testRunHbckChore() throws Exception {
    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
    HbckChore hbckChore = master.getHbckChore();
    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
    Hbck hbck = getHbck();
    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
    HbckReport report = hbckChore.getLastReport();
    assertNotNull(report);
    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
  }

  /**
   * Master observer whose {@code preSplitRegionAfterMETAAction} hook counts down {@link #latch}
   * and then always throws an {@link IOException}.
   */
  public static class FailingSplitAfterMetaUpdatedMasterObserver
    implements MasterCoprocessor, MasterObserver {
    /** Counted down each time the failing hook is invoked; replaced by {@link #resetLatch()}. */
    @SuppressWarnings("checkstyle:VisibilityModifier")
    public volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      resetLatch();
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    @Override
    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
      throws IOException {
      LOG.info("I'm here");
      latch.countDown();
      throw new IOException("this procedure will fail at here forever");
    }

    /** Installs a fresh latch with a count of one. */
    public void resetLatch() {
      this.latch = new CountDownLatch(1);
    }
  }

  /**
   * Master observer whose {@code postMergeRegionsCommitAction} hook counts down {@link #latch}
   * and then always throws an {@link IOException}.
   */
  public static class FailingMergeAfterMetaUpdatedMasterObserver
    implements MasterCoprocessor, MasterObserver {
    /** Counted down each time the failing hook is invoked; replaced by {@link #resetLatch()}. */
    @SuppressWarnings("checkstyle:VisibilityModifier")
    public volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      resetLatch();
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    /** Installs a fresh latch with a count of one. */
    public void resetLatch() {
      this.latch = new CountDownLatch(1);
    }

    @Override
    public void postMergeRegionsCommitAction(
      final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
      final RegionInfo mergedRegion) throws IOException {
      latch.countDown();
      throw new IOException("this procedure will fail at here forever");
    }
  }

  /**
   * Blocks (up to 60 seconds) until the master's procedure executor reports every given
   * procedure id as finished.
   */
  private void waitOnPids(List<Long> pids) {
    TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
  }
}