/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.time.Instant;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.MasterObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.io.Closeables;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;

/**
 * Class to test HBaseHbck. Spins up the minicluster once at test start and then takes it down
 * afterward. Add any testing of HBaseHbck functionality here.
 */
@RunWith(Parameterized.class)
@Category({ LargeTests.class, ClientTests.class })
public class TestHbck {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule.forClass(TestHbck.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestHbck.class);
  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @Rule
  public TestName name = new TestName();

  @SuppressWarnings("checkstyle:VisibilityModifier")
  @Parameter
  public boolean async;

  private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName());

  private static ProcedureExecutor<MasterProcedureEnv> procExec;

  private static AsyncConnection ASYNC_CONN;

  @Parameters(name = "{index}: async={0}")
  public static List<Object[]> params() {
    return Arrays.asList(new Object[] { false }, new Object[] { true });
  }

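  /**
   * Returns the Hbck implementation under test: fetched from the shared AsyncConnection when
   * running with {@code async=true}, otherwise from the test utility's synchronous connection.
   */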
  private Hbck getHbck() throws Exception {
    if (async) {
      return ASYNC_CONN.getHbck().get();
    } else {
      return TEST_UTIL.getHbck();
    }
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
    TEST_UTIL.createMultiRegionTable(TABLE_NAME, 3, new byte[][] { Bytes.toBytes("family1") });
    procExec = TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
    ASYNC_CONN = ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get();
    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
      FailingMergeAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
    TEST_UTIL.getHBaseCluster().getMaster().getMasterCoprocessorHost().load(
      FailingSplitAfterMetaUpdatedMasterObserver.class, Coprocessor.PRIORITY_USER,
      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration());
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    Closeables.close(ASYNC_CONN, true);
    TEST_UTIL.shutdownMiniCluster();
  }

  @Before
  public void setUp() throws IOException {
    TEST_UTIL.ensureSomeRegionServersAvailable(3);
  }

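  /**
   * A no-op table procedure that suspends itself on every execution, so it never completes on its
   * own. Used by {@link #testBypassProcedure()} to have something to bypass.
   */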
  public static class SuspendProcedure extends
    ProcedureTestingUtility.NoopProcedure<MasterProcedureEnv> implements TableProcedureInterface {
    public SuspendProcedure() {
      super();
    }

    @SuppressWarnings({ "rawtypes", "unchecked" })
    @Override
    protected Procedure[] execute(final MasterProcedureEnv env) throws ProcedureSuspendedException {
      // Always suspend the procedure
      throw new ProcedureSuspendedException();
    }

    @Override
    public TableName getTableName() {
      return TABLE_NAME;
    }

    @Override
    public TableOperationType getTableOperationType() {
      return TableOperationType.READ;
    }
  }

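  /**
   * Submits a perpetually suspended procedure and verifies that {@link Hbck#bypassProcedure}
   * forces it to a successful, bypassed completion.
   */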
  @Test
  public void testBypassProcedure() throws Exception {
    // Submit a procedure that will stay suspended until bypassed
    final SuspendProcedure proc = new SuspendProcedure();
    long procId = procExec.submitProcedure(proc);
    Thread.sleep(500);

    // bypass the procedure
    List<Long> pids = Arrays.<Long> asList(procId);
    List<Boolean> results = getHbck().bypassProcedure(pids, 30000, false, false);
    assertTrue("Failed to bypass procedure!", results.get(0));
    TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass());
    LOG.info("{} finished", proc);
  }

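  /**
   * Verifies that {@link Hbck#setTableStateInMeta} records the new table state in hbase:meta and
   * returns the previous state.
   */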
  @Test
  public void testSetTableStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    // Set the table state to DISABLED
    hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.DISABLED));
    // Method {@link Hbck#setTableStateInMeta()} returns the previous state, which in this case
    // is DISABLED
    TableState prevState =
      hbck.setTableStateInMeta(new TableState(TABLE_NAME, TableState.State.ENABLED));
    assertTrue("Incorrect previous state! expected=DISABLED, found=" + prevState.getState(),
      prevState.isDisabled());
  }

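  /**
   * Verifies that {@link Hbck#setRegionStateInMeta} returns the previous region states and that
   * the AssignmentManager reflects the requested states, with the procedure executor paused so
   * background activity cannot interfere.
   */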
  @Test
  public void testSetRegionStateInMeta() throws Exception {
    Hbck hbck = getHbck();
    Admin admin = TEST_UTIL.getAdmin();
    TEST_UTIL.waitUntilAllRegionsAssigned(TABLE_NAME);
    final List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    final Map<String, RegionState.State> beforeStates = new HashMap<>();
    final Map<String, RegionState.State> requestStates = new HashMap<>();
    regions.forEach(r -> {
      RegionState beforeState = am.getRegionStates().getRegionState(r);
      beforeStates.put(r.getEncodedName(), beforeState.getState());
      LOG.debug("Before test: {} ; {}", r, beforeState.getState());
      requestStates.put(r.getEncodedName(), RegionState.State.CLOSED);
    });
    final Callable<Void> doTest = () -> {
      // Run the entire test with the procedure execution environment paused. This prevents
      // background operations from modifying AM internal state between the assertions this test
      // relies upon.
      Map<String, RegionState.State> result = hbck.setRegionStateInMeta(requestStates);
      result.forEach((k, v) -> {
        RegionState.State beforeState = beforeStates.get(k);
        assertEquals("response state should match before state; " + k, beforeState, v);
      });
      regions.forEach(r -> {
        RegionState afterState = am.getRegionStates().getRegionState(r.getEncodedName());
        RegionState.State expectedState = requestStates.get(r.getEncodedName());
        LOG.debug("After test: {}, {}", r, afterState);
        assertEquals("state in AM should match requested state; " + r, expectedState,
          afterState.getState());
      });
      return null;
    };
    ProcedureTestingUtility.restart(procExec, true, true, null, doTest, null, false, true);
    // Restore the regions to the states we found them in -- fragile?
    hbck.setRegionStateInMeta(beforeStates);
  }

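  /**
   * Exercises {@link Hbck#unassigns} and {@link Hbck#assigns}: a repeat call without the override
   * flag returns {@link Procedure#NO_PROC_ID} for every Region, the override flag schedules new
   * procedures anyway, and a bogus region list yields {@link Procedure#NO_PROC_ID} as well.
   */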
  @Test
  public void testAssigns() throws Exception {
    Hbck hbck = getHbck();
    final AssignmentManager am = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager();
    try (Admin admin = TEST_UTIL.getConnection().getAdmin()) {
      List<RegionInfo> regions = admin.getRegions(TABLE_NAME).stream()
        .filter(ri -> ri.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID).peek(ri -> {
          final RegionState rs = am.getRegionStates().getRegionState(ri.getEncodedName());
          LOG.info("RS: {}", rs);
        }).collect(Collectors.toList());
      List<Long> pids = hbck
        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
      waitOnPids(pids);
      // Rerun the unassign. Should fail for all Regions since they are already unassigned; the
      // failed unassigns will manifest as all pids being -1 (ever since HBASE-24885).
      pids = hbck
        .unassigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
      waitOnPids(pids);
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      // If we pass override, then we should be able to unassign EVEN THOUGH the Regions are
      // already unassigned. This makes a mess, but an operator might want it as a last resort
      // when doing fixup of a broken cluster.
      pids = hbck.unassigns(
        regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()), true);
      waitOnPids(pids);
      for (long pid : pids) {
        assertNotEquals(Procedure.NO_PROC_ID, pid);
      }
      // Clean up by bypassing all the unassigns we just made so tests can continue.
      hbck.bypassProcedure(pids, 10000, true, true);
      for (RegionInfo ri : regions) {
        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isClosed());
      }
      pids =
        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
      waitOnPids(pids);
      // Rerun the assign. Should fail for all Regions since they are already assigned; the
      // failed assigns will manifest as all pids being -1 (ever since HBASE-24885).
      pids =
        hbck.assigns(regions.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()));
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
      for (RegionInfo ri : regions) {
        RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
          .getRegionStates().getRegionState(ri.getEncodedName());
        LOG.info("RS: {}", rs.toString());
        assertTrue(rs.toString(), rs.isOpened());
      }
      // What happens if a bogus region list is passed?
      pids = hbck.assigns(
        Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList()));
      for (long pid : pids) {
        assertEquals(Procedure.NO_PROC_ID, pid);
      }
    }
  }

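  /**
   * Verifies that scheduling a ServerCrashProcedure for a live region server returns a real pid on
   * the first call and {@link Procedure#NO_PROC_ID} when called again for the same server.
   */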
  @Test
  public void testScheduleSCP() throws Exception {
    HRegionServer testRs = TEST_UTIL.getRSForFirstRegionInTable(TABLE_NAME);
    try (final Table t = TEST_UTIL.getConnection().getTable(TABLE_NAME)) {
      TEST_UTIL.loadTable(t, Bytes.toBytes("family1"), true);
    }
    ServerName serverName = testRs.getServerName();
    Hbck hbck = getHbck();
    List<Long> pids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    assertEquals(1, pids.size());
    assertNotEquals((Long) Procedure.NO_PROC_ID, pids.get(0));
    LOG.debug("SCP pid is {}", pids.get(0));

    List<Long> newPids =
      hbck.scheduleServerCrashProcedure(Arrays.asList(ProtobufUtil.toServerName(serverName)));
    assertEquals(1, newPids.size());
    assertEquals((Long) Procedure.NO_PROC_ID, newPids.get(0));
    waitOnPids(pids);
  }

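  /**
   * Verifies that {@link Hbck#runHbckChore()} triggers a run of the master's HbckChore and that
   * the resulting report is newer than whatever report existed before the call.
   */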
  @Test
  public void testRunHbckChore() throws Exception {
    HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
    HbckChore hbckChore = master.getHbckChore();
    Instant endTimestamp = Optional.ofNullable(hbckChore.getLastReport())
      .map(HbckReport::getCheckingEndTimestamp).orElse(Instant.EPOCH);
    Hbck hbck = getHbck();
    TEST_UTIL.waitFor(TimeUnit.MINUTES.toMillis(5), hbck::runHbckChore);
    HbckReport report = hbckChore.getLastReport();
    assertNotNull(report);
    assertTrue(report.getCheckingEndTimestamp().isAfter(endTimestamp));
  }

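  /**
   * Master observer that fails every split right after hbase:meta has been updated, counting down
   * its latch first so callers can tell the failure point was reached.
   */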
  public static class FailingSplitAfterMetaUpdatedMasterObserver
    implements MasterCoprocessor, MasterObserver {
    @SuppressWarnings("checkstyle:VisibilityModifier")
    public volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      resetLatch();
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    @Override
    public void preSplitRegionAfterMETAAction(ObserverContext<MasterCoprocessorEnvironment> ctx)
      throws IOException {
      LOG.info("Reached preSplitRegionAfterMETAAction; failing the split");
      latch.countDown();
      throw new IOException("this procedure will fail here forever");
    }

    public void resetLatch() {
      this.latch = new CountDownLatch(1);
    }
  }

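  /**
   * Master observer that fails every merge right after the merged region has been committed to
   * hbase:meta, counting down its latch first so callers can tell the failure point was reached.
   */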
  public static class FailingMergeAfterMetaUpdatedMasterObserver
    implements MasterCoprocessor, MasterObserver {
    @SuppressWarnings("checkstyle:VisibilityModifier")
    public volatile CountDownLatch latch;

    @Override
    public void start(CoprocessorEnvironment e) throws IOException {
      resetLatch();
    }

    @Override
    public Optional<MasterObserver> getMasterObserver() {
      return Optional.of(this);
    }

    public void resetLatch() {
      this.latch = new CountDownLatch(1);
    }

    @Override
    public void postMergeRegionsCommitAction(
      final ObserverContext<MasterCoprocessorEnvironment> ctx, final RegionInfo[] regionsToMerge,
      final RegionInfo mergedRegion) throws IOException {
      latch.countDown();
      throw new IOException("this procedure will fail here forever");
    }
  }

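  /** Waits up to a minute for every procedure in {@code pids} to finish. */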
  private void waitOnPids(List<Long> pids) {
    TEST_UTIL.waitFor(60000, () -> pids.stream().allMatch(procExec::isFinished));
  }
}