001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import static org.apache.hadoop.hbase.master.HMaster.HBASE_MASTER_RSPROC_DISPATCHER_CLASS;
021
022import java.util.List;
023import java.util.stream.Collectors;
024import java.util.stream.IntStream;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtil;
027import org.apache.hadoop.hbase.ServerName;
028import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.Admin;
031import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.master.HMaster;
037import org.apache.hadoop.hbase.master.hbck.HbckChore;
038import org.apache.hadoop.hbase.master.hbck.HbckReport;
039import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
040import org.apache.hadoop.hbase.regionserver.HRegion;
041import org.apache.hadoop.hbase.regionserver.HRegionServer;
042import org.apache.hadoop.hbase.testclassification.LargeTests;
043import org.apache.hadoop.hbase.testclassification.MiscTests;
044import org.junit.AfterClass;
045import org.junit.Assert;
046import org.junit.Before;
047import org.junit.BeforeClass;
048import org.junit.ClassRule;
049import org.junit.Rule;
050import org.junit.Test;
051import org.junit.experimental.categories.Category;
052import org.junit.rules.TestName;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
057
058/**
059 * Testing custom RSProcedureDispatcher to ensure retry limit can be imposed on certain errors.
060 */
061@Category({ MiscTests.class, LargeTests.class })
062public class TestProcDispatcher {
063
064  private static final Logger LOG = LoggerFactory.getLogger(TestProcDispatcher.class);
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestProcDispatcher.class);
069
070  @Rule
071  public TestName name = new TestName();
072
073  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
074  private static ServerName rs0;
075
076  @BeforeClass
077  public static void setUpBeforeClass() throws Exception {
078    TEST_UTIL.getConfiguration().set(HBASE_MASTER_RSPROC_DISPATCHER_CLASS,
079      RSProcDispatcher.class.getName());
080    TEST_UTIL.startMiniCluster(3);
081    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
082    rs0 = cluster.getRegionServer(0).getServerName();
083    TEST_UTIL.getAdmin().balancerSwitch(false, true);
084  }
085
086  @AfterClass
087  public static void tearDownAfterClass() throws Exception {
088    TEST_UTIL.shutdownMiniCluster();
089  }
090
091  @Before
092  public void setUp() throws Exception {
093    final TableName tableName = TableName.valueOf(name.getMethodName());
094    TableDescriptor tableDesc = TableDescriptorBuilder.newBuilder(tableName)
095      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam1")).build();
096    int startKey = 0;
097    int endKey = 80000;
098    TEST_UTIL.getAdmin().createTable(tableDesc, Bytes.toBytes(startKey), Bytes.toBytes(endKey), 9);
099  }
100
101  @Test
102  public void testRetryLimitOnConnClosedErrors() throws Exception {
103    HbckChore hbckChore = new HbckChore(TEST_UTIL.getHBaseCluster().getMaster());
104    final TableName tableName = TableName.valueOf(name.getMethodName());
105    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
106    Admin admin = TEST_UTIL.getAdmin();
107    Table table = TEST_UTIL.getConnection().getTable(tableName);
108    List<Put> puts = IntStream.range(10, 50000).mapToObj(i -> new Put(Bytes.toBytes(i))
109      .addColumn(Bytes.toBytes("fam1"), Bytes.toBytes("q1"), Bytes.toBytes("val_" + i)))
110      .collect(Collectors.toList());
111    table.put(puts);
112    admin.flush(tableName);
113    admin.compact(tableName);
114    Thread.sleep(3000);
115    HRegionServer hRegionServer0 = cluster.getRegionServer(0);
116    HRegionServer hRegionServer1 = cluster.getRegionServer(1);
117    HRegionServer hRegionServer2 = cluster.getRegionServer(2);
118    int numRegions0 = hRegionServer0.getNumberOfOnlineRegions();
119    int numRegions1 = hRegionServer1.getNumberOfOnlineRegions();
120    int numRegions2 = hRegionServer2.getNumberOfOnlineRegions();
121
122    hbckChore.choreForTesting();
123    HbckReport hbckReport = hbckChore.getLastReport();
124    Assert.assertEquals(0, hbckReport.getInconsistentRegions().size());
125    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
126    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnRS().size());
127
128    HRegion region0 = hRegionServer0.getRegions().get(0);
129    // move all regions from server1 to server0
130    for (HRegion region : hRegionServer1.getRegions()) {
131      TEST_UTIL.getAdmin().move(region.getRegionInfo().getEncodedNameAsBytes(), rs0);
132    }
133    TEST_UTIL.getAdmin().move(region0.getRegionInfo().getEncodedNameAsBytes());
134    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
135
136    // Ensure:
137    // 1. num of regions before and after scheduling SCP remain same
138    // 2. all procedures including SCPs are successfully completed
139    // 3. two servers have SCPs scheduled
140    TEST_UTIL.waitFor(5000, 1000, () -> {
141      LOG.info("numRegions0: {} , numRegions1: {} , numRegions2: {}", numRegions0, numRegions1,
142        numRegions2);
143      LOG.info("Online regions - server0 : {} , server1: {} , server2: {}",
144        cluster.getRegionServer(0).getNumberOfOnlineRegions(),
145        cluster.getRegionServer(1).getNumberOfOnlineRegions(),
146        cluster.getRegionServer(2).getNumberOfOnlineRegions());
147      LOG.info("Num of successfully completed procedures: {} , num of all procedures: {}",
148        master.getMasterProcedureExecutor().getProcedures().stream()
149          .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState()
150              == ProcedureProtos.ProcedureState.SUCCESS)
151          .count(),
152        master.getMasterProcedureExecutor().getProcedures().size());
153      LOG.info("Num of SCPs: " + master.getMasterProcedureExecutor().getProcedures().stream()
154        .filter(proc -> proc instanceof ServerCrashProcedure).count());
155      return (numRegions0 + numRegions1 + numRegions2)
156          == (cluster.getRegionServer(0).getNumberOfOnlineRegions()
157            + cluster.getRegionServer(1).getNumberOfOnlineRegions()
158            + cluster.getRegionServer(2).getNumberOfOnlineRegions())
159        && master.getMasterProcedureExecutor().getProcedures().stream()
160          .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState()
161              == ProcedureProtos.ProcedureState.SUCCESS)
162          .count() == master.getMasterProcedureExecutor().getProcedures().size()
163        && master.getMasterProcedureExecutor().getProcedures().stream()
164          .filter(proc -> proc instanceof ServerCrashProcedure).count() > 0;
165    });
166
167    // Ensure we have no inconsistent regions
168    TEST_UTIL.waitFor(5000, 1000, () -> {
169      hbckChore.choreForTesting();
170      HbckReport report = hbckChore.getLastReport();
171      return report.getInconsistentRegions().isEmpty() && report.getOrphanRegionsOnFS().isEmpty()
172        && report.getOrphanRegionsOnRS().isEmpty();
173    });
174
175  }
176
177}