/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import static org.apache.hadoop.hbase.master.HMaster.HBASE_MASTER_RSPROC_DISPATCHER_CLASS;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.hbck.HbckChore;
import org.apache.hadoop.hbase.master.hbck.HbckReport;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;

/**
 * Testing custom RSProcedureDispatcher to ensure retry limit can be imposed on certain errors.
 * <p>
 * The mini cluster is started with {@code RSProcDispatcher} configured as the master's region
 * server procedure dispatcher, three region servers, and the balancer switched off so that region
 * placement only changes through the explicit moves issued by the test.
 */
@Category({ MiscTests.class, LargeTests.class })
public class TestProcDispatcher {

  private static final Logger LOG = LoggerFactory.getLogger(TestProcDispatcher.class);

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestProcDispatcher.class);

  @Rule
  public TestName name = new TestName();

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  /** Server name of the region server at index 0, captured once the mini cluster is up. */
  private static ServerName rs0;

  /**
   * Starts a three region server mini cluster that uses {@code RSProcDispatcher} as the remote
   * procedure dispatcher, records the name of region server 0 and disables the balancer.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.getConfiguration().set(HBASE_MASTER_RSPROC_DISPATCHER_CLASS,
      RSProcDispatcher.class.getName());
    TEST_UTIL.startMiniCluster(3);
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    rs0 = cluster.getRegionServer(0).getServerName();
    TEST_UTIL.getAdmin().balancerSwitch(false, true);
  }

  /** Shuts the mini cluster down after all tests in this class have run. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Creates a table named after the running test method with a single column family
   * {@code fam1}, pre-split into 9 regions over the integer key range [0, 80000).
   */
  @Before
  public void setUp() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    TableDescriptor tableDesc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam1")).build();
    int startKey = 0;
    int endKey = 80000;
    TEST_UTIL.getAdmin().createTable(tableDesc, Bytes.toBytes(startKey), Bytes.toBytes(endKey),
      9);
  }

  /**
   * Loads data, moves regions between region servers through the custom dispatcher and then
   * verifies that the cluster converges: the total number of online regions is unchanged, every
   * master procedure has finished successfully, at least one {@link ServerCrashProcedure} was
   * scheduled, and the hbck chore reports no inconsistent or orphan regions.
   */
  @Test
  public void testRetryLimitOnConnClosedErrors() throws Exception {
    HbckChore hbckChore = new HbckChore(TEST_UTIL.getHBaseCluster().getMaster());
    final TableName tableName = TableName.valueOf(name.getMethodName());
    SingleProcessHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    Admin admin = TEST_UTIL.getAdmin();

    List<Put> puts = IntStream.range(10, 50000)
      .mapToObj(i -> new Put(Bytes.toBytes(i)).addColumn(Bytes.toBytes("fam1"),
        Bytes.toBytes("q1"), Bytes.toBytes("val_" + i)))
      .collect(Collectors.toList());
    // Close the table handle as soon as the load is done instead of leaking it.
    try (Table table = TEST_UTIL.getConnection().getTable(tableName)) {
      table.put(puts);
    }
    admin.flush(tableName);
    admin.compact(tableName);
    // NOTE(review): fixed pause after requesting the compaction; presumably gives it time to
    // finish before region counts are sampled - confirm whether a state-based wait is preferable.
    Thread.sleep(3000);

    HRegionServer hRegionServer0 = cluster.getRegionServer(0);
    HRegionServer hRegionServer1 = cluster.getRegionServer(1);
    HRegionServer hRegionServer2 = cluster.getRegionServer(2);
    int numRegions0 = hRegionServer0.getNumberOfOnlineRegions();
    int numRegions1 = hRegionServer1.getNumberOfOnlineRegions();
    int numRegions2 = hRegionServer2.getNumberOfOnlineRegions();
    int expectedTotalRegions = numRegions0 + numRegions1 + numRegions2;

    // Baseline: the cluster must be consistent before any region is moved.
    hbckChore.choreForTesting();
    HbckReport hbckReport = hbckChore.getLastReport();
    Assert.assertEquals(0, hbckReport.getInconsistentRegions().size());
    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
    Assert.assertEquals(0, hbckReport.getOrphanRegionsOnRS().size());

    // Pick the region before the moves below change what server0 hosts.
    HRegion region0 = hRegionServer0.getRegions().get(0);
    // move all regions from server1 to server0
    for (HRegion region : hRegionServer1.getRegions()) {
      admin.move(region.getRegionInfo().getEncodedNameAsBytes(), rs0);
    }
    // move one region off server0 without naming a destination server
    admin.move(region0.getRegionInfo().getEncodedNameAsBytes());
    HMaster master = TEST_UTIL.getHBaseCluster().getMaster();

    // Ensure:
    // 1. num of regions before and after scheduling SCP remain same
    // 2. all procedures including SCPs are successfully completed
    // 3. at least one SCP has been scheduled
    TEST_UTIL.waitFor(5000, 1000, () -> {
      // Sample every metric exactly once per poll so the logged values are the checked values.
      int online0 = cluster.getRegionServer(0).getNumberOfOnlineRegions();
      int online1 = cluster.getRegionServer(1).getNumberOfOnlineRegions();
      int online2 = cluster.getRegionServer(2).getNumberOfOnlineRegions();
      long successfulProcedures = countSuccessfulProcedures(master);
      int allProcedures = countAllProcedures(master);
      long serverCrashProcedures = countServerCrashProcedures(master);

      LOG.info("numRegions0: {} , numRegions1: {} , numRegions2: {}", numRegions0, numRegions1,
        numRegions2);
      LOG.info("Online regions - server0 : {} , server1: {} , server2: {}", online0, online1,
        online2);
      LOG.info("Num of successfully completed procedures: {} , num of all procedures: {}",
        successfulProcedures, allProcedures);
      LOG.info("Num of SCPs: {}", serverCrashProcedures);

      return expectedTotalRegions == (online0 + online1 + online2)
        && successfulProcedures == allProcedures && serverCrashProcedures > 0;
    });

    // Ensure we have no inconsistent regions
    TEST_UTIL.waitFor(5000, 1000, () -> {
      hbckChore.choreForTesting();
      HbckReport report = hbckChore.getLastReport();
      return report.getInconsistentRegions().isEmpty() && report.getOrphanRegionsOnFS().isEmpty()
        && report.getOrphanRegionsOnRS().isEmpty();
    });
  }

  /** Returns how many procedures known to the master's executor are in the SUCCESS state. */
  private static long countSuccessfulProcedures(HMaster master) {
    return master.getMasterProcedureExecutor().getProcedures().stream()
      .filter(proc -> proc.getState() == ProcedureProtos.ProcedureState.SUCCESS).count();
  }

  /** Returns the number of procedures currently listed by the master's procedure executor. */
  private static int countAllProcedures(HMaster master) {
    return master.getMasterProcedureExecutor().getProcedures().size();
  }

  /** Returns how many of the master's listed procedures are {@link ServerCrashProcedure}s. */
  private static long countServerCrashProcedures(HMaster master) {
    return master.getMasterProcedureExecutor().getProcedures().stream()
      .filter(proc -> proc instanceof ServerCrashProcedure).count();
  }

}