/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.PleaseHoldException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.StartTestingClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.RegionServerList;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.region.MasterRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IdLock;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;

import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;

/**
 * Testcase for HBASE-22365.
 * <p/>
 * The test interleaves a region move with the crash of the source region server. It parks the
 * CLOSED report of the move, takes the execution lock of the remote procedure, kills the source
 * server, waits until the region list of a server is requested, and only then lets the move finish
 * before resuming the parked region-list request. At the end the region must be hosted by the
 * destination server only.
 */
@Category({ MasterTests.class, MediumTests.class })
public class TestSCPGetRegionsRace {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestSCPGetRegionsRace.class);

  /** Servers that {@link ServerManagerForTest} passes as the exclusion list for destinations. */
  private static final List<ServerName> EXCLUDE_SERVERS = new ArrayList<>();

  /**
   * A {@link ServerManager} whose destination server list always excludes
   * {@link #EXCLUDE_SERVERS}.
   */
  private static final class ServerManagerForTest extends ServerManager {

    public ServerManagerForTest(MasterServices master, RegionServerList storage) {
      super(master, storage);
    }

    @Override
    public List<ServerName> createDestinationServersList() {
      return super.createDestinationServersList(EXCLUDE_SERVERS);
    }
  }

  // The four latches below are written by the test thread and read by threads inside the master,
  // so they are volatile to make every write visible to the hooks. A non-null ARRIVE_* latch means
  // the corresponding hook is armed; the matching RESUME_* latch is always published first.

  /** Counted down by the hook once a CLOSED report has arrived at the master. */
  private static volatile CountDownLatch ARRIVE_REPORT;

  /** The parked CLOSED report continues once this latch is counted down. */
  private static volatile CountDownLatch RESUME_REPORT;

  /** Counted down by the hook once the region list of a server has been fetched. */
  private static volatile CountDownLatch ARRIVE_GET;

  /** The parked region-list request continues once this latch is counted down. */
  private static volatile CountDownLatch RESUME_GET;

  /**
   * Blocks the calling thread until {@code resume} is counted down.
   * @param resume the latch to wait on; {@code null} means there is nothing to wait for
   * @throws RuntimeException wrapping the {@link InterruptedException} if the wait is interrupted;
   *                          the interrupt status of the thread is restored before throwing
   */
  private static void awaitResume(CountDownLatch resume) {
    if (resume == null) {
      return;
    }
    try {
      resume.await();
    } catch (InterruptedException e) {
      // Keep the interrupt visible to callers further up the stack.
      Thread.currentThread().interrupt();
      throw new RuntimeException(e);
    }
  }

  /**
   * An {@link AssignmentManager} that can park CLOSED reports and region-list requests on the
   * latches of the enclosing test.
   */
  private static final class AssignmentManagerForTest extends AssignmentManager {

    public AssignmentManagerForTest(MasterServices master, MasterRegion masterRegion) {
      super(master, masterRegion);
    }

    @Override
    public ReportRegionStateTransitionResponse reportRegionStateTransition(
      ReportRegionStateTransitionRequest req) throws PleaseHoldException {
      if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) {
        // Read each field exactly once: the test thread resets ARRIVE_REPORT concurrently, so a
        // second read could observe null after the check has passed.
        CountDownLatch arrive = ARRIVE_REPORT;
        if (arrive != null) {
          CountDownLatch resume = RESUME_REPORT;
          arrive.countDown();
          awaitResume(resume);
          RESUME_REPORT = null;
        }
      }
      return super.reportRegionStateTransition(req);
    }

    @Override
    public List<RegionInfo> getRegionsOnServer(ServerName serverName) {
      // Fetch the list first, so that it reflects the state before the test lets the move finish.
      List<RegionInfo> regions = super.getRegionsOnServer(serverName);
      // Same single-read discipline as in reportRegionStateTransition.
      CountDownLatch arrive = ARRIVE_GET;
      if (arrive != null) {
        CountDownLatch resume = RESUME_GET;
        arrive.countDown();
        awaitResume(resume);
        RESUME_GET = null;
      }
      return regions;
    }

  }

  /** An {@link HMaster} that plugs in the test assignment manager and server manager. */
  public static final class HMasterForTest extends HMaster {

    public HMasterForTest(Configuration conf) throws IOException {
      super(conf);
    }

    @Override
    protected AssignmentManager createAssignmentManager(MasterServices master,
      MasterRegion masterRegion) {
      return new AssignmentManagerForTest(master, masterRegion);
    }

    @Override
    protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
      throws IOException {
      setupClusterConnection();
      return new ServerManagerForTest(master, storage);
    }
  }

  private static final HBaseTestingUtil UTIL = new HBaseTestingUtil();

  private static final TableName NAME = TableName.valueOf("Assign");

  private static final byte[] CF = Bytes.toBytes("cf");

  /** Starts a cluster with one test master and three region servers and creates the table. */
  @BeforeClass
  public static void setUp() throws Exception {
    UTIL.startMiniCluster(StartTestingClusterOption.builder().masterClass(HMasterForTest.class)
      .numMasters(1).numRegionServers(3).build());
    UTIL.createTable(NAME, CF);
    UTIL.waitTableAvailable(NAME);
    UTIL.getAdmin().balancerSwitch(false, true);
  }

  @AfterClass
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  @Test
  public void test() throws Exception {
    RegionInfo region =
      Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo();
    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    AssignmentManager am = master.getAssignmentManager();
    RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region);
    ServerName source = rsn.getRegionLocation();
    ServerName dest = UTIL.getAdmin().getRegionServers().stream()
      .filter(sn -> !sn.equals(source)).findAny()
      .orElseThrow(() -> new AssertionError("no region server other than " + source));

    // Publish the resume latch before the arrive latch: the hook treats a non-null ARRIVE_REPORT
    // as "armed", so RESUME_REPORT has to be in place by the time it becomes visible.
    CountDownLatch resumeReport = new CountDownLatch(1);
    CountDownLatch arriveReport = new CountDownLatch(1);
    RESUME_REPORT = resumeReport;
    ARRIVE_REPORT = arriveReport;

    Future<?> future = am.moveAsync(new RegionPlan(region, source, dest));

    arriveReport.await();
    ARRIVE_REPORT = null;
    // let's get procedure lock to stop the TRSP
    IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock();
    long procId = master.getProcedures().stream()
      .filter(p -> p instanceof RegionRemoteProcedureBase).findAny()
      .orElseThrow(() -> new AssertionError("no RegionRemoteProcedureBase found")).getProcId();
    CountDownLatch resumeGet = new CountDownLatch(1);
    IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId);
    try {
      resumeReport.countDown();

      // kill the source region server
      CountDownLatch arriveGet = new CountDownLatch(1);
      RESUME_GET = resumeGet;
      ARRIVE_GET = arriveGet;
      UTIL.getMiniHBaseCluster().killRegionServer(source);

      // wait until we try to get the region list of the region server
      arriveGet.await();
      ARRIVE_GET = null;
    } finally {
      // release the procedure lock and let the TRSP finish; done in finally so that a failure
      // above does not leave the lock entry held
      procExecutionLock.releaseLockEntry(lockEntry);
    }
    future.get();

    // resume the SCP
    EXCLUDE_SERVERS.add(dest);
    resumeGet.countDown();
    // wait until there are no SCPs and TRSPs
    UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished()
      || (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure))));

    // assert the region is only on the dest server.
    HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest);
    assertNotNull(rs.getRegion(region.getEncodedName()));
    assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName()));
  }
}