/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import static org.apache.hadoop.hbase.util.Threads.sleep;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Version;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RSGroupTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.VersionInfo;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * Tests that tables bound to a region server group come back online after the region servers of
 * that group are stopped, restarted, replaced by servers from another group, or restarted while
 * reporting a lower version.
 */
@Category({ RSGroupTests.class, MediumTests.class })
public class TestRSGroupsKillRS extends TestRSGroupsBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);

  /**
   * Tunes the handler configuration and then starts the shared test cluster.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
    // processed which causes dead lock.
    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
    TEST_UTIL.getConfiguration()
      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
    setUpTestBeforeClass();
  }

  @AfterClass
  public static void tearDown() throws Exception {
    tearDownAfterClass();
  }

  @Before
  public void beforeMethod() throws Exception {
    setUpBeforeMethod();
  }

  @After
  public void afterMethod() throws Exception {
    tearDownAfterMethod();
  }

  /**
   * Stops the only region server of a group, verifies that regions get stuck in transition, then
   * moves a server from the default group into the group and verifies that the table's region is
   * assigned to that new server.
   */
  @Test
  public void testKillRS() throws Exception {
    RSGroupInfo appInfo = addGroup("appInfo", 1);
    final TableName tableName =
      TableName.valueOf(TABLE_PREFIX + "_ns", getNameWithoutIndex(name.getMethodName()));
    ADMIN.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
    ADMIN.createTable(desc);
    // wait for created table to be assigned
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return getTableRegionMap().get(desc.getTableName()) != null;
      }
    });

    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
    assertEquals(1, ADMIN.getRegions(targetServer).size());

    try {
      // stopping may cause an exception
      // due to the connection loss
      ADMIN.stopRegionServer(targetServer.getAddress().toString());
    } catch (Exception e) {
      // Deliberately best effort: the wait below is what verifies the server is really gone.
      // Log instead of silently swallowing so that unexpected failures remain diagnosable.
      LOG.debug("Ignoring exception while stopping region server {}", targetServer, e);
    }
    // wait until the server is actually down
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !CLUSTER.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
      }
    });
    // there is only one rs in the group and we killed it, so the region can not be online, until
    // later we add new servers to it.
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return !CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
      }
    });
    Set<Address> newServers = Sets.newHashSet();
    newServers.add(ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
    ADMIN.moveServersToRSGroup(newServers, appInfo.getName());

    // Make sure all the table's regions get reassigned
    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
    ADMIN.disableTable(tableName);
    ADMIN.enableTable(tableName);

    // wait for region to be assigned
    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
      @Override
      public boolean evaluate() throws Exception {
        return CLUSTER.getClusterMetrics().getRegionStatesInTransition().isEmpty();
      }
    });

    ServerName targetServer1 = getServerName(newServers.iterator().next());
    assertEquals(1, ADMIN.getRegions(targetServer1).size());
    assertEquals(tableName, ADMIN.getRegions(targetServer1).get(0).getTable());
  }

  /**
   * Stops every region server of a two-server group and checks that the group's table becomes
   * available again, first after restarting one of the group's servers and then after moving a
   * server from another group into it.
   */
  @Test
  public void testKillAllRSInGroup() throws Exception {
    // create a rsgroup and move two regionservers to it
    String groupName = "my_group";
    int groupRSCount = 2;
    addGroup(groupName, groupRSCount);

    // create a table, and move it to my_group
    // The Table handle is only needed for loading data, so release it right afterwards.
    try (Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5)) {
      TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
    }
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(tableName);
    ADMIN.setRSGroup(toAddTables, groupName);
    assertTrue(
      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond().contains(tableName));
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // check my_group servers and table regions
    Set<Address> servers = ADMIN.getRSGroup(groupName).getServers();
    assertEquals(2, servers.size());
    LOG.debug("group servers {}", servers);
    for (RegionInfo tr : MASTER.getAssignmentManager().getRegionStates()
      .getRegionsOfTable(tableName)) {
      assertTrue(servers.contains(MASTER.getAssignmentManager().getRegionStates()
        .getRegionAssignments().get(tr).getAddress()));
    }

    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
    // ('lastHost' of other regions are in 'default' group)
    // and check if all table regions are online
    List<ServerName> gsn = new ArrayList<>();
    for (Address addr : servers) {
      gsn.add(getServerName(addr));
    }
    assertEquals(2, gsn.size());
    for (Map.Entry<RegionInfo, ServerName> entry : MASTER.getAssignmentManager().getRegionStates()
      .getRegionAssignments().entrySet()) {
      if (entry.getKey().getTable().equals(tableName)) {
        // The group has exactly two servers, so "1 - index" selects the other one.
        ServerName destination = gsn.get(1 - gsn.indexOf(entry.getValue()));
        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
          entry.getValue(), destination);
        TEST_UTIL.moveRegionAndWait(entry.getKey(), destination);
        break;
      }
    }
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
    // and then check if all table regions are online
    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
      gsn.get(0).getPort());
    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    TEST_UTIL.waitTableAvailable(tableName, 30000);

    // case 2: stop all the regionservers in my_group, and move another
    // regionserver(from the 'default' group) to my_group,
    // and then check if all table regions are online
    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
      .getLiveRegionServerThreads()) {
      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
        break;
      }
    }
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - gsn.size(),
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    ServerName newServer = MASTER.getServerManager().getOnlineServersList().get(0);
    ADMIN.moveServersToRSGroup(Sets.newHashSet(newServer.getAddress()), groupName);
    // wait and check if table regions are online
    TEST_UTIL.waitTableAvailable(tableName, 30000);
  }

  /**
   * Moves hbase:meta into a single-server group, restarts that server so that it reports a lower
   * version than before, and waits until no {@link ServerCrashProcedure} is listed by the master
   * any more.
   */
  @Test
  public void testLowerMetaGroupVersion() throws Exception {
    // create a rsgroup and move one regionserver to it
    String groupName = "meta_group";
    int groupRSCount = 1;
    addGroup(groupName, groupRSCount);

    // move hbase:meta to meta_group
    Set<TableName> toAddTables = new HashSet<>();
    toAddTables.add(TableName.META_TABLE_NAME);
    ADMIN.setRSGroup(toAddTables, groupName);
    assertTrue(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(groupName).getSecond()
      .contains(TableName.META_TABLE_NAME));

    // restart the regionserver in meta_group, and lower its version
    String originVersion = "";
    Set<Address> servers = new HashSet<>();
    for (Address addr : ADMIN.getRSGroup(groupName).getServers()) {
      servers.add(addr);
      ServerName serverName = getServerName(addr);
      // Read the version before asking the server to stop, so the lookup cannot race with the
      // server going away.
      originVersion = MASTER.getRegionServerVersion(serverName);
      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(serverName);
    }
    // better wait for a while for region reassign
    sleep(10000);
    assertEquals(NUM_SLAVES_BASE - groupRSCount,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    Address address = servers.iterator().next();
    int majorVersion = VersionInfo.getMajorVersion(originVersion);
    assertTrue(majorVersion >= 1);
    // Build "<major - 1>.<minor>". The '.' separator is required: without it "3.0.x" would turn
    // into "20", which is not a lower version than the original one.
    String minorVersion = originVersion.split("\\.")[1];
    String lowerVersion = (majorVersion - 1) + "." + minorVersion;
    setFinalStatic(Version.class.getField("version"), lowerVersion);
    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
    assertEquals(NUM_SLAVES_BASE,
      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
    assertTrue(VersionInfo.compareVersion(originVersion,
      MASTER.getRegionServerVersion(getServerName(address))) > 0);
    LOG.debug("wait for META assigned...");
    // SCP finished, which means all regions assigned too.
    TEST_UTIL.waitFor(60000, () -> TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
      .noneMatch(p -> p instanceof ServerCrashProcedure));
  }

  /**
   * Overwrites a {@code static final} field through reflection by clearing the
   * {@link Modifier#FINAL} bit on the field before assigning the new value.
   * @param field    the static field to overwrite
   * @param newValue the value to store in the field
   * @throws Exception if any of the reflective operations fails
   */
  private static void setFinalStatic(Field field, Object newValue) throws Exception {
    field.setAccessible(true);
    Field modifiersField = ReflectionUtils.getModifiersField();
    modifiersField.setAccessible(true);
    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
    // null target because the field is static
    field.set(null, newValue);
  }
}