001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.lang.reflect.Field;
025import java.lang.reflect.Modifier;
026import java.util.ArrayList;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import org.apache.hadoop.hbase.HBaseClassTestRule;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.NamespaceDescriptor;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.Version;
037import org.apache.hadoop.hbase.Waiter;
038import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.Table;
041import org.apache.hadoop.hbase.client.TableDescriptor;
042import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
043import org.apache.hadoop.hbase.ipc.MetaRWQueueRpcExecutor;
044import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
045import org.apache.hadoop.hbase.net.Address;
046import org.apache.hadoop.hbase.testclassification.LargeTests;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.JVMClusterUtil;
049import org.apache.hadoop.hbase.util.ReflectionUtils;
050import org.apache.hadoop.hbase.util.VersionInfo;
051import org.junit.After;
052import org.junit.AfterClass;
053import org.junit.Before;
054import org.junit.BeforeClass;
055import org.junit.ClassRule;
056import org.junit.Test;
057import org.junit.experimental.categories.Category;
058import org.slf4j.Logger;
059import org.slf4j.LoggerFactory;
060
061import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
062
063@Category({ LargeTests.class })
064public class TestRSGroupsKillRS extends TestRSGroupsBase {
065
066  @ClassRule
067  public static final HBaseClassTestRule CLASS_RULE =
068    HBaseClassTestRule.forClass(TestRSGroupsKillRS.class);
069
070  protected static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsKillRS.class);
071
072  @BeforeClass
073  public static void setUp() throws Exception {
074    // avoid all the handlers blocked when meta is offline, and regionServerReport can not be
075    // processed which causes dead lock.
076    TEST_UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
077    TEST_UTIL.getConfiguration()
078      .setFloat(MetaRWQueueRpcExecutor.META_CALL_QUEUE_READ_SHARE_CONF_KEY, 0.5f);
079    setUpTestBeforeClass();
080  }
081
082  @AfterClass
083  public static void tearDown() throws Exception {
084    tearDownAfterClass();
085  }
086
087  @Before
088  public void beforeMethod() throws Exception {
089    setUpBeforeMethod();
090  }
091
092  @After
093  public void afterMethod() throws Exception {
094    tearDownAfterMethod();
095  }
096
097  @Test
098  public void testKillRS() throws Exception {
099    RSGroupInfo appInfo = addGroup("appInfo", 1);
100    final TableName tableName = TableName.valueOf(tablePrefix + "_ns", name.getMethodName());
101    admin.createNamespace(NamespaceDescriptor.create(tableName.getNamespaceAsString())
102      .addConfiguration(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP, appInfo.getName()).build());
103    final TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
104      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f")).build();
105    admin.createTable(desc);
106    // wait for created table to be assigned
107    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
108      @Override
109      public boolean evaluate() throws Exception {
110        return getTableRegionMap().get(desc.getTableName()) != null;
111      }
112    });
113
114    ServerName targetServer = getServerName(appInfo.getServers().iterator().next());
115    assertEquals(1, admin.getRegions(targetServer).size());
116
117    try {
118      // stopping may cause an exception
119      // due to the connection loss
120      admin.stopRegionServer(targetServer.getAddress().toString());
121    } catch (Exception e) {
122    }
123    // wait until the server is actually down
124    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
125      @Override
126      public boolean evaluate() throws Exception {
127        return !cluster.getClusterMetrics().getLiveServerMetrics().containsKey(targetServer);
128      }
129    });
130    // there is only one rs in the group and we killed it, so the region can not be online, until
131    // later we add new servers to it.
132    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
133      @Override
134      public boolean evaluate() throws Exception {
135        return !cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
136      }
137    });
138    Set<Address> newServers = Sets.newHashSet();
139    newServers
140      .add(rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP).getServers().iterator().next());
141    rsGroupAdmin.moveServers(newServers, appInfo.getName());
142
143    // Make sure all the table's regions get reassigned
144    // disabling the table guarantees no conflicting assign/unassign (ie SSH) happens
145    admin.disableTable(tableName);
146    admin.enableTable(tableName);
147
148    // wait for region to be assigned
149    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
150      @Override
151      public boolean evaluate() throws Exception {
152        return cluster.getClusterMetrics().getRegionStatesInTransition().isEmpty();
153      }
154    });
155
156    ServerName targetServer1 = getServerName(newServers.iterator().next());
157    assertEquals(1, admin.getRegions(targetServer1).size());
158    assertEquals(tableName, admin.getRegions(targetServer1).get(0).getTable());
159  }
160
161  @Test
162  public void testKillAllRSInGroup() throws Exception {
163    // create a rsgroup and move two regionservers to it
164    String groupName = "my_group";
165    int groupRSCount = 2;
166    addGroup(groupName, groupRSCount);
167
168    // create a table, and move it to my_group
169    Table t = TEST_UTIL.createMultiRegionTable(tableName, Bytes.toBytes("f"), 5);
170    TEST_UTIL.loadTable(t, Bytes.toBytes("f"));
171    Set<TableName> toAddTables = new HashSet<>();
172    toAddTables.add(tableName);
173    rsGroupAdmin.moveTables(toAddTables, groupName);
174    assertTrue(rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(tableName));
175    TEST_UTIL.waitTableAvailable(tableName, 30000);
176
177    // check my_group servers and table regions
178    Set<Address> servers = rsGroupAdmin.getRSGroupInfo(groupName).getServers();
179    assertEquals(2, servers.size());
180    LOG.debug("group servers {}", servers);
181    for (RegionInfo tr : master.getAssignmentManager().getRegionStates()
182      .getRegionsOfTable(tableName)) {
183      assertTrue(servers.contains(master.getAssignmentManager().getRegionStates()
184        .getRegionAssignments().get(tr).getAddress()));
185    }
186
187    // Move a region, to ensure there exists a region whose 'lastHost' is in my_group
188    // ('lastHost' of other regions are in 'default' group)
189    // and check if all table regions are online
190    List<ServerName> gsn = new ArrayList<>();
191    for (Address addr : servers) {
192      gsn.add(getServerName(addr));
193    }
194    assertEquals(2, gsn.size());
195    for (Map.Entry<RegionInfo, ServerName> entry : master.getAssignmentManager().getRegionStates()
196      .getRegionAssignments().entrySet()) {
197      if (entry.getKey().getTable().equals(tableName)) {
198        LOG.debug("move region {} from {} to {}", entry.getKey().getRegionNameAsString(),
199          entry.getValue(), gsn.get(1 - gsn.indexOf(entry.getValue())));
200        TEST_UTIL.moveRegionAndWait(entry.getKey(), gsn.get(1 - gsn.indexOf(entry.getValue())));
201        break;
202      }
203    }
204    TEST_UTIL.waitTableAvailable(tableName, 30000);
205
206    // case 1: stop all the regionservers in my_group, and restart a regionserver in my_group,
207    // and then check if all table regions are online
208    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
209      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
210    }
211    // better wait for a while for region reassign
212    sleep(10000);
213    assertEquals(NUM_SLAVES_BASE - gsn.size(),
214      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
215    TEST_UTIL.getMiniHBaseCluster().startRegionServer(gsn.get(0).getHostname(),
216      gsn.get(0).getPort());
217    assertEquals(NUM_SLAVES_BASE - gsn.size() + 1,
218      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
219    TEST_UTIL.waitTableAvailable(tableName, 30000);
220
221    // case 2: stop all the regionservers in my_group, and move another
222    // regionserver(from the 'default' group) to my_group,
223    // and then check if all table regions are online
224    for (JVMClusterUtil.RegionServerThread rst : TEST_UTIL.getMiniHBaseCluster()
225      .getLiveRegionServerThreads()) {
226      if (rst.getRegionServer().getServerName().getAddress().equals(gsn.get(0).getAddress())) {
227        TEST_UTIL.getMiniHBaseCluster().stopRegionServer(rst.getRegionServer().getServerName());
228        break;
229      }
230    }
231    sleep(10000);
232    assertEquals(NUM_SLAVES_BASE - gsn.size(),
233      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
234    ServerName newServer = master.getServerManager().getOnlineServersList().get(0);
235    rsGroupAdmin.moveServers(Sets.newHashSet(newServer.getAddress()), groupName);
236    // wait and check if table regions are online
237    TEST_UTIL.waitTableAvailable(tableName, 30000);
238  }
239
240  @Test
241  public void testLowerMetaGroupVersion() throws Exception {
242    // create a rsgroup and move one regionserver to it
243    String groupName = "meta_group";
244    int groupRSCount = 1;
245    addGroup(groupName, groupRSCount);
246
247    // move hbase:meta to meta_group
248    Set<TableName> toAddTables = new HashSet<>();
249    toAddTables.add(TableName.META_TABLE_NAME);
250    rsGroupAdmin.moveTables(toAddTables, groupName);
251    assertTrue(
252      rsGroupAdmin.getRSGroupInfo(groupName).getTables().contains(TableName.META_TABLE_NAME));
253
254    // restart the regionserver in meta_group, and lower its version
255    String originVersion = "";
256    Set<Address> servers = new HashSet<>();
257    for (Address addr : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) {
258      servers.add(addr);
259      TEST_UTIL.getMiniHBaseCluster().stopRegionServer(getServerName(addr));
260      originVersion = master.getRegionServerVersion(getServerName(addr));
261    }
262    // better wait for a while for region reassign
263    sleep(10000);
264    assertEquals(NUM_SLAVES_BASE - groupRSCount,
265      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
266    Address address = servers.iterator().next();
267    int majorVersion = VersionInfo.getMajorVersion(originVersion);
268    assertTrue(majorVersion >= 1);
269    String lowerVersion = String.valueOf(majorVersion - 1) + originVersion.split("\\.")[1];
270    setFinalStatic(Version.class.getField("version"), lowerVersion);
271    TEST_UTIL.getMiniHBaseCluster().startRegionServer(address.getHostName(), address.getPort());
272    assertEquals(NUM_SLAVES_BASE,
273      TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads().size());
274    assertTrue(VersionInfo.compareVersion(originVersion,
275      master.getRegionServerVersion(getServerName(servers.iterator().next()))) > 0);
276    LOG.debug("wait for META assigned...");
277    // SCP finished, which means all regions assigned too.
278    TEST_UTIL.waitFor(60000, () -> !TEST_UTIL.getHBaseCluster().getMaster().getProcedures().stream()
279      .filter(p -> (p instanceof ServerCrashProcedure)).findAny().isPresent());
280  }
281
282  private static void setFinalStatic(Field field, Object newValue) throws Exception {
283    field.setAccessible(true);
284    Field modifiersField = ReflectionUtils.getModifiersField();
285    modifiersField.setAccessible(true);
286    modifiersField.setInt(field, field.getModifiers() & ~Modifier.FINAL);
287    field.set(null, newValue);
288  }
289}