001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.rsgroup;
019
020import static org.apache.hadoop.hbase.util.Threads.sleep;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertTrue;
024import static org.junit.Assert.fail;
025
026import java.io.IOException;
027import java.util.ArrayList;
028import java.util.EnumSet;
029import java.util.Iterator;
030import java.util.List;
031import java.util.Map;
032import java.util.Random;
033import java.util.Set;
034import java.util.concurrent.ThreadLocalRandom;
035import java.util.concurrent.atomic.AtomicBoolean;
036import java.util.function.Function;
037import org.apache.hadoop.hbase.ClusterMetrics.Option;
038import org.apache.hadoop.hbase.HBaseClassTestRule;
039import org.apache.hadoop.hbase.ServerName;
040import org.apache.hadoop.hbase.TableName;
041import org.apache.hadoop.hbase.Waiter;
042import org.apache.hadoop.hbase.client.RegionInfo;
043import org.apache.hadoop.hbase.constraint.ConstraintException;
044import org.apache.hadoop.hbase.master.RegionState;
045import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
046import org.apache.hadoop.hbase.net.Address;
047import org.apache.hadoop.hbase.testclassification.LargeTests;
048import org.apache.hadoop.hbase.testclassification.RSGroupTests;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
051import org.apache.hadoop.hbase.util.Pair;
052import org.junit.After;
053import org.junit.AfterClass;
054import org.junit.Before;
055import org.junit.BeforeClass;
056import org.junit.ClassRule;
057import org.junit.Test;
058import org.junit.experimental.categories.Category;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
063import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
064
065@Category({ RSGroupTests.class, LargeTests.class })
066public class TestRSGroupsAdmin2 extends TestRSGroupsBase {
067
068  @ClassRule
069  public static final HBaseClassTestRule CLASS_RULE =
070    HBaseClassTestRule.forClass(TestRSGroupsAdmin2.class);
071
072  private static final Logger LOG = LoggerFactory.getLogger(TestRSGroupsAdmin2.class);
073
074  @BeforeClass
075  public static void setUp() throws Exception {
076    setUpTestBeforeClass();
077  }
078
079  @AfterClass
080  public static void tearDown() throws Exception {
081    tearDownAfterClass();
082  }
083
084  @Before
085  public void beforeMethod() throws Exception {
086    setUpBeforeMethod();
087  }
088
089  @After
090  public void afterMethod() throws Exception {
091    tearDownAfterMethod();
092  }
093
094  @Test
095  public void testRegionMove() throws Exception {
096    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
097    final byte[] familyNameBytes = Bytes.toBytes("f");
098    // All the regions created below will be assigned to the default group.
099    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, 6);
100    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
101      @Override
102      public boolean evaluate() throws Exception {
103        List<String> regions = getTableRegionMap().get(tableName);
104        if (regions == null) {
105          return false;
106        }
107
108        return getTableRegionMap().get(tableName).size() >= 6;
109      }
110    });
111
112    // get target region to move
113    Map<ServerName, List<String>> assignMap = getTableServerRegionMap().get(tableName);
114    String targetRegion = null;
115    for (ServerName server : assignMap.keySet()) {
116      targetRegion = assignMap.get(server).size() > 0 ? assignMap.get(server).get(0) : null;
117      if (targetRegion != null) {
118        break;
119      }
120    }
121    // get server which is not a member of new group
122    ServerName tmpTargetServer = null;
123    for (ServerName server : ADMIN.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
124      .getLiveServerMetrics().keySet()) {
125      if (!newGroup.containsServer(server.getAddress())) {
126        tmpTargetServer = server;
127        break;
128      }
129    }
130    final ServerName targetServer = tmpTargetServer;
131    // move target server to group
132    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()), newGroup.getName());
133    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
134      @Override
135      public boolean evaluate() throws Exception {
136        return ADMIN.getRegions(targetServer).size() <= 0;
137      }
138    });
139
140    // Lets move this region to the new group.
141    TEST_UTIL.getAdmin()
142      .move(Bytes.toBytes(RegionInfo.encodeRegionName(Bytes.toBytes(targetRegion))), targetServer);
143    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
144      @Override
145      public boolean evaluate() throws Exception {
146        return getTableRegionMap().get(tableName) != null
147          && getTableRegionMap().get(tableName).size() == 6
148          && ADMIN.getClusterMetrics(EnumSet.of(Option.REGIONS_IN_TRANSITION))
149            .getRegionStatesInTransition().size() < 1;
150      }
151    });
152
153    // verify that targetServer didn't open it
154    for (RegionInfo region : ADMIN.getRegions(targetServer)) {
155      if (targetRegion.equals(region.getRegionNameAsString())) {
156        fail("Target server opened region");
157      }
158    }
159  }
160
161  @Test
162  public void testRegionServerMove() throws IOException, InterruptedException {
163    int initNumGroups = ADMIN.listRSGroups().size();
164    RSGroupInfo appInfo = addGroup(getGroupName(name.getMethodName()), 1);
165    RSGroupInfo adminInfo = addGroup(getGroupName(name.getMethodName()), 1);
166    RSGroupInfo dInfo = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
167    assertEquals(initNumGroups + 2, ADMIN.listRSGroups().size());
168    assertEquals(1, adminInfo.getServers().size());
169    assertEquals(1, appInfo.getServers().size());
170    assertEquals(getNumServers() - 2, dInfo.getServers().size());
171    ADMIN.moveServersToRSGroup(appInfo.getServers(), RSGroupInfo.DEFAULT_GROUP);
172    ADMIN.removeRSGroup(appInfo.getName());
173    ADMIN.moveServersToRSGroup(adminInfo.getServers(), RSGroupInfo.DEFAULT_GROUP);
174    ADMIN.removeRSGroup(adminInfo.getName());
175    assertEquals(ADMIN.listRSGroups().size(), initNumGroups);
176  }
177
178  @Test
179  public void testMoveServers() throws Exception {
180    // create groups and assign servers
181    addGroup("bar", 3);
182    ADMIN.addRSGroup("foo");
183
184    RSGroupInfo barGroup = ADMIN.getRSGroup("bar");
185    RSGroupInfo fooGroup = ADMIN.getRSGroup("foo");
186    assertEquals(3, barGroup.getServers().size());
187    assertEquals(0, fooGroup.getServers().size());
188
189    // test fail bogus server move
190    try {
191      ADMIN.moveServersToRSGroup(Sets.newHashSet(Address.fromString("foo:9999")), "foo");
192      fail("Bogus servers shouldn't have been successfully moved.");
193    } catch (IOException ex) {
194      String exp = "Server foo:9999 is either offline or it does not exist.";
195      String msg = "Expected '" + exp + "' in exception message: ";
196      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
197    }
198
199    // test success case
200    LOG.info("moving servers " + barGroup.getServers() + " to group foo");
201    ADMIN.moveServersToRSGroup(barGroup.getServers(), fooGroup.getName());
202
203    barGroup = ADMIN.getRSGroup("bar");
204    fooGroup = ADMIN.getRSGroup("foo");
205    assertEquals(0, barGroup.getServers().size());
206    assertEquals(3, fooGroup.getServers().size());
207
208    LOG.info("moving servers " + fooGroup.getServers() + " to group default");
209    ADMIN.moveServersToRSGroup(fooGroup.getServers(), RSGroupInfo.DEFAULT_GROUP);
210
211    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
212      @Override
213      public boolean evaluate() throws Exception {
214        return getNumServers() == ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
215      }
216    });
217
218    fooGroup = ADMIN.getRSGroup("foo");
219    assertEquals(0, fooGroup.getServers().size());
220
221    // test group removal
222    LOG.info("Remove group " + barGroup.getName());
223    ADMIN.removeRSGroup(barGroup.getName());
224    assertEquals(null, ADMIN.getRSGroup(barGroup.getName()));
225    LOG.info("Remove group " + fooGroup.getName());
226    ADMIN.removeRSGroup(fooGroup.getName());
227    assertEquals(null, ADMIN.getRSGroup(fooGroup.getName()));
228  }
229
230  @Test
231  public void testRemoveServers() throws Exception {
232    LOG.info("testRemoveServers");
233    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 3);
234    Iterator<Address> iterator = newGroup.getServers().iterator();
235    ServerName targetServer = getServerName(iterator.next());
236
237    // remove online servers
238    try {
239      ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
240      fail("Online servers shouldn't have been successfully removed.");
241    } catch (IOException ex) {
242      String exp =
243        "Server " + targetServer.getAddress() + " is an online server, not allowed to remove.";
244      String msg = "Expected '" + exp + "' in exception message: ";
245      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
246    }
247    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
248
249    // remove dead servers
250    NUM_DEAD_SERVERS = CLUSTER.getClusterMetrics().getDeadServerNames().size();
251    try {
252      // stopping may cause an exception
253      // due to the connection loss
254      LOG.info("stopping server " + targetServer.getServerName());
255      ADMIN.stopRegionServer(targetServer.getAddress().toString());
256      NUM_DEAD_SERVERS++;
257    } catch (Exception e) {
258    }
259
260    // wait for stopped regionserver to dead server list
261    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
262      @Override
263      public boolean evaluate() throws Exception {
264        return !MASTER.getServerManager().areDeadServersInProgress()
265          && CLUSTER.getClusterMetrics().getDeadServerNames().size() == NUM_DEAD_SERVERS;
266      }
267    });
268
269    try {
270      ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
271      fail("Dead servers shouldn't have been successfully removed.");
272    } catch (IOException ex) {
273      String exp = "Server " + targetServer.getAddress() + " is on the dead servers list,"
274        + " Maybe it will come back again, not allowed to remove.";
275      String msg = "Expected '" + exp + "' in exception message: ";
276      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
277    }
278    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
279
280    // remove decommissioned servers
281    List<ServerName> serversToDecommission = new ArrayList<>();
282    targetServer = getServerName(iterator.next());
283    assertTrue(MASTER.getServerManager().getOnlineServers().containsKey(targetServer));
284    serversToDecommission.add(targetServer);
285
286    ADMIN.decommissionRegionServers(serversToDecommission, true);
287    assertEquals(1, ADMIN.listDecommissionedRegionServers().size());
288
289    assertTrue(newGroup.getServers().contains(targetServer.getAddress()));
290    ADMIN.removeServersFromRSGroup(Sets.newHashSet(targetServer.getAddress()));
291    Set<Address> newGroupServers = ADMIN.getRSGroup(newGroup.getName()).getServers();
292    assertFalse(newGroupServers.contains(targetServer.getAddress()));
293    assertEquals(2, newGroupServers.size());
294
295    assertTrue(OBSERVER.preRemoveServersCalled);
296    assertTrue(OBSERVER.postRemoveServersCalled);
297  }
298
299  @Test
300  public void testMoveServersAndTables() throws Exception {
301    LOG.info("testMoveServersAndTables");
302    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
303    // create table
304    final byte[] familyNameBytes = Bytes.toBytes("f");
305    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, 5);
306    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
307      @Override
308      public boolean evaluate() throws Exception {
309        List<String> regions = getTableRegionMap().get(tableName);
310        if (regions == null) {
311          return false;
312        }
313
314        return getTableRegionMap().get(tableName).size() >= 5;
315      }
316    });
317
318    // get server which is not a member of new group
319    ServerName targetServer = null;
320    for (ServerName server : ADMIN.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS))
321      .getLiveServerMetrics().keySet()) {
322      if (
323        !newGroup.containsServer(server.getAddress())
324          && !ADMIN.getRSGroup("master").containsServer(server.getAddress())
325      ) {
326        targetServer = server;
327        break;
328      }
329    }
330
331    LOG.debug("Print group info : " + ADMIN.listRSGroups());
332    int oldDefaultGroupServerSize = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
333    int oldDefaultGroupTableSize = ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP).size();
334    assertTrue(OBSERVER.preListTablesInRSGroupCalled);
335    assertTrue(OBSERVER.postListTablesInRSGroupCalled);
336
337    // test fail bogus server move
338    try {
339      ADMIN.moveServersToRSGroup(Sets.newHashSet(Address.fromString("foo:9999")),
340        newGroup.getName());
341      ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
342      fail("Bogus servers shouldn't have been successfully moved.");
343    } catch (IOException ex) {
344      String exp = "Server foo:9999 is either offline or it does not exist.";
345      String msg = "Expected '" + exp + "' in exception message: ";
346      assertTrue(msg + " " + ex.getMessage(), ex.getMessage().contains(exp));
347    }
348
349    // test move when src = dst
350    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()),
351      RSGroupInfo.DEFAULT_GROUP);
352    ADMIN.setRSGroup(Sets.newHashSet(tableName), RSGroupInfo.DEFAULT_GROUP);
353
354    // verify default group info
355    assertEquals(oldDefaultGroupServerSize,
356      ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size());
357    assertEquals(oldDefaultGroupTableSize,
358      ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP).size());
359
360    // verify new group info
361    assertEquals(1, ADMIN.getRSGroup(newGroup.getName()).getServers().size());
362    assertEquals(0,
363      ADMIN.getConfiguredNamespacesAndTablesInRSGroup(newGroup.getName()).getSecond().size());
364    assertTrue(OBSERVER.preGetConfiguredNamespacesAndTablesInRSGroupCalled);
365    assertTrue(OBSERVER.postGetConfiguredNamespacesAndTablesInRSGroupCalled);
366
367    // get all region to move targetServer
368    List<String> regionList = getTableRegionMap().get(tableName);
369    for (String region : regionList) {
370      // Lets move this region to the targetServer
371      TEST_UTIL.getAdmin().move(Bytes.toBytes(RegionInfo.encodeRegionName(Bytes.toBytes(region))),
372        targetServer);
373    }
374
375    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
376      @Override
377      public boolean evaluate() throws Exception {
378        return getTableRegionMap().get(tableName) != null
379          && getTableRegionMap().get(tableName).size() == 5
380          && getTableServerRegionMap().get(tableName).size() == 1
381          && ADMIN.getClusterMetrics(EnumSet.of(Option.REGIONS_IN_TRANSITION))
382            .getRegionStatesInTransition().size() < 1;
383      }
384    });
385
386    // verify that all region move to targetServer
387    assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size());
388
389    // move targetServer and table to newGroup
390    LOG.info("moving server and table to newGroup");
391    ADMIN.moveServersToRSGroup(Sets.newHashSet(targetServer.getAddress()), newGroup.getName());
392    ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
393
394    // verify group change
395    assertEquals(newGroup.getName(), ADMIN.getRSGroup(tableName).getName());
396
397    // verify servers' not exist in old group
398    Set<Address> defaultServers = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers();
399    assertFalse(defaultServers.contains(targetServer.getAddress()));
400
401    // verify servers' exist in new group
402    Set<Address> newGroupServers = ADMIN.getRSGroup(newGroup.getName()).getServers();
403    assertTrue(newGroupServers.contains(targetServer.getAddress()));
404
405    // verify tables' not exist in old group
406    Set<TableName> defaultTables =
407      Sets.newHashSet(ADMIN.listTablesInRSGroup(RSGroupInfo.DEFAULT_GROUP));
408    assertFalse(defaultTables.contains(tableName));
409
410    // verify tables' exist in new group
411    Set<TableName> newGroupTables = Sets
412      .newHashSet(ADMIN.getConfiguredNamespacesAndTablesInRSGroup(newGroup.getName()).getSecond());
413    assertTrue(newGroupTables.contains(tableName));
414
415    // verify that all region still assign on targetServer
416    // TODO: uncomment after we reimplement moveServersAndTables, now the implementation is
417    // moveToRSGroup first and then moveTables, so the region will be moved to other region servers.
418    // assertEquals(5, getTableServerRegionMap().get(tableName).get(targetServer).size());
419
420    assertTrue(OBSERVER.preMoveServersCalled);
421    assertTrue(OBSERVER.postMoveServersCalled);
422  }
423
424  @Test
425  public void testMoveServersFromDefaultGroup() throws Exception {
426    // create groups and assign servers
427    ADMIN.addRSGroup("foo");
428
429    RSGroupInfo fooGroup = ADMIN.getRSGroup("foo");
430    assertEquals(0, fooGroup.getServers().size());
431    RSGroupInfo defaultGroup = ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP);
432
433    // test remove all servers from default
434    try {
435      ADMIN.moveServersToRSGroup(defaultGroup.getServers(), fooGroup.getName());
436      fail(RSGroupInfoManagerImpl.KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE);
437    } catch (ConstraintException ex) {
438      assertTrue(
439        ex.getMessage().contains(RSGroupInfoManagerImpl.KEEP_ONE_SERVER_IN_DEFAULT_ERROR_MESSAGE));
440    }
441
442    // test success case, remove one server from default ,keep at least one server
443    if (defaultGroup.getServers().size() > 1) {
444      Address serverInDefaultGroup = defaultGroup.getServers().iterator().next();
445      LOG.info("moving server " + serverInDefaultGroup + " from group default to group "
446        + fooGroup.getName());
447      ADMIN.moveServersToRSGroup(Sets.newHashSet(serverInDefaultGroup), fooGroup.getName());
448    }
449
450    fooGroup = ADMIN.getRSGroup("foo");
451    LOG.info("moving servers " + fooGroup.getServers() + " to group default");
452    ADMIN.moveServersToRSGroup(fooGroup.getServers(), RSGroupInfo.DEFAULT_GROUP);
453
454    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
455      @Override
456      public boolean evaluate() throws Exception {
457        return getNumServers() == ADMIN.getRSGroup(RSGroupInfo.DEFAULT_GROUP).getServers().size();
458      }
459    });
460
461    fooGroup = ADMIN.getRSGroup("foo");
462    assertEquals(0, fooGroup.getServers().size());
463
464    // test group removal
465    LOG.info("Remove group " + fooGroup.getName());
466    ADMIN.removeRSGroup(fooGroup.getName());
467    assertEquals(null, ADMIN.getRSGroup(fooGroup.getName()));
468  }
469
470  @Test
471  public void testFailedMoveBeforeRetryExhaustedWhenMoveServer() throws Exception {
472    String groupName = getGroupName(name.getMethodName());
473    ADMIN.addRSGroup(groupName);
474    final RSGroupInfo newGroup = ADMIN.getRSGroup(groupName);
475    Pair<ServerName, RegionStateNode> gotPair = createTableWithRegionSplitting(newGroup, 10);
476
477    // start thread to recover region state
478    final ServerName movedServer = gotPair.getFirst();
479    final RegionStateNode rsn = gotPair.getSecond();
480    AtomicBoolean changed = new AtomicBoolean(false);
481    Thread t1 = recoverRegionStateThread(movedServer,
482      server -> MASTER.getAssignmentManager().getRegionsOnServer(movedServer), rsn, changed);
483    t1.start();
484
485    // move target server to group
486    Thread t2 = new Thread(() -> {
487      LOG.info("thread2 start running, to move regions");
488      try {
489        ADMIN.moveServersToRSGroup(Sets.newHashSet(movedServer.getAddress()), newGroup.getName());
490      } catch (IOException e) {
491        LOG.error("move server error", e);
492      }
493    });
494    t2.start();
495
496    t1.join();
497    t2.join();
498
499    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
500      @Override
501      public boolean evaluate() {
502        if (changed.get()) {
503          return MASTER.getAssignmentManager().getRegionsOnServer(movedServer).size() == 0
504            && !rsn.getRegionLocation().equals(movedServer);
505        }
506        return false;
507      }
508    });
509  }
510
511  private <T> Thread recoverRegionStateThread(T owner, Function<T, List<RegionInfo>> getRegions,
512    RegionStateNode rsn, AtomicBoolean changed) {
513    return new Thread(() -> {
514      LOG.info("thread1 start running, will recover region state");
515      long current = EnvironmentEdgeManager.currentTime();
516      // wait until there is only left the region we changed state and recover its state.
517      // wait time is set according to the number of max retries, all except failed regions will be
518      // moved in one retry, and will sleep 1s until next retry.
519      while (
520        EnvironmentEdgeManager.currentTime() - current
521            <= RSGroupInfoManagerImpl.DEFAULT_MAX_RETRY_VALUE * 1000
522      ) {
523        List<RegionInfo> regions = getRegions.apply(owner);
524        LOG.debug("server table region size is:{}", regions.size());
525        assert regions.size() >= 1;
526        // when there is exactly one region left, we can determine the move operation encountered
527        // exception caused by the strange region state.
528        if (regions.size() == 1) {
529          assertEquals(regions.get(0).getRegionNameAsString(),
530            rsn.getRegionInfo().getRegionNameAsString());
531          rsn.setState(RegionState.State.OPEN);
532          LOG.info("set region {} state OPEN", rsn.getRegionInfo().getRegionNameAsString());
533          changed.set(true);
534          break;
535        }
536        sleep(5000);
537      }
538    });
539  }
540
541  private Pair<ServerName, RegionStateNode> createTableWithRegionSplitting(RSGroupInfo rsGroupInfo,
542    int tableRegionCount) throws Exception {
543    final byte[] familyNameBytes = Bytes.toBytes("f");
544    // All the regions created below will be assigned to the default group.
545    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, tableRegionCount);
546    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
547      @Override
548      public boolean evaluate() throws Exception {
549        List<String> regions = getTableRegionMap().get(tableName);
550        if (regions == null) {
551          return false;
552        }
553        return getTableRegionMap().get(tableName).size() >= tableRegionCount;
554      }
555    });
556
557    return randomlySetOneRegionStateToSplitting(rsGroupInfo);
558  }
559
560  /**
561   * Randomly choose a region to set state.
562   * @param newGroup target group
563   * @return source server of region, and region state
564   * @throws IOException if methods called throw
565   */
566  private Pair<ServerName, RegionStateNode>
567    randomlySetOneRegionStateToSplitting(RSGroupInfo newGroup) throws IOException {
568    // get target server to move, which should has more than one regions
569    // randomly set a region state to SPLITTING to make move fail
570    return randomlySetRegionState(newGroup, RegionState.State.SPLITTING, tableName);
571  }
572
573  private Pair<ServerName, RegionStateNode> randomlySetRegionState(RSGroupInfo groupInfo,
574    RegionState.State state, TableName... tableNames) throws IOException {
575    Preconditions.checkArgument(tableNames.length == 1 || tableNames.length == 2,
576      "only support one or two tables");
577    Map<TableName, Map<ServerName, List<String>>> tableServerRegionMap = getTableServerRegionMap();
578    Map<ServerName, List<String>> assignMap = tableServerRegionMap.get(tableNames[0]);
579    if (tableNames.length == 2) {
580      Map<ServerName, List<String>> assignMap2 = tableServerRegionMap.get(tableNames[1]);
581      assignMap2.forEach((k, v) -> {
582        if (!assignMap.containsKey(k)) {
583          assignMap.remove(k);
584        }
585      });
586    }
587    String toCorrectRegionName = null;
588    ServerName srcServer = null;
589    for (ServerName server : assignMap.keySet()) {
590      toCorrectRegionName =
591        assignMap.get(server).size() >= 1 && !groupInfo.containsServer(server.getAddress())
592          ? assignMap.get(server).get(0)
593          : null;
594      if (toCorrectRegionName != null) {
595        srcServer = server;
596        break;
597      }
598    }
599    assert srcServer != null;
600    RegionInfo toCorrectRegionInfo = TEST_UTIL.getMiniHBaseCluster().getMaster()
601      .getAssignmentManager().getRegionInfo(Bytes.toBytesBinary(toCorrectRegionName));
602    RegionStateNode rsn = TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager()
603      .getRegionStates().getRegionStateNode(toCorrectRegionInfo);
604    rsn.setState(state);
605    return new Pair<>(srcServer, rsn);
606  }
607
608  @Test
609  public void testFailedMoveServersAndRepair() throws Exception {
610    // This UT calls moveToRSGroup() twice to test the idempotency of it.
611    // The first time, movement fails because a region is made in SPLITTING state
612    // which will not be moved.
613    // The second time, the region state is OPEN and check if all
614    // regions on target group servers after the call.
615    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
616
617    // create table
618    // randomly set a region state to SPLITTING to make move abort
619    Pair<ServerName, RegionStateNode> gotPair =
620      createTableWithRegionSplitting(newGroup, ThreadLocalRandom.current().nextInt(8) + 4);
621    RegionStateNode rsn = gotPair.getSecond();
622    ServerName srcServer = rsn.getRegionLocation();
623
624    // move server to newGroup and check regions
625    try {
626      ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
627      fail("should get IOException when retry exhausted but there still exists failed moved "
628        + "regions");
629    } catch (Exception e) {
630      assertTrue(
631        e.getMessage().contains(gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
632    }
633    for (RegionInfo regionInfo : MASTER.getAssignmentManager().getAssignedRegions()) {
634      if (regionInfo.getTable().equals(tableName) && regionInfo.equals(rsn.getRegionInfo())) {
635        assertEquals(
636          MASTER.getAssignmentManager().getRegionStates().getRegionServerOfRegion(regionInfo),
637          srcServer);
638      }
639    }
640
641    // retry move server to newGroup and check if all regions on srcServer was moved
642    rsn.setState(RegionState.State.OPEN);
643    ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
644    assertEquals(MASTER.getAssignmentManager().getRegionsOnServer(srcServer).size(), 0);
645  }
646
647  @Test
648  public void testFailedMoveServersTablesAndRepair() throws Exception {
649    // This UT calls moveTablesAndServers() twice to test the idempotency of it.
650    // The first time, movement fails because a region is made in SPLITTING state
651    // which will not be moved.
652    // The second time, the region state is OPEN and check if all
653    // regions on target group servers after the call.
654    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 1);
655    // create table
656    final byte[] familyNameBytes = Bytes.toBytes("f");
657    TableName table1 = TableName.valueOf(tableName.getNameAsString() + "_1");
658    TableName table2 = TableName.valueOf(tableName.getNameAsString() + "_2");
659    Random rand = ThreadLocalRandom.current();
660    TEST_UTIL.createMultiRegionTable(table1, familyNameBytes, rand.nextInt(12) + 4);
661    TEST_UTIL.createMultiRegionTable(table2, familyNameBytes, rand.nextInt(12) + 4);
662
663    // randomly set a region state to SPLITTING to make move abort
664    Pair<ServerName, RegionStateNode> gotPair =
665      randomlySetRegionState(newGroup, RegionState.State.SPLITTING, table1, table2);
666    RegionStateNode rsn = gotPair.getSecond();
667    ServerName srcServer = rsn.getRegionLocation();
668
669    // move server and table to newGroup and check regions
670    try {
671      ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
672      ADMIN.setRSGroup(Sets.newHashSet(table2), newGroup.getName());
673      fail("should get IOException when retry exhausted but there still exists failed moved "
674        + "regions");
675    } catch (Exception e) {
676      assertTrue(
677        e.getMessage().contains(gotPair.getSecond().getRegionInfo().getRegionNameAsString()));
678    }
679    for (RegionInfo regionInfo : MASTER.getAssignmentManager().getAssignedRegions()) {
680      if (regionInfo.getTable().equals(table1) && regionInfo.equals(rsn.getRegionInfo())) {
681        assertEquals(
682          MASTER.getAssignmentManager().getRegionStates().getRegionServerOfRegion(regionInfo),
683          srcServer);
684      }
685    }
686
687    // retry moveServersAndTables to newGroup and check if all regions on srcServer belongs to
688    // table2
689    rsn.setState(RegionState.State.OPEN);
690    ADMIN.moveServersToRSGroup(Sets.newHashSet(srcServer.getAddress()), newGroup.getName());
691    ADMIN.setRSGroup(Sets.newHashSet(table2), newGroup.getName());
692    for (RegionInfo regionsInfo : MASTER.getAssignmentManager().getRegionsOnServer(srcServer)) {
693      assertEquals(regionsInfo.getTable(), table2);
694    }
695  }
696
697  @Test
698  public void testMoveServersToRSGroupPerformance() throws Exception {
699    final RSGroupInfo newGroup = addGroup(getGroupName(name.getMethodName()), 2);
700    final byte[] familyNameBytes = Bytes.toBytes("f");
701    // there will be 100 regions are both the serves
702    final int tableRegionCount = 200;
703    // All the regions created below will be assigned to the default group.
704    TEST_UTIL.createMultiRegionTable(tableName, familyNameBytes, tableRegionCount);
705    TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
706      @Override
707      public boolean evaluate() throws Exception {
708        List<String> regions = getTableRegionMap().get(tableName);
709        if (regions == null) {
710          return false;
711        }
712        return getTableRegionMap().get(tableName).size() >= tableRegionCount;
713      }
714    });
715    ADMIN.setRSGroup(Sets.newHashSet(tableName), newGroup.getName());
716    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
717    String rsGroup2 = "rsGroup2";
718    ADMIN.addRSGroup(rsGroup2);
719
720    long startTime = EnvironmentEdgeManager.currentTime();
721    ADMIN.moveServersToRSGroup(Sets.newHashSet(newGroup.getServers().iterator().next()), rsGroup2);
722    long timeTaken = EnvironmentEdgeManager.currentTime() - startTime;
723    String msg =
724      "Should not take mote than 15000 ms to move a table with 100 regions. Time taken  ="
725        + timeTaken + " ms";
726    // This test case is meant to be used for verifying the performance quickly by a developer.
727    // Moving 100 regions takes much less than 15000 ms. Given 15000 ms so test cases passes
728    // on all environment.
729    assertTrue(msg, timeTaken < 15000);
730    LOG.info("Time taken to move a table with 100 region is {} ms", timeTaken);
731  }
732}