001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.client.TableDescriptorBuilder.SPLIT_POLICY;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertNotNull;
025import static org.junit.Assert.assertNotSame;
026import static org.junit.Assert.assertNull;
027import static org.junit.Assert.assertTrue;
028import static org.junit.Assert.fail;
029
030import java.io.IOException;
031import java.lang.reflect.Field;
032import java.util.ArrayList;
033import java.util.Collection;
034import java.util.List;
035import java.util.Map;
036import java.util.Optional;
037import java.util.concurrent.CountDownLatch;
038import java.util.concurrent.ExecutionException;
039import java.util.concurrent.TimeUnit;
040import java.util.concurrent.TimeoutException;
041import java.util.concurrent.atomic.AtomicBoolean;
042import org.apache.hadoop.conf.Configuration;
043import org.apache.hadoop.fs.FileSystem;
044import org.apache.hadoop.fs.Path;
045import org.apache.hadoop.hbase.CellComparator;
046import org.apache.hadoop.hbase.Coprocessor;
047import org.apache.hadoop.hbase.CoprocessorEnvironment;
048import org.apache.hadoop.hbase.DoNotRetryIOException;
049import org.apache.hadoop.hbase.HBaseClassTestRule;
050import org.apache.hadoop.hbase.HBaseTestingUtil;
051import org.apache.hadoop.hbase.HConstants;
052import org.apache.hadoop.hbase.MasterNotRunningException;
053import org.apache.hadoop.hbase.PrivateCellUtil;
054import org.apache.hadoop.hbase.ServerName;
055import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
056import org.apache.hadoop.hbase.StartTestingClusterOption;
057import org.apache.hadoop.hbase.TableName;
058import org.apache.hadoop.hbase.ZooKeeperConnectionException;
059import org.apache.hadoop.hbase.client.Admin;
060import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
061import org.apache.hadoop.hbase.client.Consistency;
062import org.apache.hadoop.hbase.client.Delete;
063import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
064import org.apache.hadoop.hbase.client.Get;
065import org.apache.hadoop.hbase.client.Mutation;
066import org.apache.hadoop.hbase.client.Put;
067import org.apache.hadoop.hbase.client.RegionInfo;
068import org.apache.hadoop.hbase.client.Result;
069import org.apache.hadoop.hbase.client.ResultScanner;
070import org.apache.hadoop.hbase.client.Scan;
071import org.apache.hadoop.hbase.client.Table;
072import org.apache.hadoop.hbase.client.TableDescriptor;
073import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
074import org.apache.hadoop.hbase.client.TestReplicasClient.SlowMeCopro;
075import org.apache.hadoop.hbase.coprocessor.MasterCoprocessor;
076import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment;
077import org.apache.hadoop.hbase.coprocessor.MasterObserver;
078import org.apache.hadoop.hbase.coprocessor.ObserverContext;
079import org.apache.hadoop.hbase.io.HFileLink;
080import org.apache.hadoop.hbase.io.Reference;
081import org.apache.hadoop.hbase.master.HMaster;
082import org.apache.hadoop.hbase.master.MasterRpcServices;
083import org.apache.hadoop.hbase.master.RegionState;
084import org.apache.hadoop.hbase.master.RegionState.State;
085import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
086import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
087import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
088import org.apache.hadoop.hbase.master.assignment.RegionStates;
089import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
090import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
091import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
092import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
093import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
094import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
095import org.apache.hadoop.hbase.testclassification.LargeTests;
096import org.apache.hadoop.hbase.testclassification.RegionServerTests;
097import org.apache.hadoop.hbase.util.Bytes;
098import org.apache.hadoop.hbase.util.CommonFSUtils;
099import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
100import org.apache.hadoop.hbase.util.FSUtils;
101import org.apache.hadoop.hbase.util.FutureUtils;
102import org.apache.hadoop.hbase.util.HBaseFsck;
103import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
104import org.apache.hadoop.hbase.util.Threads;
105import org.apache.zookeeper.KeeperException;
106import org.apache.zookeeper.KeeperException.NodeExistsException;
107import org.junit.After;
108import org.junit.AfterClass;
109import org.junit.Assert;
110import org.junit.Before;
111import org.junit.BeforeClass;
112import org.junit.ClassRule;
113import org.junit.Rule;
114import org.junit.Test;
115import org.junit.experimental.categories.Category;
116import org.junit.rules.TestName;
117import org.mockito.Mockito;
118import org.slf4j.Logger;
119import org.slf4j.LoggerFactory;
120
121import org.apache.hbase.thirdparty.com.google.common.io.Closeables;
122import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
123import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
124
125import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
126import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
127import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
128import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse;
129
/**
 * Tests that exercise region splits against a running mini cluster.
 */
133@Category({ RegionServerTests.class, LargeTests.class })
134public class TestSplitTransactionOnCluster {
135
136  @ClassRule
137  public static final HBaseClassTestRule CLASS_RULE =
138    HBaseClassTestRule.forClass(TestSplitTransactionOnCluster.class);
139
140  private static final Logger LOG = LoggerFactory.getLogger(TestSplitTransactionOnCluster.class);
141  private Admin admin = null;
142  private SingleProcessHBaseCluster cluster = null;
143  private static final int NB_SERVERS = 3;
144
145  static final HBaseTestingUtil TESTING_UTIL = new HBaseTestingUtil();
146
147  @Rule
148  public TestName name = new TestName();
149
150  @BeforeClass
151  public static void before() throws Exception {
152    TESTING_UTIL.getConfiguration().setInt(HConstants.HBASE_BALANCER_PERIOD, 60000);
153    StartTestingClusterOption option = StartTestingClusterOption.builder()
154      .masterClass(MyMaster.class).numRegionServers(NB_SERVERS).numDataNodes(NB_SERVERS).build();
155    TESTING_UTIL.startMiniCluster(option);
156  }
157
158  @AfterClass
159  public static void after() throws Exception {
160    TESTING_UTIL.shutdownMiniCluster();
161  }
162
163  @Before
164  public void setup() throws IOException {
165    TESTING_UTIL.ensureSomeNonStoppedRegionServersAvailable(NB_SERVERS);
166    this.admin = TESTING_UTIL.getAdmin();
167    this.cluster = TESTING_UTIL.getMiniHBaseCluster();
168  }
169
170  @After
171  public void tearDown() throws Exception {
172    this.admin.close();
173    for (TableDescriptor htd : this.admin.listTableDescriptors()) {
174      LOG.info("Tear down, remove table=" + htd.getTableName());
175      TESTING_UTIL.deleteTable(htd.getTableName());
176    }
177  }
178
179  private RegionInfo getAndCheckSingleTableRegion(final List<HRegion> regions)
180    throws IOException, InterruptedException {
181    assertEquals(1, regions.size());
182    RegionInfo hri = regions.get(0).getRegionInfo();
183    AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
184    return hri;
185  }
186
187  private void requestSplitRegion(final HRegionServer rsServer, final Region region,
188    final byte[] midKey) throws IOException {
189    long procId = cluster.getMaster().splitRegion(region.getRegionInfo(), midKey, 0, 0);
190    // wait for the split to complete or get interrupted. If the split completes successfully,
191    // the procedure will return true; if the split fails, the procedure would throw exception.
192    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
193  }
194
195  @Test
196  public void testRITStateForRollback() throws Exception {
197    final TableName tableName = TableName.valueOf(name.getMethodName());
198    final HMaster master = cluster.getMaster();
199    try {
200      // Create table then get the single region for our new table.
201      Table t = createTableAndWait(tableName, Bytes.toBytes("cf"));
202      final List<HRegion> regions = cluster.getRegions(tableName);
203      final RegionInfo hri = getAndCheckSingleTableRegion(regions);
204      insertData(tableName, admin, t);
205      t.close();
206
207      // Turn off balancer so it doesn't cut in and mess up our placements.
208      this.admin.balancerSwitch(false, true);
209      // Turn off the meta scanner so it don't remove parent on us.
210      master.setCatalogJanitorEnabled(false);
211
212      // find a splittable region
213      final HRegion region = findSplittableRegion(regions);
214      assertTrue("not able to find a splittable region", region != null);
215
216      // install master co-processor to fail splits
217      master.getMasterCoprocessorHost().load(FailingSplitMasterObserver.class,
218        Coprocessor.PRIORITY_USER, master.getConfiguration());
219
220      // split async
221      this.admin.splitRegionAsync(region.getRegionInfo().getRegionName(), new byte[] { 42 });
222
223      // we have to wait until the SPLITTING state is seen by the master
224      FailingSplitMasterObserver observer =
225        master.getMasterCoprocessorHost().findCoprocessor(FailingSplitMasterObserver.class);
226      assertNotNull(observer);
227      observer.latch.await();
228
229      LOG.info("Waiting for region to come out of RIT");
230      while (!cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri)) {
231        Threads.sleep(100);
232      }
233      assertTrue(cluster.getMaster().getAssignmentManager().getRegionStates().isRegionOnline(hri));
234    } finally {
235      admin.balancerSwitch(true, false);
236      master.setCatalogJanitorEnabled(true);
237      abortAndWaitForMaster();
238      TESTING_UTIL.deleteTable(tableName);
239    }
240  }
241
242  @Test
243  public void testSplitFailedCompactionAndSplit() throws Exception {
244    final TableName tableName = TableName.valueOf(name.getMethodName());
245    // Create table then get the single region for our new table.
246    byte[] cf = Bytes.toBytes("cf");
247    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
248      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
249    admin.createTable(htd);
250
251    for (int i = 0; cluster.getRegions(tableName).isEmpty() && i < 100; i++) {
252      Thread.sleep(100);
253    }
254    assertEquals(1, cluster.getRegions(tableName).size());
255
256    HRegion region = cluster.getRegions(tableName).get(0);
257    HStore store = region.getStore(cf);
258    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
259    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
260
261    Table t = TESTING_UTIL.getConnection().getTable(tableName);
262    // insert data
263    insertData(tableName, admin, t);
264    insertData(tableName, admin, t);
265
266    int fileNum = store.getStorefiles().size();
267    // 0, Compaction Request
268    store.triggerMajorCompaction();
269    Optional<CompactionContext> cc = store.requestCompaction();
270    assertTrue(cc.isPresent());
271    // 1, A timeout split
272    // 1.1 close region
273    assertEquals(2, region.close(false).get(cf).size());
274    // 1.2 rollback and Region initialize again
275    region.initialize();
276
277    // 2, Run Compaction cc
278    assertFalse(region.compact(cc.get(), store, NoLimitThroughputController.INSTANCE));
279    assertTrue(fileNum > store.getStorefiles().size());
280
281    // 3, Split
282    requestSplitRegion(regionServer, region, Bytes.toBytes("row3"));
283    assertEquals(2, cluster.getRegions(tableName).size());
284  }
285
286  @Test
287  public void testSplitCompactWithPriority() throws Exception {
288    final TableName tableName = TableName.valueOf(name.getMethodName());
289    // Create table then get the single region for our new table.
290    byte[] cf = Bytes.toBytes("cf");
291    TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
292      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)).build();
293    admin.createTable(htd);
294
295    assertNotEquals("Unable to retrieve regions of the table", -1,
296      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
297
298    HRegion region = cluster.getRegions(tableName).get(0);
299    HStore store = region.getStore(cf);
300    int regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
301    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
302
303    Table table = TESTING_UTIL.getConnection().getTable(tableName);
304    // insert data
305    insertData(tableName, admin, table);
306    insertData(tableName, admin, table, 20);
307    insertData(tableName, admin, table, 40);
308
309    // Compaction Request
310    store.triggerMajorCompaction();
311    Optional<CompactionContext> compactionContext = store.requestCompaction();
312    assertTrue(compactionContext.isPresent());
313    assertFalse(compactionContext.get().getRequest().isAfterSplit());
314    assertEquals(compactionContext.get().getRequest().getPriority(), 13);
315
316    // Split
317    long procId =
318      cluster.getMaster().splitRegion(region.getRegionInfo(), Bytes.toBytes("row4"), 0, 0);
319
320    // wait for the split to complete or get interrupted. If the split completes successfully,
321    // the procedure will return true; if the split fails, the procedure would throw exception.
322    ProcedureTestingUtility.waitProcedure(cluster.getMaster().getMasterProcedureExecutor(), procId);
323    Thread.sleep(3000);
324    assertNotEquals("Table is not split properly?", -1,
325      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
326    // we have 2 daughter regions
327    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
328    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
329    HStore hStore1 = hRegion1.getStore(cf);
330    HStore hStore2 = hRegion2.getStore(cf);
331
332    // For hStore1 && hStore2, set mock reference to one of the storeFiles
333    StoreFileInfo storeFileInfo1 = new ArrayList<>(hStore1.getStorefiles()).get(0).getFileInfo();
334    StoreFileInfo storeFileInfo2 = new ArrayList<>(hStore2.getStorefiles()).get(0).getFileInfo();
335    Field field = StoreFileInfo.class.getDeclaredField("reference");
336    field.setAccessible(true);
337    field.set(storeFileInfo1, Mockito.mock(Reference.class));
338    field.set(storeFileInfo2, Mockito.mock(Reference.class));
339    hStore1.triggerMajorCompaction();
340    hStore2.triggerMajorCompaction();
341
342    compactionContext = hStore1.requestCompaction();
343    assertTrue(compactionContext.isPresent());
344    // since we set mock reference to one of the storeFiles, we will get isAfterSplit=true &&
345    // highest priority for hStore1's compactionContext
346    assertTrue(compactionContext.get().getRequest().isAfterSplit());
347    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 1000);
348
349    compactionContext =
350      hStore2.requestCompaction(Integer.MIN_VALUE + 10, CompactionLifeCycleTracker.DUMMY, null);
351    assertTrue(compactionContext.isPresent());
352    // compaction request contains higher priority than default priority of daughter region
353    // compaction (Integer.MIN_VALUE + 1000), hence we are expecting request priority to
354    // be accepted.
355    assertTrue(compactionContext.get().getRequest().isAfterSplit());
356    assertEquals(compactionContext.get().getRequest().getPriority(), Integer.MIN_VALUE + 10);
357    admin.disableTable(tableName);
358    admin.deleteTable(tableName);
359  }
360
361  @Test
362  public void testContinuousSplitUsingLinkFile() throws Exception {
363    final TableName tableName = TableName.valueOf(name.getMethodName());
364    // Create table then get the single region for our new table.
365    byte[] cf = Bytes.toBytes("cf");
366    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName)
367      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf));
368    String splitPolicy = ConstantSizeRegionSplitPolicy.class.getName();
369    builder.setValue(SPLIT_POLICY, splitPolicy);
370
371    admin.createTable(builder.build());
372    admin.compactionSwitch(false, new ArrayList<>());
373
374    assertNotEquals("Unable to retrieve regions of the table", -1,
375      TESTING_UTIL.waitFor(10000, () -> cluster.getRegions(tableName).size() == 1));
376    Table table = TESTING_UTIL.getConnection().getTable(tableName);
377    // insert data
378    insertData(tableName, admin, table, 10);
379    insertData(tableName, admin, table, 20);
380    insertData(tableName, admin, table, 40);
381    int rowCount = 3 * 4;
382    Scan scan = new Scan();
383    scanValidate(scan, rowCount, table);
384
385    // Split
386    admin.splitRegionAsync(cluster.getRegions(tableName).get(0).getRegionInfo().getRegionName(),
387      Bytes.toBytes("row14"));
388    // wait for the split to complete or get interrupted. If the split completes successfully,
389    // the procedure will return true; if the split fails, the procedure would throw exception.
390    Thread.sleep(3000);
391    assertNotEquals("Table is not split properly?", -1,
392      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 2));
393    // we have 2 daughter regions
394    HRegion hRegion1 = cluster.getRegions(tableName).get(0);
395    HRegion hRegion2 = cluster.getRegions(tableName).get(1);
396    HStore hStore1 = hRegion1.getStore(cf);
397    HStore hStore2 = hRegion2.getStore(cf);
398    // the sum of store files of the two children should be equal to their parent
399    assertEquals(3, hStore1.getStorefilesCount() + hStore2.getStorefilesCount());
400    // both the two children should have link files
401    for (StoreFile sf : hStore1.getStorefiles()) {
402      assertTrue(HFileLink.isHFileLink(sf.getPath()));
403    }
404    for (StoreFile sf : hStore2.getStorefiles()) {
405      assertTrue(HFileLink.isHFileLink(sf.getPath()));
406    }
407    // validate children data
408    scan = new Scan();
409    scanValidate(scan, rowCount, table);
410
411    // Continuous Split
412    findRegionToSplit(tableName, "row24");
413    Thread.sleep(3000);
414    assertNotEquals("Table is not split properly?", -1,
415      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 3));
416    // now table has 3 region, each region should have one link file
417    for (HRegion newRegion : cluster.getRegions(tableName)) {
418      assertEquals(1, newRegion.getStore(cf).getStorefilesCount());
419      assertTrue(
420        HFileLink.isHFileLink(newRegion.getStore(cf).getStorefiles().iterator().next().getPath()));
421    }
422
423    scan = new Scan();
424    scanValidate(scan, rowCount, table);
425
426    // Continuous Split, random split HFileLink, generate Reference files.
427    // After this, can not continuous split, because there are reference files.
428    findRegionToSplit(tableName, "row11");
429    Thread.sleep(3000);
430    assertNotEquals("Table is not split properly?", -1,
431      TESTING_UTIL.waitFor(3000, () -> cluster.getRegions(tableName).size() == 4));
432
433    scan = new Scan();
434    scanValidate(scan, rowCount, table);
435  }
436
437  private void findRegionToSplit(TableName tableName, String splitRowKey) throws Exception {
438    HRegion toSplit = null;
439    byte[] toSplitKey = Bytes.toBytes(splitRowKey);
440    for (HRegion rg : cluster.getRegions(tableName)) {
441      LOG.debug(
442        "startKey=" + Bytes.toStringBinary(rg.getRegionInfo().getStartKey()) + ", getEndKey()="
443          + Bytes.toStringBinary(rg.getRegionInfo().getEndKey()) + ", row=" + splitRowKey);
444      if (
445        (rg.getRegionInfo().getStartKey().length == 0 || CellComparator.getInstance().compare(
446          PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getStartKey()),
447          PrivateCellUtil.createFirstOnRow(toSplitKey)) <= 0)
448          && (rg.getRegionInfo().getEndKey().length == 0 || CellComparator.getInstance().compare(
449            PrivateCellUtil.createFirstOnRow(rg.getRegionInfo().getEndKey()),
450            PrivateCellUtil.createFirstOnRow(toSplitKey)) >= 0)
451      ) {
452        toSplit = rg;
453      }
454    }
455    assertNotNull(toSplit);
456    admin.splitRegionAsync(toSplit.getRegionInfo().getRegionName(), toSplitKey);
457  }
458
459  private static void scanValidate(Scan scan, int expectedRowCount, Table table)
460    throws IOException {
461    ResultScanner scanner = table.getScanner(scan);
462    int rows = 0;
463    for (Result result : scanner) {
464      rows++;
465    }
466    scanner.close();
467    assertEquals(expectedRowCount, rows);
468  }
469
470  public static class FailingSplitMasterObserver implements MasterCoprocessor, MasterObserver {
471    volatile CountDownLatch latch;
472
473    @Override
474    public void start(CoprocessorEnvironment e) throws IOException {
475      latch = new CountDownLatch(1);
476    }
477
478    @Override
479    public Optional<MasterObserver> getMasterObserver() {
480      return Optional.of(this);
481    }
482
483    @Override
484    public void preSplitRegionBeforeMETAAction(
485      final ObserverContext<MasterCoprocessorEnvironment> ctx, final byte[] splitKey,
486      final List<Mutation> metaEntries) throws IOException {
487      latch.countDown();
488      throw new IOException("Causing rollback of region split");
489    }
490  }
491
492  @Test
493  public void testSplitRollbackOnRegionClosing() throws Exception {
494    final TableName tableName = TableName.valueOf(name.getMethodName());
495
496    // Create table then get the single region for our new table.
497    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
498    List<HRegion> regions = cluster.getRegions(tableName);
499    RegionInfo hri = getAndCheckSingleTableRegion(regions);
500
501    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
502
503    RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
504
505    // Turn off balancer so it doesn't cut in and mess up our placements.
506    this.admin.balancerSwitch(false, true);
507    // Turn off the meta scanner so it don't remove parent on us.
508    cluster.getMaster().setCatalogJanitorEnabled(false);
509    try {
510      // Add a bit of load up into the table so splittable.
511      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
512      // Get region pre-split.
513      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
514      printOutRegions(server, "Initial regions: ");
515      int regionCount = cluster.getRegions(hri.getTable()).size();
516      regionStates.updateRegionState(hri, RegionState.State.CLOSING);
517
518      // Now try splitting.... should fail. And each should successfully
519      // rollback.
520      // We don't roll back here anymore. Instead we fail-fast on construction of the
521      // split transaction. Catch the exception instead.
522      try {
523        FutureUtils.get(this.admin.splitRegionAsync(hri.getRegionName()));
524        fail();
525      } catch (DoNotRetryRegionException e) {
526        // Expected
527      }
528      // Wait around a while and assert count of regions remains constant.
529      for (int i = 0; i < 10; i++) {
530        Thread.sleep(100);
531        assertEquals(regionCount, cluster.getRegions(hri.getTable()).size());
532      }
533      regionStates.updateRegionState(hri, State.OPEN);
534      // Now try splitting and it should work.
535      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
536      // Get daughters
537      checkAndGetDaughters(tableName);
538      // OK, so split happened after we cleared the blocking node.
539    } finally {
540      admin.balancerSwitch(true, false);
541      cluster.getMaster().setCatalogJanitorEnabled(true);
542      t.close();
543    }
544  }
545
546  /**
547   * Test that if daughter split on us, we won't do the shutdown handler fixup just because we can't
548   * find the immediate daughter of an offlined parent.
549   */
550  @Test
551  public void testShutdownFixupWhenDaughterHasSplit() throws Exception {
552    final TableName tableName = TableName.valueOf(name.getMethodName());
553
554    // Create table then get the single region for our new table.
555    Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
556    List<HRegion> regions = cluster.getRegions(tableName);
557    RegionInfo hri = getAndCheckSingleTableRegion(regions);
558    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
559
560    // Turn off balancer so it doesn't cut in and mess up our placements.
561    this.admin.balancerSwitch(false, true);
562    // Turn off the meta scanner so it don't remove parent on us.
563    cluster.getMaster().setCatalogJanitorEnabled(false);
564    try {
565      // Add a bit of load up into the table so splittable.
566      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
567      // Get region pre-split.
568      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
569      printOutRegions(server, "Initial regions: ");
570      // Now split.
571      admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
572      // Get daughters
573      List<HRegion> daughters = checkAndGetDaughters(tableName);
574      // Now split one of the daughters.
575      HRegion daughterRegion = daughters.get(0);
576      RegionInfo daughter = daughterRegion.getRegionInfo();
577      LOG.info("Daughter we are going to split: " + daughter);
578      clearReferences(daughterRegion);
579      LOG.info("Finished {} references={}", daughterRegion, daughterRegion.hasReferences());
580      admin.splitRegionAsync(daughter.getRegionName()).get(2, TimeUnit.MINUTES);
581      // Get list of daughters
582      daughters = cluster.getRegions(tableName);
583      for (HRegion d : daughters) {
584        LOG.info("Regions before crash: " + d);
585      }
586      // Now crash the server
587      cluster.abortRegionServer(tableRegionIndex);
588      waitUntilRegionServerDead();
589      awaitDaughters(tableName, daughters.size());
590      // Assert daughters are online and ONLY the original daughters -- that
591      // fixup didn't insert one during server shutdown recover.
592      regions = cluster.getRegions(tableName);
593      for (HRegion d : daughters) {
594        LOG.info("Regions after crash: " + d);
595      }
596      if (daughters.size() != regions.size()) {
597        LOG.info("Daughters=" + daughters.size() + ", regions=" + regions.size());
598      }
599      assertEquals(daughters.size(), regions.size());
600      for (HRegion r : regions) {
601        LOG.info("Regions post crash " + r + ", contains=" + daughters.contains(r));
602        assertTrue("Missing region post crash " + r, daughters.contains(r));
603      }
604    } finally {
605      LOG.info("EXITING");
606      admin.balancerSwitch(true, false);
607      cluster.getMaster().setCatalogJanitorEnabled(true);
608      t.close();
609    }
610  }
611
612  private void clearReferences(HRegion region) throws IOException {
613    // Presumption.
614    assertEquals(1, region.getStores().size());
615    HStore store = region.getStores().get(0);
616    while (store.hasReferences()) {
617      while (store.storeEngine.getCompactor().isCompacting()) {
618        Threads.sleep(100);
619      }
620      // Run new compaction. Shoudn't be any others running.
621      region.compact(true);
622      store.closeAndArchiveCompactedFiles();
623    }
624  }
625
626  @Test
627  public void testSplitShouldNotThrowNPEEvenARegionHasEmptySplitFiles() throws Exception {
628    TableName userTableName = TableName.valueOf(name.getMethodName());
629    TableDescriptor htd = TableDescriptorBuilder.newBuilder(userTableName)
630      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("col")).build();
631    admin.createTable(htd);
632    Table table = TESTING_UTIL.getConnection().getTable(userTableName);
633    try {
634      for (int i = 0; i <= 5; i++) {
635        String row = "row" + i;
636        Put p = new Put(Bytes.toBytes(row));
637        String val = "Val" + i;
638        p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes(val));
639        table.put(p);
640        admin.flush(userTableName);
641        Delete d = new Delete(Bytes.toBytes(row));
642        // Do a normal delete
643        table.delete(d);
644        admin.flush(userTableName);
645      }
646      admin.majorCompact(userTableName);
647      List<RegionInfo> regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
648        .getRegionsOfTable(userTableName);
649      assertEquals(1, regionsOfTable.size());
650      RegionInfo hRegionInfo = regionsOfTable.get(0);
651      Put p = new Put(Bytes.toBytes("row6"));
652      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
653      table.put(p);
654      p = new Put(Bytes.toBytes("row7"));
655      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
656      table.put(p);
657      p = new Put(Bytes.toBytes("row8"));
658      p.addColumn(Bytes.toBytes("col"), Bytes.toBytes("ql"), Bytes.toBytes("val"));
659      table.put(p);
660      admin.flush(userTableName);
661      admin.splitRegionAsync(hRegionInfo.getRegionName(), Bytes.toBytes("row7"));
662      regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
663        .getRegionsOfTable(userTableName);
664
665      while (regionsOfTable.size() != 2) {
666        Thread.sleep(1000);
667        regionsOfTable = cluster.getMaster().getAssignmentManager().getRegionStates()
668          .getRegionsOfTable(userTableName);
669        LOG.debug("waiting 2 regions to be available, got " + regionsOfTable.size() + ": "
670          + regionsOfTable);
671
672      }
673      Assert.assertEquals(2, regionsOfTable.size());
674
675      Scan s = new Scan();
676      ResultScanner scanner = table.getScanner(s);
677      int mainTableCount = 0;
678      for (Result rr = scanner.next(); rr != null; rr = scanner.next()) {
679        mainTableCount++;
680      }
681      Assert.assertEquals(3, mainTableCount);
682    } finally {
683      table.close();
684    }
685  }
686
687  /**
688   * Verifies HBASE-5806. Here the case is that splitting is completed but before the CJ could
689   * remove the parent region the master is killed and restarted.
690   */
691  @Test
692  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
693    throws IOException, InterruptedException, NodeExistsException, KeeperException,
694    ServiceException, ExecutionException, TimeoutException {
695    final TableName tableName = TableName.valueOf(name.getMethodName());
696    // Create table then get the single region for our new table.
697    try (Table t = createTableAndWait(tableName, HConstants.CATALOG_FAMILY)) {
698      List<HRegion> regions = cluster.getRegions(tableName);
699      RegionInfo hri = getAndCheckSingleTableRegion(regions);
700
701      int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
702
703      // Turn off balancer so it doesn't cut in and mess up our placements.
704      this.admin.balancerSwitch(false, true);
705      // Turn off the meta scanner so it don't remove parent on us.
706      cluster.getMaster().setCatalogJanitorEnabled(false);
707      // Add a bit of load up into the table so splittable.
708      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY, false);
709      // Get region pre-split.
710      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
711      printOutRegions(server, "Initial regions: ");
712      // Call split.
713      this.admin.splitRegionAsync(hri.getRegionName()).get(2, TimeUnit.MINUTES);
714      List<HRegion> daughters = checkAndGetDaughters(tableName);
715
716      // Before cleanup, get a new master.
717      HMaster master = abortAndWaitForMaster();
718      // Now call compact on the daughters and clean up any references.
719      for (HRegion daughter : daughters) {
720        clearReferences(daughter);
721        assertFalse(daughter.hasReferences());
722      }
723      // BUT calling compact on the daughters is not enough. The CatalogJanitor looks
724      // in the filesystem, and the filesystem content is not same as what the Region
725      // is reading from. Compacted-away files are picked up later by the compacted
726      // file discharger process. It runs infrequently. Make it run so CatalogJanitor
727      // doens't find any references.
728      for (RegionServerThread rst : cluster.getRegionServerThreads()) {
729        boolean oldSetting = rst.getRegionServer().compactedFileDischarger.setUseExecutor(false);
730        rst.getRegionServer().compactedFileDischarger.run();
731        rst.getRegionServer().compactedFileDischarger.setUseExecutor(oldSetting);
732      }
733      cluster.getMaster().setCatalogJanitorEnabled(true);
734      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
735      LOG.info("Starting run of CatalogJanitor");
736      cluster.getMaster().getCatalogJanitor().run();
737      ProcedureTestingUtility.waitAllProcedures(cluster.getMaster().getMasterProcedureExecutor());
738      RegionStates regionStates = master.getAssignmentManager().getRegionStates();
739      ServerName regionServerOfRegion = regionStates.getRegionServerOfRegion(hri);
740      assertEquals(null, regionServerOfRegion);
741    } finally {
742      TESTING_UTIL.getAdmin().balancerSwitch(true, false);
743      cluster.getMaster().setCatalogJanitorEnabled(true);
744    }
745  }
746
747  @Test
748  public void testSplitWithRegionReplicas() throws Exception {
749    final TableName tableName = TableName.valueOf(name.getMethodName());
750    TableDescriptor htd = TESTING_UTIL
751      .createModifyableTableDescriptor(TableName.valueOf(name.getMethodName()),
752        ColumnFamilyDescriptorBuilder.DEFAULT_MIN_VERSIONS, 3, HConstants.FOREVER,
753        ColumnFamilyDescriptorBuilder.DEFAULT_KEEP_DELETED)
754      .setRegionReplication(2).setCoprocessor(SlowMeCopro.class.getName()).build();
755    // Create table then get the single region for our new table.
756    Table t = TESTING_UTIL.createTable(htd, new byte[][] { Bytes.toBytes("cf") }, null);
757    List<HRegion> oldRegions;
758    do {
759      oldRegions = cluster.getRegions(tableName);
760      Thread.sleep(10);
761    } while (oldRegions.size() != 2);
762    for (HRegion h : oldRegions)
763      LOG.debug("OLDREGION " + h.getRegionInfo());
764    try {
765      int regionServerIndex =
766        cluster.getServerWith(oldRegions.get(0).getRegionInfo().getRegionName());
767      HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
768      insertData(tableName, admin, t);
769      // Turn off balancer so it doesn't cut in and mess up our placements.
770      admin.balancerSwitch(false, true);
771      // Turn off the meta scanner so it don't remove parent on us.
772      cluster.getMaster().setCatalogJanitorEnabled(false);
773      boolean tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
774      assertEquals("The specified table should be present.", true, tableExists);
775      final HRegion region = findSplittableRegion(oldRegions);
776      regionServerIndex = cluster.getServerWith(region.getRegionInfo().getRegionName());
777      regionServer = cluster.getRegionServer(regionServerIndex);
778      assertTrue("not able to find a splittable region", region != null);
779      try {
780        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
781      } catch (IOException e) {
782        e.printStackTrace();
783        fail("Split execution should have succeeded with no exceptions thrown " + e);
784      }
785      // TESTING_UTIL.waitUntilAllRegionsAssigned(tableName);
786      List<HRegion> newRegions;
787      do {
788        newRegions = cluster.getRegions(tableName);
789        for (HRegion h : newRegions)
790          LOG.debug("NEWREGION " + h.getRegionInfo());
791        Thread.sleep(1000);
792      } while (
793        (newRegions.contains(oldRegions.get(0)) || newRegions.contains(oldRegions.get(1)))
794          || newRegions.size() != 4
795      );
796      tableExists = TESTING_UTIL.getAdmin().tableExists(tableName);
797      assertEquals("The specified table should be present.", true, tableExists);
798      // exists works on stale and we see the put after the flush
799      byte[] b1 = Bytes.toBytes("row1");
800      Get g = new Get(b1);
801      g.setConsistency(Consistency.STRONG);
802      // The following GET will make a trip to the meta to get the new location of the 1st daughter
803      // In the process it will also get the location of the replica of the daughter (initially
804      // pointing to the parent's replica)
805      Result r = t.get(g);
806      Assert.assertFalse(r.isStale());
807      LOG.info("exists stale after flush done");
808
809      SlowMeCopro.getPrimaryCdl().set(new CountDownLatch(1));
810      g = new Get(b1);
811      g.setConsistency(Consistency.TIMELINE);
812      // This will succeed because in the previous GET we get the location of the replica
813      r = t.get(g);
814      Assert.assertTrue(r.isStale());
815      SlowMeCopro.getPrimaryCdl().get().countDown();
816    } finally {
817      SlowMeCopro.getPrimaryCdl().get().countDown();
818      admin.balancerSwitch(true, false);
819      cluster.getMaster().setCatalogJanitorEnabled(true);
820      t.close();
821    }
822  }
823
824  private void insertData(final TableName tableName, Admin admin, Table t) throws IOException {
825    insertData(tableName, admin, t, 1);
826  }
827
828  private void insertData(TableName tableName, Admin admin, Table t, int i) throws IOException {
829    Put p = new Put(Bytes.toBytes("row" + i));
830    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("1"));
831    t.put(p);
832    p = new Put(Bytes.toBytes("row" + (i + 1)));
833    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("2"));
834    t.put(p);
835    p = new Put(Bytes.toBytes("row" + (i + 2)));
836    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("3"));
837    t.put(p);
838    p = new Put(Bytes.toBytes("row" + (i + 3)));
839    p.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q1"), Bytes.toBytes("4"));
840    t.put(p);
841    admin.flush(tableName);
842  }
843
844  /**
845   * If a table has regions that have no store files in a region, they should split successfully
846   * into two regions with no store files.
847   */
848  @Test
849  public void testSplitRegionWithNoStoreFiles() throws Exception {
850    final TableName tableName = TableName.valueOf(name.getMethodName());
851    // Create table then get the single region for our new table.
852    createTableAndWait(tableName, HConstants.CATALOG_FAMILY);
853    List<HRegion> regions = cluster.getRegions(tableName);
854    RegionInfo hri = getAndCheckSingleTableRegion(regions);
855    ensureTableRegionNotOnSameServerAsMeta(admin, hri);
856    int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionInfo().getRegionName());
857    HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
858    // Turn off balancer so it doesn't cut in and mess up our placements.
859    this.admin.balancerSwitch(false, true);
860    // Turn off the meta scanner so it don't remove parent on us.
861    cluster.getMaster().setCatalogJanitorEnabled(false);
862    try {
863      // Precondition: we created a table with no data, no store files.
864      printOutRegions(regionServer, "Initial regions: ");
865      Configuration conf = cluster.getConfiguration();
866      HBaseFsck.debugLsr(conf, new Path("/"));
867      Path rootDir = CommonFSUtils.getRootDir(conf);
868      FileSystem fs = TESTING_UTIL.getDFSCluster().getFileSystem();
869      Map<String, Path> storefiles = FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
870      assertEquals("Expected nothing but found " + storefiles.toString(), 0, storefiles.size());
871
872      // find a splittable region. Refresh the regions list
873      regions = cluster.getRegions(tableName);
874      final HRegion region = findSplittableRegion(regions);
875      assertTrue("not able to find a splittable region", region != null);
876
877      // Now split.
878      try {
879        requestSplitRegion(regionServer, region, Bytes.toBytes("row2"));
880      } catch (IOException e) {
881        fail("Split execution should have succeeded with no exceptions thrown");
882      }
883
884      // Postcondition: split the table with no store files into two regions, but still have no
885      // store files
886      List<HRegion> daughters = cluster.getRegions(tableName);
887      assertEquals(2, daughters.size());
888
889      // check dirs
890      HBaseFsck.debugLsr(conf, new Path("/"));
891      Map<String, Path> storefilesAfter =
892        FSUtils.getTableStoreFilePathMap(null, fs, rootDir, tableName);
893      assertEquals("Expected nothing but found " + storefilesAfter.toString(), 0,
894        storefilesAfter.size());
895
896      hri = region.getRegionInfo(); // split parent
897      AssignmentManager am = cluster.getMaster().getAssignmentManager();
898      RegionStates regionStates = am.getRegionStates();
899      long start = EnvironmentEdgeManager.currentTime();
900      while (!regionStates.isRegionInState(hri, State.SPLIT)) {
901        LOG.debug("Waiting for SPLIT state on: " + hri);
902        assertFalse("Timed out in waiting split parent to be in state SPLIT",
903          EnvironmentEdgeManager.currentTime() - start > 60000);
904        Thread.sleep(500);
905      }
906      assertTrue(regionStates.isRegionInState(daughters.get(0).getRegionInfo(), State.OPEN));
907      assertTrue(regionStates.isRegionInState(daughters.get(1).getRegionInfo(), State.OPEN));
908
909      // We should not be able to assign it again
910      try {
911        am.assign(hri);
912      } catch (DoNotRetryIOException e) {
913        // Expected
914      }
915      assertFalse("Split region can't be assigned", regionStates.isRegionInTransition(hri));
916      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
917
918      // We should not be able to unassign it either
919      try {
920        am.unassign(hri);
921        fail("Should have thrown exception");
922      } catch (DoNotRetryIOException e) {
923        // Expected
924      }
925      assertFalse("Split region can't be unassigned", regionStates.isRegionInTransition(hri));
926      assertTrue(regionStates.isRegionInState(hri, State.SPLIT));
927    } finally {
928      admin.balancerSwitch(true, false);
929      cluster.getMaster().setCatalogJanitorEnabled(true);
930    }
931  }
932
933  @Test
934  public void testStoreFileReferenceCreationWhenSplitPolicySaysToSkipRangeCheck() throws Exception {
935    final TableName tableName = TableName.valueOf(name.getMethodName());
936    try {
937      byte[] cf = Bytes.toBytes("f");
938      byte[] cf1 = Bytes.toBytes("i_f");
939      TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName)
940        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
941        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf1))
942        .setRegionSplitPolicyClassName(CustomSplitPolicy.class.getName()).build();
943      admin.createTable(htd);
944      List<HRegion> regions = awaitTableRegions(tableName);
945      HRegion region = regions.get(0);
946      for (int i = 3; i < 9; i++) {
947        Put p = new Put(Bytes.toBytes("row" + i));
948        p.addColumn(cf, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
949        p.addColumn(cf1, Bytes.toBytes("q"), Bytes.toBytes("value" + i));
950        region.put(p);
951      }
952      region.flush(true);
953      HStore store = region.getStore(cf);
954      Collection<HStoreFile> storefiles = store.getStorefiles();
955      assertEquals(1, storefiles.size());
956      assertFalse(region.hasReferences());
957      HRegionFileSystem hfs = region.getRegionFileSystem();
958      StoreFileTracker sft = StoreFileTrackerFactory.create(TESTING_UTIL.getConfiguration(), true,
959        store.getStoreContext());
960      Path referencePath = hfs.splitStoreFile(region.getRegionInfo(), "f",
961        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft);
962      assertNull(referencePath);
963      referencePath = hfs.splitStoreFile(region.getRegionInfo(), "i_f",
964        storefiles.iterator().next(), Bytes.toBytes("row1"), false, region.getSplitPolicy(), sft);
965      assertNotNull(referencePath);
966    } finally {
967      TESTING_UTIL.deleteTable(tableName);
968    }
969  }
970
971  private HRegion findSplittableRegion(final List<HRegion> regions) throws InterruptedException {
972    for (int i = 0; i < 5; ++i) {
973      for (HRegion r : regions) {
974        if (r.isSplittable() && r.getRegionInfo().getReplicaId() == 0) {
975          return (r);
976        }
977      }
978      Thread.sleep(100);
979    }
980    return null;
981  }
982
983  private List<HRegion> checkAndGetDaughters(TableName tableName) throws InterruptedException {
984    List<HRegion> daughters = null;
985    // try up to 10s
986    for (int i = 0; i < 100; i++) {
987      daughters = cluster.getRegions(tableName);
988      if (daughters.size() >= 2) {
989        break;
990      }
991      Thread.sleep(100);
992    }
993    assertTrue(daughters.size() >= 2);
994    return daughters;
995  }
996
997  private HMaster abortAndWaitForMaster() throws IOException, InterruptedException {
998    cluster.abortMaster(0);
999    cluster.waitOnMaster(0);
1000    HMaster master = cluster.startMaster().getMaster();
1001    cluster.waitForActiveAndReadyMaster();
1002    // reset the connections
1003    Closeables.close(admin, true);
1004    TESTING_UTIL.invalidateConnection();
1005    admin = TESTING_UTIL.getAdmin();
1006    return master;
1007  }
1008
1009  /**
1010   * Ensure single table region is not on same server as the single hbase:meta table region.
1011   * @return Index of the server hosting the single table region
1012   */
1013  private int ensureTableRegionNotOnSameServerAsMeta(final Admin admin, final RegionInfo hri)
1014    throws IOException, MasterNotRunningException, ZooKeeperConnectionException,
1015    InterruptedException {
1016    // Now make sure that the table region is not on same server as that hosting
1017    // hbase:meta We don't want hbase:meta replay polluting our test when we later crash
1018    // the table region serving server.
1019    int metaServerIndex = cluster.getServerWithMeta();
1020    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerIndex);
1021    int tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1022    assertTrue(tableRegionIndex != -1);
1023    HRegionServer tableRegionServer = cluster.getRegionServer(tableRegionIndex);
1024    LOG.info("MetaRegionServer=" + metaRegionServer.getServerName() + ", other="
1025      + tableRegionServer.getServerName());
1026    if (metaRegionServer.getServerName().equals(tableRegionServer.getServerName())) {
1027      HRegionServer hrs = getOtherRegionServer(cluster, metaRegionServer);
1028      assertNotNull(hrs);
1029      assertNotNull(hri);
1030      LOG.info("Moving " + hri.getRegionNameAsString() + " from " + metaRegionServer.getServerName()
1031        + " to " + hrs.getServerName() + "; metaServerIndex=" + metaServerIndex);
1032      admin.move(hri.getEncodedNameAsBytes(), hrs.getServerName());
1033    }
1034    // Wait till table region is up on the server that is NOT carrying hbase:meta.
1035    for (int i = 0; i < 100; i++) {
1036      tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1037      if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
1038      LOG.debug("Waiting on region move off the hbase:meta server; current index "
1039        + tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
1040      Thread.sleep(100);
1041    }
1042    assertTrue("Region not moved off hbase:meta server, tableRegionIndex=" + tableRegionIndex,
1043      tableRegionIndex != -1 && tableRegionIndex != metaServerIndex);
1044    // Verify for sure table region is not on same server as hbase:meta
1045    tableRegionIndex = cluster.getServerWith(hri.getRegionName());
1046    assertTrue(tableRegionIndex != -1);
1047    assertNotSame(metaServerIndex, tableRegionIndex);
1048    return tableRegionIndex;
1049  }
1050
1051  /**
1052   * Find regionserver other than the one passed. Can't rely on indexes into list of regionservers
1053   * since crashed servers occupy an index.
1054   * @return A regionserver that is not <code>notThisOne</code> or null if none found
1055   */
1056  private HRegionServer getOtherRegionServer(final SingleProcessHBaseCluster cluster,
1057    final HRegionServer notThisOne) {
1058    for (RegionServerThread rst : cluster.getRegionServerThreads()) {
1059      HRegionServer hrs = rst.getRegionServer();
1060      if (hrs.getServerName().equals(notThisOne.getServerName())) continue;
1061      if (hrs.isStopping() || hrs.isStopped()) continue;
1062      return hrs;
1063    }
1064    return null;
1065  }
1066
1067  private void printOutRegions(final HRegionServer hrs, final String prefix) throws IOException {
1068    List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
1069    for (RegionInfo region : regions) {
1070      LOG.info(prefix + region.getRegionNameAsString());
1071    }
1072  }
1073
1074  private void waitUntilRegionServerDead() throws InterruptedException, IOException {
1075    // Wait until the master processes the RS shutdown
1076    for (int i =
1077      0; (cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1078        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS) && i < 100; i++) {
1079      LOG.info("Waiting on server to go down");
1080      Thread.sleep(100);
1081    }
1082    assertFalse("Waited too long for RS to die",
1083      cluster.getMaster().getClusterMetrics().getLiveServerMetrics().size() > NB_SERVERS
1084        || cluster.getLiveRegionServerThreads().size() > NB_SERVERS);
1085  }
1086
1087  private void awaitDaughters(TableName tableName, int numDaughters) throws InterruptedException {
1088    // Wait till regions are back on line again.
1089    for (int i = 0; cluster.getRegions(tableName).size() < numDaughters && i < 60; i++) {
1090      LOG.info("Waiting for repair to happen");
1091      Thread.sleep(1000);
1092    }
1093    if (cluster.getRegions(tableName).size() < numDaughters) {
1094      fail("Waiting too long for daughter regions");
1095    }
1096  }
1097
1098  private List<HRegion> awaitTableRegions(final TableName tableName) throws InterruptedException {
1099    List<HRegion> regions = null;
1100    for (int i = 0; i < 100; i++) {
1101      regions = cluster.getRegions(tableName);
1102      if (regions.size() > 0) break;
1103      Thread.sleep(100);
1104    }
1105    return regions;
1106  }
1107
1108  private Table createTableAndWait(TableName tableName, byte[] cf)
1109    throws IOException, InterruptedException {
1110    Table t = TESTING_UTIL.createTable(tableName, cf);
1111    awaitTableRegions(tableName);
1112    assertTrue("Table not online: " + tableName, cluster.getRegions(tableName).size() != 0);
1113    return t;
1114  }
1115
1116  // Make it public so that JVMClusterUtil can access it.
1117  public static class MyMaster extends HMaster {
1118    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
1119      super(conf);
1120    }
1121
1122    @Override
1123    protected MasterRpcServices createRpcServices() throws IOException {
1124      return new MyMasterRpcServices(this);
1125    }
1126  }
1127
1128  static class MyMasterRpcServices extends MasterRpcServices {
1129    static AtomicBoolean enabled = new AtomicBoolean(false);
1130
1131    private HMaster myMaster;
1132
1133    public MyMasterRpcServices(HMaster master) throws IOException {
1134      super(master);
1135      myMaster = master;
1136    }
1137
1138    @Override
1139    public ReportRegionStateTransitionResponse reportRegionStateTransition(RpcController c,
1140      ReportRegionStateTransitionRequest req) throws ServiceException {
1141      ReportRegionStateTransitionResponse resp = super.reportRegionStateTransition(c, req);
1142      if (
1143        enabled.get()
1144          && req.getTransition(0).getTransitionCode().equals(TransitionCode.READY_TO_SPLIT)
1145          && !resp.hasErrorMessage()
1146      ) {
1147        RegionStates regionStates = myMaster.getAssignmentManager().getRegionStates();
1148        for (RegionStateNode regionState : regionStates.getRegionsInTransition()) {
1149          /*
1150           * TODO!!!! // Find the merging_new region and remove it if (regionState.isSplittingNew())
1151           * { regionStates.deleteRegion(regionState.getRegion()); }
1152           */
1153        }
1154      }
1155      return resp;
1156    }
1157  }
1158
1159  static class CustomSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
1160
1161    @Override
1162    protected boolean shouldSplit() {
1163      return true;
1164    }
1165
1166    @Override
1167    public boolean skipStoreFileRangeCheck(String familyName) {
1168      if (familyName.startsWith("i_")) {
1169        return true;
1170      } else {
1171        return false;
1172      }
1173    }
1174  }
1175}