001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.assertProcFailed;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.List;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.TableName;
033import org.apache.hadoop.hbase.client.Admin;
034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
035import org.apache.hadoop.hbase.client.Put;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.client.SnapshotDescription;
038import org.apache.hadoop.hbase.client.SnapshotType;
039import org.apache.hadoop.hbase.client.Table;
040import org.apache.hadoop.hbase.client.TableDescriptor;
041import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
042import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
043import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
044import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
045import org.apache.hadoop.hbase.master.procedure.TestSnapshotProcedure;
046import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
047import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
048import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
049import org.apache.hadoop.hbase.regionserver.HRegion;
050import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
051import org.apache.hadoop.hbase.testclassification.LargeTests;
052import org.apache.hadoop.hbase.testclassification.MasterTests;
053import org.apache.hadoop.hbase.util.Bytes;
054import org.apache.hadoop.hbase.util.Threads;
055import org.junit.After;
056import org.junit.AfterClass;
057import org.junit.Before;
058import org.junit.BeforeClass;
059import org.junit.ClassRule;
060import org.junit.Rule;
061import org.junit.Test;
062import org.junit.experimental.categories.Category;
063import org.junit.rules.TestName;
064import org.slf4j.Logger;
065import org.slf4j.LoggerFactory;
066
067import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
068import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
069
070@Category({ MasterTests.class, LargeTests.class })
071public class TestMergeTableRegionsProcedure {
072
073  @ClassRule
074  public static final HBaseClassTestRule CLASS_RULE =
075    HBaseClassTestRule.forClass(TestMergeTableRegionsProcedure.class);
076
077  private static final Logger LOG = LoggerFactory.getLogger(TestMergeTableRegionsProcedure.class);
078  @Rule
079  public final TestName name = new TestName();
080
081  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
082
083  private static final int initialRegionCount = 4;
084  private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
085  private static Admin admin;
086
087  private ProcedureMetrics mergeProcMetrics;
088  private ProcedureMetrics assignProcMetrics;
089  private ProcedureMetrics unassignProcMetrics;
090  private long mergeSubmittedCount = 0;
091  private long mergeFailedCount = 0;
092  private long assignSubmittedCount = 0;
093  private long assignFailedCount = 0;
094  private long unassignSubmittedCount = 0;
095  private long unassignFailedCount = 0;
096
097  private static void setupConf(Configuration conf) {
098    // Reduce the maximum attempts to speed up the test
099    conf.setInt("hbase.assignment.maximum.attempts", 3);
100    conf.setInt("hbase.master.maximum.ping.server.attempts", 3);
101    conf.setInt("hbase.master.ping.server.retry.sleep.interval", 1);
102    conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
103  }
104
105  @BeforeClass
106  public static void setupCluster() throws Exception {
107    setupConf(UTIL.getConfiguration());
108    UTIL.startMiniCluster(1);
109    admin = UTIL.getAdmin();
110  }
111
112  @AfterClass
113  public static void cleanupTest() throws Exception {
114    UTIL.shutdownMiniCluster();
115  }
116
117  @Before
118  public void setup() throws Exception {
119    resetProcExecutorTestingKillFlag();
120    MasterProcedureTestingUtility.generateNonceGroup(UTIL.getHBaseCluster().getMaster());
121    MasterProcedureTestingUtility.generateNonce(UTIL.getHBaseCluster().getMaster());
122    // Turn off balancer so it doesn't cut in and mess up our placements.
123    admin.balancerSwitch(false, true);
124    // Turn off the meta scanner so it don't remove parent on us.
125    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(false);
126    resetProcExecutorTestingKillFlag();
127    AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
128    mergeProcMetrics = am.getAssignmentManagerMetrics().getMergeProcMetrics();
129    assignProcMetrics = am.getAssignmentManagerMetrics().getAssignProcMetrics();
130    unassignProcMetrics = am.getAssignmentManagerMetrics().getUnassignProcMetrics();
131  }
132
133  @After
134  public void tearDown() throws Exception {
135    resetProcExecutorTestingKillFlag();
136    for (TableDescriptor htd : admin.listTableDescriptors()) {
137      LOG.info("Tear down, remove table=" + htd.getTableName());
138      UTIL.deleteTable(htd.getTableName());
139    }
140  }
141
142  private void resetProcExecutorTestingKillFlag() {
143    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
144    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
145    assertTrue("expected executor to be running", procExec.isRunning());
146  }
147
148  private int loadARowPerRegion(final Table t, List<RegionInfo> ris) throws IOException {
149    List<Put> puts = new ArrayList<>();
150    for (RegionInfo ri : ris) {
151      Put put = new Put(ri.getStartKey() == null || ri.getStartKey().length == 0
152        ? new byte[] { 'a' }
153        : ri.getStartKey());
154      put.addColumn(HConstants.CATALOG_FAMILY, HConstants.CATALOG_FAMILY,
155        HConstants.CATALOG_FAMILY);
156      puts.add(put);
157    }
158    t.put(puts);
159    return puts.size();
160  }
161
162  /**
163   * This tests two region merges
164   */
165  @Test
166  public void testMergeTwoRegions() throws Exception {
167    final TableName tableName = TableName.valueOf(this.name.getMethodName());
168    UTIL.createTable(tableName, new byte[][] { HConstants.CATALOG_FAMILY }, new byte[][] {
169      new byte[] { 'b' }, new byte[] { 'c' }, new byte[] { 'd' }, new byte[] { 'e' } });
170    testMerge(tableName, 2);
171  }
172
173  private void testMerge(TableName tableName, int mergeCount) throws IOException {
174    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName);
175    int originalRegionCount = ris.size();
176    assertTrue(originalRegionCount > mergeCount);
177    RegionInfo[] regionsToMerge = ris.subList(0, mergeCount).toArray(new RegionInfo[] {});
178    int countOfRowsLoaded = 0;
179    try (Table table = UTIL.getConnection().getTable(tableName)) {
180      countOfRowsLoaded = loadARowPerRegion(table, ris);
181    }
182    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
183
184    // collect AM metrics before test
185    collectAssignmentManagerMetrics();
186    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
187    MergeTableRegionsProcedure proc =
188      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true);
189    long procId = procExec.submitProcedure(proc);
190    ProcedureTestingUtility.waitProcedure(procExec, procId);
191    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
192    MetaTableAccessor.fullScanMetaAndPrint(UTIL.getConnection());
193    assertEquals(originalRegionCount - mergeCount + 1,
194      MetaTableAccessor.getTableRegions(UTIL.getConnection(), tableName).size());
195
196    assertEquals(mergeSubmittedCount + 1, mergeProcMetrics.getSubmittedCounter().getCount());
197    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
198    assertEquals(assignSubmittedCount + 1, assignProcMetrics.getSubmittedCounter().getCount());
199    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
200    assertEquals(unassignSubmittedCount + mergeCount,
201      unassignProcMetrics.getSubmittedCounter().getCount());
202    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
203
204    // Need to get the references cleaned out. Close of region will move them
205    // to archive so disable and reopen just to get rid of references to later
206    // when the catalogjanitor runs, it can do merged region cleanup.
207    admin.disableTable(tableName);
208    admin.enableTable(tableName);
209
210    // Can I purge the merged regions from hbase:meta? Check that all went
211    // well by looking at the merged row up in hbase:meta. It should have no
212    // more mention of the merged regions; they are purged as last step in
213    // the merged regions cleanup.
214    UTIL.getHBaseCluster().getMaster().setCatalogJanitorEnabled(true);
215    UTIL.getHBaseCluster().getMaster().getCatalogJanitor().triggerNow();
216    RegionInfo mergedRegion = proc.getMergedRegion();
217    while (ris != null && ris.get(0) != null && ris.get(1) != null) {
218      ris = MetaTableAccessor.getMergeRegions(UTIL.getConnection(), mergedRegion);
219      LOG.info("{} {}", Bytes.toStringBinary(mergedRegion.getRegionName()), ris);
220      Threads.sleep(1000);
221    }
222    assertEquals(countOfRowsLoaded, UTIL.countRows(tableName));
223  }
224
225  /**
226   * This tests ten region merges in one go.
227   */
228  @Test
229  public void testMergeTenRegions() throws Exception {
230    final TableName tableName = TableName.valueOf(this.name.getMethodName());
231    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
232    UTIL.createMultiRegionTable(tableName, HConstants.CATALOG_FAMILY);
233    testMerge(tableName, 10);
234  }
235
236  /**
237   * This tests two concurrent region merges
238   */
239  @Test
240  public void testMergeRegionsConcurrently() throws Exception {
241    final TableName tableName = TableName.valueOf("testMergeRegionsConcurrently");
242    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
243
244    List<RegionInfo> tableRegions = createTable(tableName);
245
246    RegionInfo[] regionsToMerge1 = new RegionInfo[2];
247    RegionInfo[] regionsToMerge2 = new RegionInfo[2];
248    regionsToMerge1[0] = tableRegions.get(0);
249    regionsToMerge1[1] = tableRegions.get(1);
250    regionsToMerge2[0] = tableRegions.get(2);
251    regionsToMerge2[1] = tableRegions.get(3);
252
253    // collect AM metrics before test
254    collectAssignmentManagerMetrics();
255
256    long procId1 = procExec.submitProcedure(
257      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge1, true));
258    long procId2 = procExec.submitProcedure(
259      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge2, true));
260    ProcedureTestingUtility.waitProcedure(procExec, procId1);
261    ProcedureTestingUtility.waitProcedure(procExec, procId2);
262    ProcedureTestingUtility.assertProcNotFailed(procExec, procId1);
263    ProcedureTestingUtility.assertProcNotFailed(procExec, procId2);
264    assertRegionCount(tableName, initialRegionCount - 2);
265
266    assertEquals(mergeSubmittedCount + 2, mergeProcMetrics.getSubmittedCounter().getCount());
267    assertEquals(mergeFailedCount, mergeProcMetrics.getFailedCounter().getCount());
268    assertEquals(assignSubmittedCount + 2, assignProcMetrics.getSubmittedCounter().getCount());
269    assertEquals(assignFailedCount, assignProcMetrics.getFailedCounter().getCount());
270    assertEquals(unassignSubmittedCount + 4, unassignProcMetrics.getSubmittedCounter().getCount());
271    assertEquals(unassignFailedCount, unassignProcMetrics.getFailedCounter().getCount());
272  }
273
274  @Test
275  public void testRecoveryAndDoubleExecution() throws Exception {
276    final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecution");
277    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
278
279    List<RegionInfo> tableRegions = createTable(tableName);
280
281    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
282    ProcedureTestingUtility.setKillIfHasParent(procExec, false);
283    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
284
285    RegionInfo[] regionsToMerge = new RegionInfo[2];
286    regionsToMerge[0] = tableRegions.get(0);
287    regionsToMerge[1] = tableRegions.get(1);
288
289    long procId = procExec.submitProcedure(
290      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
291
292    // Restart the executor and execute the step twice
293    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
294    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
295
296    assertRegionCount(tableName, initialRegionCount - 1);
297  }
298
299  @Test
300  public void testRollbackAndDoubleExecution() throws Exception {
301    final TableName tableName = TableName.valueOf("testRollbackAndDoubleExecution");
302    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
303
304    List<RegionInfo> tableRegions = createTable(tableName);
305
306    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
307    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
308
309    RegionInfo[] regionsToMerge = new RegionInfo[2];
310    regionsToMerge[0] = tableRegions.get(0);
311    regionsToMerge[1] = tableRegions.get(1);
312
313    long procId = procExec.submitProcedure(
314      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
315
316    // Failing before MERGE_TABLE_REGIONS_UPDATE_META we should trigger the rollback
317    // NOTE: the 8 (number of MERGE_TABLE_REGIONS_UPDATE_META step) is
318    // hardcoded, so you have to look at this test at least once when you add a new step.
319    int lastStep = 8;
320    MasterProcedureTestingUtility.testRollbackAndDoubleExecution(procExec, procId, lastStep, true);
321    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
322    UTIL.waitUntilAllRegionsAssigned(tableName);
323    List<HRegion> regions = UTIL.getMiniHBaseCluster().getRegions(tableName);
324    assertEquals(initialRegionCount, regions.size());
325  }
326
327  @Test
328  public void testMergeWithoutPONR() throws Exception {
329    final TableName tableName = TableName.valueOf("testMergeWithoutPONR");
330    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
331
332    List<RegionInfo> tableRegions = createTable(tableName);
333
334    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
335    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
336
337    RegionInfo[] regionsToMerge = new RegionInfo[2];
338    regionsToMerge[0] = tableRegions.get(0);
339    regionsToMerge[1] = tableRegions.get(1);
340
341    long procId = procExec.submitProcedure(
342      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
343
344    // Execute until step 9 of split procedure
345    // NOTE: step 9 is after step MERGE_TABLE_REGIONS_UPDATE_META
346    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId, 9, false);
347
348    // Unset Toggle Kill and make ProcExec work correctly
349    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, false);
350    MasterProcedureTestingUtility.restartMasterProcedureExecutor(procExec);
351    ProcedureTestingUtility.waitProcedure(procExec, procId);
352
353    assertRegionCount(tableName, initialRegionCount - 1);
354  }
355
356  @Test
357  public void testMergingRegionWhileTakingSnapshot() throws Exception {
358    final TableName tableName = TableName.valueOf("testMergingRegionWhileTakingSnapshot");
359    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
360
361    List<RegionInfo> tableRegions = createTable(tableName);
362
363    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
364
365    SnapshotDescription snapshot =
366      new SnapshotDescription("SnapshotProcedureTest", tableName, SnapshotType.FLUSH);
367    SnapshotProtos.SnapshotDescription snapshotProto =
368      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshot);
369    snapshotProto = SnapshotDescriptionUtils.validate(snapshotProto,
370      UTIL.getHBaseCluster().getMaster().getConfiguration());
371    long snapshotProcId = procExec.submitProcedure(
372      new TestSnapshotProcedure.DelaySnapshotProcedure(procExec.getEnvironment(), snapshotProto));
373    UTIL.getHBaseCluster().getMaster().getSnapshotManager().registerSnapshotProcedure(snapshotProto,
374      snapshotProcId);
375
376    RegionInfo[] regionsToMerge = new RegionInfo[2];
377    regionsToMerge[0] = tableRegions.get(0);
378    regionsToMerge[1] = tableRegions.get(1);
379
380    long mergeProcId = procExec.submitProcedure(
381      new MergeTableRegionsProcedure(procExec.getEnvironment(), regionsToMerge, true));
382
383    ProcedureTestingUtility
384      .waitProcedure(UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), mergeProcId);
385    ProcedureTestingUtility.waitProcedure(
386      UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(), snapshotProcId);
387
388    assertProcFailed(procExec, mergeProcId);
389    assertEquals(initialRegionCount, UTIL.getAdmin().getRegions(tableName).size());
390  }
391
392  private List<RegionInfo> createTable(final TableName tableName) throws Exception {
393    TableDescriptor desc = TableDescriptorBuilder.newBuilder(tableName)
394      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build();
395    byte[][] splitRows = new byte[initialRegionCount - 1][];
396    for (int i = 0; i < splitRows.length; ++i) {
397      splitRows[i] = Bytes.toBytes(String.format("%d", i));
398    }
399    admin.createTable(desc, splitRows);
400    return assertRegionCount(tableName, initialRegionCount);
401  }
402
403  public List<RegionInfo> assertRegionCount(final TableName tableName, final int nregions)
404    throws Exception {
405    UTIL.waitUntilNoRegionsInTransition();
406    List<RegionInfo> tableRegions = admin.getRegions(tableName);
407    assertEquals(nregions, tableRegions.size());
408    return tableRegions;
409  }
410
411  private ProcedureExecutor<MasterProcedureEnv> getMasterProcedureExecutor() {
412    return UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor();
413  }
414
415  private void collectAssignmentManagerMetrics() {
416    mergeSubmittedCount = mergeProcMetrics.getSubmittedCounter().getCount();
417    mergeFailedCount = mergeProcMetrics.getFailedCounter().getCount();
418
419    assignSubmittedCount = assignProcMetrics.getSubmittedCounter().getCount();
420    assignFailedCount = assignProcMetrics.getFailedCounter().getCount();
421    unassignSubmittedCount = unassignProcMetrics.getSubmittedCounter().getCount();
422    unassignFailedCount = unassignProcMetrics.getFailedCounter().getCount();
423  }
424}