001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import org.apache.hadoop.hbase.CatalogFamilyFormat;
030import org.apache.hadoop.hbase.Cell;
031import org.apache.hadoop.hbase.CellBuilderFactory;
032import org.apache.hadoop.hbase.CellBuilderType;
033import org.apache.hadoop.hbase.HBaseClassTestRule;
034import org.apache.hadoop.hbase.HBaseTestingUtil;
035import org.apache.hadoop.hbase.HConstants;
036import org.apache.hadoop.hbase.MetaTableAccessor;
037import org.apache.hadoop.hbase.TableName;
038import org.apache.hadoop.hbase.client.Put;
039import org.apache.hadoop.hbase.client.RegionInfo;
040import org.apache.hadoop.hbase.client.RegionInfoBuilder;
041import org.apache.hadoop.hbase.client.Result;
042import org.apache.hadoop.hbase.client.Table;
043import org.apache.hadoop.hbase.master.HMaster;
044import org.apache.hadoop.hbase.master.MasterServices;
045import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
046import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
047import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
048import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
049import org.apache.hadoop.hbase.master.assignment.RegionStates;
050import org.apache.hadoop.hbase.master.hbck.HbckChore;
051import org.apache.hadoop.hbase.master.hbck.HbckReport;
052import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
053import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
054import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
055import org.apache.hadoop.hbase.testclassification.LargeTests;
056import org.apache.hadoop.hbase.testclassification.MasterTests;
057import org.apache.hadoop.hbase.util.Bytes;
058import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
059import org.apache.hadoop.hbase.util.Pair;
060import org.apache.hadoop.hbase.util.Threads;
061import org.junit.AfterClass;
062import org.junit.BeforeClass;
063import org.junit.ClassRule;
064import org.junit.Rule;
065import org.junit.Test;
066import org.junit.experimental.categories.Category;
067import org.junit.rules.TestName;
068
069@Category({ MasterTests.class, LargeTests.class })
070public class TestMetaFixer {
071  @ClassRule
072  public static final HBaseClassTestRule CLASS_RULE =
073    HBaseClassTestRule.forClass(TestMetaFixer.class);
074  @Rule
075  public TestName name = new TestName();
076
077  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
078
079  @BeforeClass
080  public static void setupBeforeClass() throws Exception {
081    TEST_UTIL.startMiniCluster();
082  }
083
084  @AfterClass
085  public static void tearDownAfterClass() throws Exception {
086    TEST_UTIL.shutdownMiniCluster();
087  }
088
089  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
090    services.getAssignmentManager().getRegionStateStore().deleteRegion(ri);
091    // Delete it from Master context too else it sticks around.
092    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
093  }
094
095  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
096    throws Exception {
097    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
098    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
099    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
100    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
101    services.getCatalogJanitor().scan();
102    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
103    assertTrue(report.isEmpty());
104    int originalCount = ris.size();
105    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount
106    // regions.
107    for (int i = 0; i < replicaCount; i++) {
108      deleteRegion(services, ris.get(3 * replicaCount + i));
109      deleteRegion(services, ris.get(i));
110      deleteRegion(services, ris.get(ris.size() - 1 - i));
111    }
112    assertEquals(initialSize - 3 * replicaCount,
113      services.getAssignmentManager().getRegionStates().getRegionStates().size());
114    services.getCatalogJanitor().scan();
115    report = services.getCatalogJanitor().getLastReport();
116    assertEquals(report.toString(), 3, report.getHoles().size());
117    MetaFixer fixer = new MetaFixer(services);
118    fixer.fixHoles(report);
119    services.getCatalogJanitor().scan();
120    report = services.getCatalogJanitor().getLastReport();
121    assertTrue(report.toString(), report.isEmpty());
122    assertEquals(initialSize,
123      services.getAssignmentManager().getRegionStates().getRegionStates().size());
124
125    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
126    // watchdog TestRule terminates the test.
127    HBaseTestingUtil.await(50,
128      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
129
130    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
131    assertEquals(originalCount, ris.size());
132  }
133
134  @Test
135  public void testPlugsHoles() throws Exception {
136    TableName tn = TableName.valueOf(this.name.getMethodName());
137    testPlugsHolesWithReadReplicaInternal(tn, 1);
138  }
139
140  @Test
141  public void testPlugsHolesWithReadReplica() throws Exception {
142    TableName tn = TableName.valueOf(this.name.getMethodName());
143    testPlugsHolesWithReadReplicaInternal(tn, 3);
144  }
145
146  /**
147   * Just make sure running fixMeta does right thing for the case of a single-region Table where the
148   * region gets dropped. There is nothing much we can do. We can't restore what we don't know about
149   * (at least from a read of hbase:meta).
150   */
151  @Test
152  public void testOneRegionTable() throws IOException {
153    TableName tn = TableName.valueOf(this.name.getMethodName());
154    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
155    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
156    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
157    services.getCatalogJanitor().scan();
158    deleteRegion(services, ris.get(0));
159    services.getCatalogJanitor().scan();
160    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
161    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
162    assertTrue(ris.isEmpty());
163    MetaFixer fixer = new MetaFixer(services);
164    fixer.fixHoles(report);
165    report = services.getCatalogJanitor().getLastReport();
166    assertTrue(report.isEmpty());
167    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
168    assertEquals(0, ris.size());
169  }
170
171  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
172    throws IOException {
173    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
174      .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
175    MetaTableAccessor.putsToMetaTable(services.getConnection(),
176      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
177        EnvironmentEdgeManager.currentTime())));
178    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
179    long assign = services.getAssignmentManager().assign(overlapRegion);
180    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
181    return overlapRegion;
182  }
183
184  private void testOverlapCommon(final TableName tn) throws Exception {
185    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
186    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
187    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
188    assertTrue(ris.size() > 5);
189    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
190    services.getCatalogJanitor().scan();
191    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
192    assertTrue(report.isEmpty());
193    // Make a simple overlap spanning second and third region.
194    makeOverlap(services, ris.get(1), ris.get(3));
195    makeOverlap(services, ris.get(2), ris.get(3));
196    makeOverlap(services, ris.get(2), ris.get(4));
197  }
198
199  @Test
200  public void testOverlap() throws Exception {
201    TableName tn = TableName.valueOf(this.name.getMethodName());
202    testOverlapCommon(tn);
203    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
204    HbckChore hbckChore = services.getHbckChore();
205
206    CatalogJanitor cj = services.getCatalogJanitor();
207    cj.scan();
208    CatalogJanitorReport report = cj.getLastReport();
209    assertEquals(6, report.getOverlaps().size());
210    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
211    MetaFixer fixer = new MetaFixer(services);
212    fixer.fixOverlaps(report);
213
214    HBaseTestingUtil.await(10, () -> {
215      try {
216        if (cj.scan() > 0) {
217          // It submits GC once, then it will immediately kick off another GC to test if
218          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
219          // a hole.
220          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
221          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
222            List<RegionInfo> parents = CatalogFamilyFormat.getMergeRegions(e.getValue().rawCells());
223            if (parents != null) {
224              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
225              pe.submitProcedure(
226                new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents));
227            }
228          }
229          return true;
230        }
231        return false;
232      } catch (Exception e) {
233        throw new RuntimeException(e);
234      }
235    });
236
237    // Wait until all GCs settled down
238    HBaseTestingUtil.await(10, () -> {
239      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
240    });
241
242    // No orphan regions on FS
243    hbckChore.choreForTesting();
244    HbckReport hbckReport = hbckChore.getLastReport();
245    assertNotNull(hbckReport);
246    assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
247
248    // No holes reported.
249    cj.scan();
250    final CatalogJanitorReport postReport = cj.getLastReport();
251    assertTrue(postReport.isEmpty());
252  }
253
254  @Test
255  public void testMultipleTableOverlaps() throws Exception {
256    TableName t1 = TableName.valueOf("t1");
257    TableName t2 = TableName.valueOf("t2");
258    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
259    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
260    TEST_UTIL.waitTableAvailable(t2);
261
262    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
263    services.getCatalogJanitor().scan();
264    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
265    assertTrue(report.isEmpty());
266
267    // Make a simple overlap for t1
268    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
269    makeOverlap(services, ris.get(1), ris.get(2));
270    // Make a simple overlap for t2
271    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
272    makeOverlap(services, ris.get(1), ris.get(2));
273
274    services.getCatalogJanitor().scan();
275    report = services.getCatalogJanitor().getLastReport();
276    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());
277
278    MetaFixer fixer = new MetaFixer(services);
279    List<Long> longs = fixer.fixOverlaps(report);
280    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
281    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);
282
283    // After fix, verify no overlaps are left.
284    services.getCatalogJanitor().scan();
285    report = services.getCatalogJanitor().getLastReport();
286    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
287  }
288
289  @Test
290  public void testOverlapWithSmallMergeCount() throws Exception {
291    TableName tn = TableName.valueOf(this.name.getMethodName());
292    try {
293      testOverlapCommon(tn);
294      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
295      CatalogJanitor cj = services.getCatalogJanitor();
296      cj.scan();
297      CatalogJanitorReport report = cj.getLastReport();
298      assertEquals(6, report.getOverlaps().size());
299      assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size());
300
301      // The max merge count is set to 5 so overlap regions are divided into
302      // two merge requests.
303      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
304        .setInt("hbase.master.metafixer.max.merge.count", 5);
305
306      // Get overlap regions
307      HashSet<String> overlapRegions = new HashSet<>();
308      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
309        overlapRegions.add(pair.getFirst().getRegionNameAsString());
310        overlapRegions.add(pair.getSecond().getRegionNameAsString());
311      }
312
313      MetaFixer fixer = new MetaFixer(services);
314      fixer.fixOverlaps(report);
315      AssignmentManager am = services.getAssignmentManager();
316
317      HBaseTestingUtil.await(200, () -> {
318        try {
319          cj.scan();
320          final CatalogJanitorReport postReport = cj.getLastReport();
321          RegionStates regionStates = am.getRegionStates();
322          RegionStateStore regionStateStore = am.getRegionStateStore();
323          // Make sure that two merged regions are opened and GCs are done.
324          if (postReport.getOverlaps().size() == 1) {
325            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
326            if (
327              (!overlapRegions.contains(pair.getFirst().getRegionNameAsString())
328                && regionStates.getRegionState(pair.getFirst()).isOpened())
329                && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString())
330                  && regionStates.getRegionState(pair.getSecond()).isOpened())
331            ) {
332              // Make sure GC is done.
333              List<RegionInfo> firstParents = regionStateStore.getMergeRegions(pair.getFirst());
334              List<RegionInfo> secondParents = regionStateStore.getMergeRegions(pair.getSecond());
335
336              return (firstParents == null || firstParents.isEmpty())
337                && (secondParents == null || secondParents.isEmpty());
338            }
339          }
340          return false;
341        } catch (Exception e) {
342          throw new RuntimeException(e);
343        }
344      });
345
346      // Second run of fixOverlap should fix all.
347      report = cj.getLastReport();
348      fixer.fixOverlaps(report);
349
350      HBaseTestingUtil.await(20, () -> {
351        try {
352          // Make sure it GC only once.
353          return (cj.scan() > 0);
354        } catch (Exception e) {
355          throw new RuntimeException(e);
356        }
357      });
358
359      // No holes reported.
360      cj.scan();
361      final CatalogJanitorReport postReport = cj.getLastReport();
362      assertTrue(postReport.isEmpty());
363
364    } finally {
365      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
366        .unset("hbase.master.metafixer.max.merge.count");
367
368      TEST_UTIL.deleteTable(tn);
369    }
370  }
371
372  /**
373   * This test covers the case that one of merged parent regions is a merged child region that has
374   * not been GCed but there is no reference files anymore. In this case, it will kick off a GC
375   * procedure, but no merge will happen.
376   */
377  @Test
378  public void testMergeWithMergedChildRegion() throws Exception {
379    TableName tn = TableName.valueOf(this.name.getMethodName());
380    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
381    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
382    assertTrue(ris.size() > 5);
383    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
384    CatalogJanitor cj = services.getCatalogJanitor();
385    cj.scan();
386    CatalogJanitorReport report = cj.getLastReport();
387    assertTrue(report.isEmpty());
388    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
389
390    cj.scan();
391    report = cj.getLastReport();
392    assertEquals(2, report.getOverlaps().size());
393
394    // Mark it as a merged child region.
395    RegionInfo fakedParentRegion =
396      RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build();
397
398    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
399    Put putOfMerged =
400      MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP);
401    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
402    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
403      .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY)
404      .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp())
405      .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build());
406
407    meta.put(putOfMerged);
408
409    MetaFixer fixer = new MetaFixer(services);
410    fixer.fixOverlaps(report);
411
412    // Wait until all procedures settled down
413    HBaseTestingUtil.await(200, () -> {
414      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
415    });
416
417    // No merge is done, overlap is still there.
418    cj.scan();
419    report = cj.getLastReport();
420    assertEquals(2, report.getOverlaps().size());
421
422    fixer.fixOverlaps(report);
423
424    // Wait until all procedures settled down
425    HBaseTestingUtil.await(200, () -> {
426      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
427    });
428
429    // Merge is done and no more overlaps
430    cj.scan();
431    report = cj.getLastReport();
432    assertEquals(0, report.getOverlaps().size());
433  }
434
435  /**
436   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we
437   * can fix this condition. HBASE-24247
438   */
439  @Test
440  public void testOverlapWithMergeOfNonContiguous() throws Exception {
441    TableName tn = TableName.valueOf(this.name.getMethodName());
442    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
443    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
444    assertTrue(ris.size() > 5);
445    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
446    services.getCatalogJanitor().scan();
447    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
448    assertTrue(report.isEmpty());
449    // Make a simple overlap spanning second and third region.
450    makeOverlap(services, ris.get(1), ris.get(5));
451    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
452    RegionInfo deletedRegion = ris.get(3);
453    long pid = services.getAssignmentManager().unassign(deletedRegion);
454    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
455      Threads.sleep(100);
456    }
457    GCRegionProcedure procedure =
458      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
459    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
460    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
461      Threads.sleep(100);
462    }
463    services.getCatalogJanitor().scan();
464    report = services.getCatalogJanitor().getLastReport();
465    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
466    MetaFixer fixer = new MetaFixer(services);
467    fixer.fixOverlaps(report);
468    HBaseTestingUtil.await(10, () -> {
469      try {
470        services.getCatalogJanitor().scan();
471        final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport();
472        return postReport.isEmpty();
473      } catch (Exception e) {
474        throw new RuntimeException(e);
475      }
476    });
477  }
478}