001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.janitor;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertNotNull;
022import static org.junit.Assert.assertTrue;
023
024import java.io.IOException;
025import java.util.Collections;
026import java.util.HashSet;
027import java.util.List;
028import java.util.Map;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.CellBuilderFactory;
031import org.apache.hadoop.hbase.CellBuilderType;
032import org.apache.hadoop.hbase.HBaseClassTestRule;
033import org.apache.hadoop.hbase.HBaseTestingUtility;
034import org.apache.hadoop.hbase.HConstants;
035import org.apache.hadoop.hbase.MetaTableAccessor;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.RegionInfoBuilder;
040import org.apache.hadoop.hbase.client.Result;
041import org.apache.hadoop.hbase.client.Table;
042import org.apache.hadoop.hbase.master.HMaster;
043import org.apache.hadoop.hbase.master.MasterServices;
044import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
045import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure;
046import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure;
047import org.apache.hadoop.hbase.master.assignment.RegionStates;
048import org.apache.hadoop.hbase.master.hbck.HbckChore;
049import org.apache.hadoop.hbase.master.hbck.HbckReport;
050import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
051import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
052import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
053import org.apache.hadoop.hbase.testclassification.LargeTests;
054import org.apache.hadoop.hbase.testclassification.MasterTests;
055import org.apache.hadoop.hbase.util.Bytes;
056import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
057import org.apache.hadoop.hbase.util.Pair;
058import org.apache.hadoop.hbase.util.Threads;
059import org.junit.AfterClass;
060import org.junit.BeforeClass;
061import org.junit.ClassRule;
062import org.junit.Rule;
063import org.junit.Test;
064import org.junit.experimental.categories.Category;
065import org.junit.rules.TestName;
066
067@Category({ MasterTests.class, LargeTests.class })
068public class TestMetaFixer {
069  @ClassRule
070  public static final HBaseClassTestRule CLASS_RULE =
071    HBaseClassTestRule.forClass(TestMetaFixer.class);
072  @Rule
073  public TestName name = new TestName();
074
075  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
076
077  @BeforeClass
078  public static void setupBeforeClass() throws Exception {
079    TEST_UTIL.startMiniCluster();
080  }
081
082  @AfterClass
083  public static void tearDownAfterClass() throws Exception {
084    TEST_UTIL.shutdownMiniCluster();
085  }
086
087  private void deleteRegion(MasterServices services, RegionInfo ri) throws IOException {
088    MetaTableAccessor.deleteRegionInfo(TEST_UTIL.getConnection(), ri);
089    // Delete it from Master context too else it sticks around.
090    services.getAssignmentManager().getRegionStates().deleteRegion(ri);
091  }
092
093  private void testPlugsHolesWithReadReplicaInternal(final TableName tn, final int replicaCount)
094    throws Exception {
095    TEST_UTIL.createMultiRegionTable(tn, replicaCount, new byte[][] { HConstants.CATALOG_FAMILY });
096    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
097    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
098    int initialSize = services.getAssignmentManager().getRegionStates().getRegionStates().size();
099    services.getCatalogJanitor().scan();
100    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
101    assertTrue(report.isEmpty());
102    int originalCount = ris.size();
103    // Remove first, last and middle region. See if hole gets plugged. Table has 26 * replicaCount
104    // regions.
105    for (int i = 0; i < replicaCount; i++) {
106      deleteRegion(services, ris.get(3 * replicaCount + i));
107      deleteRegion(services, ris.get(i));
108      deleteRegion(services, ris.get(ris.size() - 1 - i));
109    }
110    assertEquals(initialSize - 3 * replicaCount,
111      services.getAssignmentManager().getRegionStates().getRegionStates().size());
112    services.getCatalogJanitor().scan();
113    report = services.getCatalogJanitor().getLastReport();
114    assertEquals(report.toString(), 3, report.getHoles().size());
115    MetaFixer fixer = new MetaFixer(services);
116    fixer.fixHoles(report);
117    services.getCatalogJanitor().scan();
118    report = services.getCatalogJanitor().getLastReport();
119    assertTrue(report.toString(), report.isEmpty());
120    assertEquals(initialSize,
121      services.getAssignmentManager().getRegionStates().getRegionStates().size());
122
123    // wait for RITs to settle -- those are the fixed regions being assigned -- or until the
124    // watchdog TestRule terminates the test.
125    HBaseTestingUtility.await(50,
126      () -> services.getMasterProcedureExecutor().getActiveProcIds().size() == 0);
127
128    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
129    assertEquals(originalCount, ris.size());
130  }
131
132  @Test
133  public void testPlugsHoles() throws Exception {
134    TableName tn = TableName.valueOf(this.name.getMethodName());
135    testPlugsHolesWithReadReplicaInternal(tn, 1);
136  }
137
138  @Test
139  public void testPlugsHolesWithReadReplica() throws Exception {
140    TableName tn = TableName.valueOf(this.name.getMethodName());
141    testPlugsHolesWithReadReplicaInternal(tn, 3);
142  }
143
144  /**
145   * Just make sure running fixMeta does right thing for the case of a single-region Table where the
146   * region gets dropped. There is nothing much we can do. We can't restore what we don't know about
147   * (at least from a read of hbase:meta).
148   */
149  @Test
150  public void testOneRegionTable() throws IOException {
151    TableName tn = TableName.valueOf(this.name.getMethodName());
152    TEST_UTIL.createTable(tn, HConstants.CATALOG_FAMILY);
153    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
154    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
155    services.getCatalogJanitor().scan();
156    deleteRegion(services, ris.get(0));
157    services.getCatalogJanitor().scan();
158    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
159    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
160    assertTrue(ris.isEmpty());
161    MetaFixer fixer = new MetaFixer(services);
162    fixer.fixHoles(report);
163    report = services.getCatalogJanitor().getLastReport();
164    assertTrue(report.isEmpty());
165    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
166    assertEquals(0, ris.size());
167  }
168
169  private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
170    throws IOException {
171    RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable())
172      .setStartKey(a.getStartKey()).setEndKey(b.getEndKey()).build();
173    MetaTableAccessor.putsToMetaTable(services.getConnection(),
174      Collections.singletonList(MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
175        EnvironmentEdgeManager.currentTime())));
176    // TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
177    long assign = services.getAssignmentManager().assign(overlapRegion);
178    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), assign);
179    return overlapRegion;
180  }
181
182  private void testOverlapCommon(final TableName tn) throws Exception {
183    Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
184    TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
185    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
186    assertTrue(ris.size() > 5);
187    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
188    services.getCatalogJanitor().scan();
189    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
190    assertTrue(report.isEmpty());
191    // Make a simple overlap spanning second and third region.
192    makeOverlap(services, ris.get(1), ris.get(3));
193    makeOverlap(services, ris.get(2), ris.get(3));
194    makeOverlap(services, ris.get(2), ris.get(4));
195  }
196
197  @Test
198  public void testOverlap() throws Exception {
199    TableName tn = TableName.valueOf(this.name.getMethodName());
200    testOverlapCommon(tn);
201    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
202    HbckChore hbckChore = services.getHbckChore();
203
204    CatalogJanitor cj = services.getCatalogJanitor();
205    cj.scan();
206    CatalogJanitorReport report = cj.getLastReport();
207    assertEquals(6, report.getOverlaps().size());
208    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
209    MetaFixer fixer = new MetaFixer(services);
210    fixer.fixOverlaps(report);
211
212    HBaseTestingUtility.await(10, () -> {
213      try {
214        if (cj.scan() > 0) {
215          // It submits GC once, then it will immediately kick off another GC to test if
216          // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create
217          // a hole.
218          Map<RegionInfo, Result> mergedRegions = cj.getLastReport().mergedRegions;
219          for (Map.Entry<RegionInfo, Result> e : mergedRegions.entrySet()) {
220            List<RegionInfo> parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells());
221            if (parents != null) {
222              ProcedureExecutor<MasterProcedureEnv> pe = services.getMasterProcedureExecutor();
223              pe.submitProcedure(
224                new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), e.getKey(), parents));
225            }
226          }
227          return true;
228        }
229        return false;
230      } catch (Exception e) {
231        throw new RuntimeException(e);
232      }
233    });
234
235    // Wait until all GCs settled down
236    HBaseTestingUtility.await(10, () -> {
237      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
238    });
239
240    // No orphan regions on FS
241    hbckChore.choreForTesting();
242    HbckReport hbckReport = hbckChore.getLastReport();
243    assertNotNull(hbckReport);
244    assertEquals(0, hbckReport.getOrphanRegionsOnFS().size());
245
246    // No holes reported.
247    cj.scan();
248    final CatalogJanitorReport postReport = cj.getLastReport();
249    assertTrue(postReport.isEmpty());
250  }
251
252  @Test
253  public void testMultipleTableOverlaps() throws Exception {
254    TableName t1 = TableName.valueOf("t1");
255    TableName t2 = TableName.valueOf("t2");
256    TEST_UTIL.createMultiRegionTable(t1, new byte[][] { HConstants.CATALOG_FAMILY });
257    TEST_UTIL.createMultiRegionTable(t2, new byte[][] { HConstants.CATALOG_FAMILY });
258    TEST_UTIL.waitTableAvailable(t2);
259
260    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
261    services.getCatalogJanitor().scan();
262    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
263    assertTrue(report.isEmpty());
264
265    // Make a simple overlap for t1
266    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t1);
267    makeOverlap(services, ris.get(1), ris.get(2));
268    // Make a simple overlap for t2
269    ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), t2);
270    makeOverlap(services, ris.get(1), ris.get(2));
271
272    services.getCatalogJanitor().scan();
273    report = services.getCatalogJanitor().getLastReport();
274    assertEquals("Region overlaps count does not match.", 4, report.getOverlaps().size());
275
276    MetaFixer fixer = new MetaFixer(services);
277    List<Long> longs = fixer.fixOverlaps(report);
278    long[] procIds = longs.stream().mapToLong(l -> l).toArray();
279    ProcedureTestingUtility.waitProcedures(services.getMasterProcedureExecutor(), procIds);
280
281    // After fix, verify no overlaps are left.
282    services.getCatalogJanitor().scan();
283    report = services.getCatalogJanitor().getLastReport();
284    assertTrue("After fix there should not have been any overlaps.", report.isEmpty());
285  }
286
287  @Test
288  public void testOverlapWithSmallMergeCount() throws Exception {
289    TableName tn = TableName.valueOf(this.name.getMethodName());
290    try {
291      testOverlapCommon(tn);
292      HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
293      CatalogJanitor cj = services.getCatalogJanitor();
294      cj.scan();
295      CatalogJanitorReport report = cj.getLastReport();
296      assertEquals(6, report.getOverlaps().size());
297      assertEquals(2, MetaFixer.calculateMerges(5, report.getOverlaps()).size());
298
299      // The max merge count is set to 5 so overlap regions are divided into
300      // two merge requests.
301      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
302        .setInt("hbase.master.metafixer.max.merge.count", 5);
303
304      // Get overlap regions
305      HashSet<String> overlapRegions = new HashSet<>();
306      for (Pair<RegionInfo, RegionInfo> pair : report.getOverlaps()) {
307        overlapRegions.add(pair.getFirst().getRegionNameAsString());
308        overlapRegions.add(pair.getSecond().getRegionNameAsString());
309      }
310
311      MetaFixer fixer = new MetaFixer(services);
312      fixer.fixOverlaps(report);
313      AssignmentManager am = services.getAssignmentManager();
314
315      HBaseTestingUtility.await(200, () -> {
316        try {
317          cj.scan();
318          final CatalogJanitorReport postReport = cj.getLastReport();
319          RegionStates regionStates = am.getRegionStates();
320
321          // Make sure that two merged regions are opened and GCs are done.
322          if (postReport.getOverlaps().size() == 1) {
323            Pair<RegionInfo, RegionInfo> pair = postReport.getOverlaps().get(0);
324            if (
325              (!overlapRegions.contains(pair.getFirst().getRegionNameAsString())
326                && regionStates.getRegionState(pair.getFirst()).isOpened())
327                && (!overlapRegions.contains(pair.getSecond().getRegionNameAsString())
328                  && regionStates.getRegionState(pair.getSecond()).isOpened())
329            ) {
330              // Make sure GC is done.
331              List<RegionInfo> firstParents =
332                MetaTableAccessor.getMergeRegions(services.getConnection(), pair.getFirst());
333              List<RegionInfo> secondParents =
334                MetaTableAccessor.getMergeRegions(services.getConnection(), pair.getSecond());
335
336              return (firstParents == null || firstParents.isEmpty())
337                && (secondParents == null || secondParents.isEmpty());
338            }
339          }
340          return false;
341        } catch (Exception e) {
342          throw new RuntimeException(e);
343        }
344      });
345
346      // Second run of fixOverlap should fix all.
347      report = cj.getLastReport();
348      fixer.fixOverlaps(report);
349
350      HBaseTestingUtility.await(20, () -> {
351        try {
352          // Make sure it GC only once.
353          return (cj.scan() > 0);
354        } catch (Exception e) {
355          throw new RuntimeException(e);
356        }
357      });
358
359      // No holes reported.
360      cj.scan();
361      final CatalogJanitorReport postReport = cj.getLastReport();
362      assertTrue(postReport.isEmpty());
363
364    } finally {
365      TEST_UTIL.getHBaseCluster().getMaster().getConfiguration()
366        .unset("hbase.master.metafixer.max.merge.count");
367
368      TEST_UTIL.deleteTable(tn);
369    }
370  }
371
372  /**
373   * This test covers the case that one of merged parent regions is a merged child region that has
374   * not been GCed but there is no reference files anymore. In this case, it will kick off a GC
375   * procedure, but no merge will happen.
376   */
377  @Test
378  public void testMergeWithMergedChildRegion() throws Exception {
379    TableName tn = TableName.valueOf(this.name.getMethodName());
380    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
381    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
382    assertTrue(ris.size() > 5);
383    HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
384    CatalogJanitor cj = services.getCatalogJanitor();
385    cj.scan();
386    CatalogJanitorReport report = cj.getLastReport();
387    assertTrue(report.isEmpty());
388    RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
389
390    cj.scan();
391    report = cj.getLastReport();
392    assertEquals(2, report.getOverlaps().size());
393
394    // Mark it as a merged child region.
395    RegionInfo fakedParentRegion =
396      RegionInfoBuilder.newBuilder(tn).setStartKey(overlapRegion.getStartKey()).build();
397
398    Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
399    Put putOfMerged =
400      MetaTableAccessor.makePutFromRegionInfo(overlapRegion, HConstants.LATEST_TIMESTAMP);
401    String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
402    putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
403      .setRow(putOfMerged.getRow()).setFamily(HConstants.CATALOG_FAMILY)
404      .setQualifier(Bytes.toBytes(qualifier)).setTimestamp(putOfMerged.getTimestamp())
405      .setType(Cell.Type.Put).setValue(RegionInfo.toByteArray(fakedParentRegion)).build());
406
407    meta.put(putOfMerged);
408
409    MetaFixer fixer = new MetaFixer(services);
410    fixer.fixOverlaps(report);
411
412    // Wait until all procedures settled down
413    HBaseTestingUtility.await(200, () -> {
414      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
415    });
416
417    // No merge is done, overlap is still there.
418    cj.scan();
419    report = cj.getLastReport();
420    assertEquals(2, report.getOverlaps().size());
421
422    fixer.fixOverlaps(report);
423
424    // Wait until all procedures settled down
425    HBaseTestingUtility.await(200, () -> {
426      return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
427    });
428
429    // Merge is done and no more overlaps
430    cj.scan();
431    report = cj.getLastReport();
432    assertEquals(0, report.getOverlaps().size());
433  }
434
435  /**
436   * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so we
437   * can fix this condition. HBASE-24247
438   */
439  @Test
440  public void testOverlapWithMergeOfNonContiguous() throws Exception {
441    TableName tn = TableName.valueOf(this.name.getMethodName());
442    TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
443    List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
444    assertTrue(ris.size() > 5);
445    MasterServices services = TEST_UTIL.getHBaseCluster().getMaster();
446    services.getCatalogJanitor().scan();
447    CatalogJanitorReport report = services.getCatalogJanitor().getLastReport();
448    assertTrue(report.isEmpty());
449    // Make a simple overlap spanning second and third region.
450    makeOverlap(services, ris.get(1), ris.get(5));
451    // Now Delete a region under the overlap to manufacture non-contiguous sub regions.
452    RegionInfo deletedRegion = ris.get(3);
453    long pid = services.getAssignmentManager().unassign(deletedRegion);
454    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
455      Threads.sleep(100);
456    }
457    GCRegionProcedure procedure =
458      new GCRegionProcedure(services.getMasterProcedureExecutor().getEnvironment(), ris.get(3));
459    pid = services.getMasterProcedureExecutor().submitProcedure(procedure);
460    while (!services.getMasterProcedureExecutor().isFinished(pid)) {
461      Threads.sleep(100);
462    }
463    services.getCatalogJanitor().scan();
464    report = services.getCatalogJanitor().getLastReport();
465    assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
466    MetaFixer fixer = new MetaFixer(services);
467    fixer.fixOverlaps(report);
468    HBaseTestingUtility.await(10, () -> {
469      try {
470        services.getCatalogJanitor().scan();
471        final CatalogJanitorReport postReport = services.getCatalogJanitor().getLastReport();
472        return postReport.isEmpty();
473      } catch (Exception e) {
474        throw new RuntimeException(e);
475      }
476    });
477  }
478}