001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.janitor; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.assertFalse; 022import static org.junit.Assert.assertNotNull; 023import static org.junit.Assert.assertTrue; 024 025import java.io.IOException; 026import java.util.Arrays; 027import java.util.LinkedList; 028import java.util.List; 029import org.apache.hadoop.hbase.CatalogFamilyFormat; 030import org.apache.hadoop.hbase.HBaseClassTestRule; 031import org.apache.hadoop.hbase.HBaseTestingUtil; 032import org.apache.hadoop.hbase.HConstants; 033import org.apache.hadoop.hbase.MetaTableAccessor; 034import org.apache.hadoop.hbase.TableName; 035import org.apache.hadoop.hbase.TableNotEnabledException; 036import org.apache.hadoop.hbase.client.Put; 037import org.apache.hadoop.hbase.client.RegionInfo; 038import org.apache.hadoop.hbase.client.RegionInfoBuilder; 039import org.apache.hadoop.hbase.master.assignment.RegionStateStore; 040import org.apache.hadoop.hbase.testclassification.LargeTests; 041import org.apache.hadoop.hbase.testclassification.MasterTests; 042import org.apache.hadoop.hbase.util.Bytes; 043import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 044import org.apache.hadoop.hbase.util.Pair; 045import org.junit.After; 046import org.junit.Before; 047import org.junit.ClassRule; 048import org.junit.Rule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.junit.rules.TestName; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055@Category({ MasterTests.class, LargeTests.class }) 056public class TestCatalogJanitorCluster { 057 private static final Logger LOG = LoggerFactory.getLogger(TestCatalogJanitorCluster.class); 058 059 @ClassRule 060 public static final HBaseClassTestRule CLASS_RULE = 061 HBaseClassTestRule.forClass(TestCatalogJanitorCluster.class); 062 063 @Rule 064 public final TestName name = new TestName(); 065 066 private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); 067 private static final TableName T1 = TableName.valueOf("t1"); 068 private static final TableName T2 = TableName.valueOf("t2"); 069 private static final TableName T3 = TableName.valueOf("t3"); 070 private static final TableName T4 = TableName.valueOf("t4"); 071 private static final TableName T5 = TableName.valueOf("t5"); 072 private static final TableName T6 = TableName.valueOf("t6"); 073 private static final TableName T7 = TableName.valueOf("t7"); 074 075 @Before 076 public void before() throws Exception { 077 TEST_UTIL.startMiniCluster(); 078 TEST_UTIL.createMultiRegionTable(T1, new byte[][] { HConstants.CATALOG_FAMILY }); 079 TEST_UTIL.createMultiRegionTable(T2, new byte[][] { HConstants.CATALOG_FAMILY }); 080 TEST_UTIL.createMultiRegionTable(T3, new byte[][] { HConstants.CATALOG_FAMILY }); 081 082 final byte[][] keysForT4 = 083 { Bytes.toBytes("aa"), Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") }; 084 085 TEST_UTIL.createTable(T4, HConstants.CATALOG_FAMILY, keysForT4); 086 087 final byte[][] keysForT5 = { Bytes.toBytes("bb"), Bytes.toBytes("cc"), Bytes.toBytes("dd") }; 088 089 TEST_UTIL.createTable(T5, HConstants.CATALOG_FAMILY, keysForT5); 090 091 TEST_UTIL.createMultiRegionTable(T6, new byte[][] { HConstants.CATALOG_FAMILY }); 092 TEST_UTIL.createMultiRegionTable(T7, new byte[][] { HConstants.CATALOG_FAMILY }); 093 } 094 095 @After 096 public void after() throws Exception { 097 TEST_UTIL.shutdownMiniCluster(); 098 } 099 100 /** 101 * Fat method where we start with a fat hbase:meta and then gradually intro problems running 102 * catalogjanitor for each to ensure it triggers complaint. Do one big method because takes a 103 * while to build up the context we need. We create three tables and then make holes, overlaps, 104 * add unknown servers and empty out regioninfo columns. Each should up counts in the 105 * CatalogJanitor.Report produced. 106 */ 107 @Test 108 public void testConsistency() throws IOException { 109 CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor(); 110 RegionStateStore regionStateStore = 111 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore(); 112 janitor.scan(); 113 CatalogJanitorReport report = janitor.getLastReport(); 114 // Assert no problems. 115 assertTrue(report.isEmpty()); 116 // Now remove first region in table t2 to see if catalogjanitor scan notices. 117 List<RegionInfo> t2Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T2); 118 regionStateStore.deleteRegion(t2Ris.get(0)); 119 janitor.scan(); 120 report = janitor.getLastReport(); 121 assertFalse(report.isEmpty()); 122 assertEquals(1, report.getHoles().size()); 123 assertTrue(report.getHoles().get(0).getFirst().getTable() 124 .equals(RegionInfoBuilder.UNDEFINED.getTable())); 125 assertTrue(report.getHoles().get(0).getSecond().getTable().equals(T2)); 126 assertEquals(0, report.getOverlaps().size()); 127 // Next, add overlaps to first row in t3 128 List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3); 129 RegionInfo ri = t3Ris.get(0); 130 RegionInfo newRi1 = RegionInfoBuilder.newBuilder(ri.getTable()) 131 .setStartKey(incrementRow(ri.getStartKey())).setEndKey(incrementRow(ri.getEndKey())).build(); 132 Put p1 = MetaTableAccessor.makePutFromRegionInfo(newRi1, EnvironmentEdgeManager.currentTime()); 133 RegionInfo newRi2 = RegionInfoBuilder.newBuilder(newRi1.getTable()) 134 .setStartKey(incrementRow(newRi1.getStartKey())).setEndKey(incrementRow(newRi1.getEndKey())) 135 .build(); 136 Put p2 = MetaTableAccessor.makePutFromRegionInfo(newRi2, EnvironmentEdgeManager.currentTime()); 137 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(p1, p2)); 138 janitor.scan(); 139 report = janitor.getLastReport(); 140 assertFalse(report.isEmpty()); 141 // We added two overlaps so total three. 142 assertEquals(3, report.getOverlaps().size()); 143 // Assert hole is still there. 144 assertEquals(1, report.getHoles().size()); 145 // Assert other attributes are empty still. 146 assertTrue(report.getEmptyRegionInfo().isEmpty()); 147 assertTrue(report.getUnknownServers().isEmpty()); 148 // Now make bad server in t1. 149 List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1); 150 RegionInfo t1Ri1 = t1Ris.get(1); 151 Put pServer = new Put(t1Ri1.getRegionName()); 152 pServer.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0), 153 Bytes.toBytes("bad.server.example.org:1234")); 154 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pServer)); 155 janitor.scan(); 156 report = janitor.getLastReport(); 157 assertFalse(report.isEmpty()); 158 assertEquals(1, report.getUnknownServers().size()); 159 // Test what happens if we blow away an info:server row, if it is null. Should not kill CJ 160 // and we should log the row that had the problem. HBASE-23192. Just make sure we don't 161 // break if this happens. 162 LOG.info("Make null info:server"); 163 Put emptyInfoServerPut = new Put(t1Ri1.getRegionName()); 164 emptyInfoServerPut.addColumn(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(0), 165 Bytes.toBytes("")); 166 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(emptyInfoServerPut)); 167 janitor.scan(); 168 report = janitor.getLastReport(); 169 assertEquals(0, report.getUnknownServers().size()); 170 // Mke an empty regioninfo in t1. 171 RegionInfo t1Ri2 = t1Ris.get(2); 172 Put pEmptyRI = new Put(t1Ri2.getRegionName()); 173 pEmptyRI.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, 174 HConstants.EMPTY_BYTE_ARRAY); 175 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI)); 176 janitor.scan(); 177 report = janitor.getLastReport(); 178 assertEquals(1, report.getEmptyRegionInfo().size()); 179 180 int holesReported = report.getHoles().size(); 181 int overlapsReported = report.getOverlaps().size(); 182 183 // Test the case for T4 184 // r1: [aa, bb), r2: [cc, dd), r3: [a, cc) 185 // Make sure only overlaps and no holes are reported. 186 List<RegionInfo> t4Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T4); 187 // delete the region [bb, cc) 188 regionStateStore.deleteRegion(t4Ris.get(2)); 189 190 // add a new region [a, cc) 191 RegionInfo newRiT4 = RegionInfoBuilder.newBuilder(T4).setStartKey("a".getBytes()) 192 .setEndKey("cc".getBytes()).build(); 193 Put putForT4 = 194 MetaTableAccessor.makePutFromRegionInfo(newRiT4, EnvironmentEdgeManager.currentTime()); 195 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT4)); 196 197 janitor.scan(); 198 report = janitor.getLastReport(); 199 // there is no new hole reported, 2 more overLaps added. 200 assertEquals(holesReported, report.getHoles().size()); 201 assertEquals(overlapsReported + 2, report.getOverlaps().size()); 202 203 holesReported = report.getHoles().size(); 204 overlapsReported = report.getOverlaps().size(); 205 206 // Test the case for T5 207 // r0: [, bb), r1: [a, g), r2: [bb, cc), r3: [dd, ) 208 // Make sure only overlaps and no holes are reported. 209 List<RegionInfo> t5Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T5); 210 // delete the region [cc, dd) 211 regionStateStore.deleteRegion(t5Ris.get(2)); 212 213 // add a new region [a, g) 214 RegionInfo newRiT5 = RegionInfoBuilder.newBuilder(T5).setStartKey("a".getBytes()) 215 .setEndKey("g".getBytes()).build(); 216 Put putForT5 = 217 MetaTableAccessor.makePutFromRegionInfo(newRiT5, EnvironmentEdgeManager.currentTime()); 218 MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(putForT5)); 219 220 janitor.scan(); 221 report = janitor.getLastReport(); 222 // there is no new hole reported, 3 more overLaps added. 223 // ([a, g), [, bb)), ([a, g), [bb, cc)), ([a, g), [dd, )) 224 assertEquals(holesReported, report.getHoles().size()); 225 assertEquals(overlapsReported + 3, report.getOverlaps().size()); 226 } 227 228 /** 229 * Take last byte and add one to it. 230 */ 231 private static byte[] incrementRow(byte[] row) { 232 if (row.length == 0) { 233 return new byte[] { '0' }; 234 } 235 row[row.length - 1] = (byte) (((int) row[row.length - 1]) + 1); 236 return row; 237 } 238 239 @Test 240 public void testHoles() throws IOException, InterruptedException { 241 CatalogJanitor janitor = TEST_UTIL.getHBaseCluster().getMaster().getCatalogJanitor(); 242 243 CatalogJanitorReport report = janitor.getLastReport(); 244 // Assert no problems. 245 assertTrue(report.isEmpty()); 246 // Verify start and end region holes 247 verifyCornerHoles(janitor, T1); 248 // Verify start and end region holes 249 verifyCornerHoles(janitor, T2); 250 // Verify start and end region holes when next table is disable see: HBASE-27560 251 disableTable(T7); 252 verifyCornerHoles(janitor, T6); 253 verifyMiddleHole(janitor); 254 // Verify that MetaFixer is able to fix these holes 255 fixHoles(janitor); 256 } 257 258 private void fixHoles(CatalogJanitor janitor) throws IOException { 259 MetaFixer metaFixer = new MetaFixer(TEST_UTIL.getHBaseCluster().getMaster()); 260 janitor.scan(); 261 CatalogJanitorReport report = janitor.getLastReport(); 262 // Verify total number of holes, 2 in t1, t2, t6 each and one in t3 263 assertEquals("Number of holes are not matching", 7, report.getHoles().size()); 264 metaFixer.fix(); 265 janitor.scan(); 266 report = janitor.getLastReport(); 267 assertEquals("Holes are not fixed", 0, report.getHoles().size()); 268 } 269 270 private void verifyMiddleHole(CatalogJanitor janitor) throws IOException { 271 // Verify middle holes 272 RegionInfo firstRegion = getRegionInfo(T3, "".getBytes()); 273 RegionInfo secondRegion = getRegionInfo(T3, "bbb".getBytes()); 274 RegionInfo thirdRegion = getRegionInfo(T3, "ccc".getBytes()); 275 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore() 276 .deleteRegion(secondRegion); 277 LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, T3); 278 Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.getFirst(); 279 assertTrue(regionInfoRegionInfoPair.getFirst().getTable().equals(T3)); 280 assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(T3)); 281 assertTrue( 282 regionInfoRegionInfoPair.getFirst().getEncodedName().equals(firstRegion.getEncodedName())); 283 assertTrue( 284 regionInfoRegionInfoPair.getSecond().getEncodedName().equals(thirdRegion.getEncodedName())); 285 } 286 287 private void verifyCornerHoles(CatalogJanitor janitor, TableName tableName) throws IOException { 288 RegionStateStore regionStateStore = 289 TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStateStore(); 290 RegionInfo firstRegion = getRegionInfo(tableName, "".getBytes()); 291 RegionInfo secondRegion = getRegionInfo(tableName, "bbb".getBytes()); 292 regionStateStore.deleteRegion(firstRegion); 293 LinkedList<Pair<RegionInfo, RegionInfo>> holes = getHoles(janitor, tableName); 294 295 assertEquals(1, holes.size()); 296 Pair<RegionInfo, RegionInfo> regionInfoRegionInfoPair = holes.get(0); 297 assertTrue(regionInfoRegionInfoPair.getFirst().getTable() 298 .equals(RegionInfoBuilder.UNDEFINED.getTable())); 299 assertTrue(regionInfoRegionInfoPair.getSecond().getTable().equals(tableName)); 300 assertTrue( 301 regionInfoRegionInfoPair.getSecond().getEncodedName().equals(secondRegion.getEncodedName())); 302 303 RegionInfo lastRegion = getRegionInfo(tableName, "zzz".getBytes()); 304 RegionInfo secondLastRegion = getRegionInfo(tableName, "yyy".getBytes()); 305 regionStateStore.deleteRegion(lastRegion); 306 holes = getHoles(janitor, tableName); 307 assertEquals(2, holes.size()); 308 regionInfoRegionInfoPair = holes.get(1); 309 assertTrue(regionInfoRegionInfoPair.getFirst().getEncodedName() 310 .equals(secondLastRegion.getEncodedName())); 311 assertTrue(regionInfoRegionInfoPair.getSecond().getTable() 312 .equals(RegionInfoBuilder.UNDEFINED.getTable())); 313 } 314 315 // Get Holes filter by table 316 private LinkedList<Pair<RegionInfo, RegionInfo>> getHoles(CatalogJanitor janitor, 317 TableName tableName) throws IOException { 318 janitor.scan(); 319 CatalogJanitorReport lastReport = janitor.getLastReport(); 320 assertFalse(lastReport.isEmpty()); 321 LinkedList<Pair<RegionInfo, RegionInfo>> holes = new LinkedList<>(); 322 for (Pair<RegionInfo, RegionInfo> hole : lastReport.getHoles()) { 323 if ( 324 hole.getFirst().getTable().equals(tableName) 325 || hole.getSecond().getTable().equals(tableName) 326 ) { 327 holes.add(hole); 328 } 329 } 330 return holes; 331 } 332 333 private RegionInfo getRegionInfo(TableName tableName, byte[] row) throws IOException { 334 RegionInfo regionInfo = 335 TEST_UTIL.getConnection().getRegionLocator(tableName).getRegionLocation(row).getRegion(); 336 assertNotNull(regionInfo); 337 return regionInfo; 338 } 339 340 private void disableTable(TableName tableName) throws IOException, InterruptedException { 341 try { 342 TEST_UTIL.getAdmin().disableTable(tableName); 343 TEST_UTIL.waitTableDisabled(tableName, 30000); 344 } catch (TableNotEnabledException e) { 345 LOG.debug("Table: " + tableName + " already disabled, ignore."); 346 } 347 } 348}