001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.tool; 019 020import static org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster.closeRegion; 021import static org.junit.Assert.assertEquals; 022import static org.junit.Assert.assertFalse; 023import static org.junit.Assert.assertNotEquals; 024import static org.junit.Assert.assertNotNull; 025import static org.junit.Assert.assertTrue; 026import static org.mockito.ArgumentMatchers.anyLong; 027import static org.mockito.ArgumentMatchers.argThat; 028import static org.mockito.ArgumentMatchers.eq; 029import static org.mockito.ArgumentMatchers.isA; 030import static org.mockito.Mockito.atLeastOnce; 031import static org.mockito.Mockito.mock; 032import static org.mockito.Mockito.never; 033import static org.mockito.Mockito.spy; 034import static org.mockito.Mockito.times; 035import static org.mockito.Mockito.verify; 036import static org.mockito.Mockito.when; 037 038import java.util.List; 039import java.util.Map; 040import java.util.concurrent.ExecutorService; 041import java.util.concurrent.ScheduledThreadPoolExecutor; 042import java.util.concurrent.TimeUnit; 043import org.apache.hadoop.conf.Configuration; 044import org.apache.hadoop.hbase.HBaseClassTestRule; 045import org.apache.hadoop.hbase.HBaseConfiguration; 046import org.apache.hadoop.hbase.HBaseTestingUtil; 047import org.apache.hadoop.hbase.HConstants; 048import org.apache.hadoop.hbase.ServerName; 049import org.apache.hadoop.hbase.TableName; 050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 051import org.apache.hadoop.hbase.client.Put; 052import org.apache.hadoop.hbase.client.RegionInfo; 053import org.apache.hadoop.hbase.client.Table; 054import org.apache.hadoop.hbase.regionserver.HRegionServer; 055import org.apache.hadoop.hbase.testclassification.LargeTests; 056import org.apache.hadoop.hbase.util.Bytes; 057import org.apache.hadoop.util.ToolRunner; 058import org.junit.After; 059import org.junit.Before; 060import org.junit.ClassRule; 061import org.junit.Rule; 062import org.junit.Test; 063import org.junit.experimental.categories.Category; 064import org.junit.rules.TestName; 065import org.mockito.ArgumentMatcher; 066 067@Category({ LargeTests.class }) 068public class TestCanaryTool { 069 070 @ClassRule 071 public static final HBaseClassTestRule CLASS_RULE = 072 HBaseClassTestRule.forClass(TestCanaryTool.class); 073 074 private HBaseTestingUtil testingUtility; 075 private static final byte[] FAMILY = Bytes.toBytes("f"); 076 private static final byte[] COLUMN = Bytes.toBytes("col"); 077 078 @Rule 079 public TestName name = new TestName(); 080 081 private org.apache.logging.log4j.core.Appender mockAppender; 082 083 @Before 084 public void setUp() throws Exception { 085 testingUtility = new HBaseTestingUtil(); 086 testingUtility.startMiniCluster(); 087 mockAppender = mock(org.apache.logging.log4j.core.Appender.class); 088 when(mockAppender.getName()).thenReturn("mockAppender"); 089 when(mockAppender.isStarted()).thenReturn(true); 090 ((org.apache.logging.log4j.core.Logger) org.apache.logging.log4j.LogManager 091 .getLogger("org.apache.hadoop.hbase")).addAppender(mockAppender); 092 } 093 094 @After 095 public void tearDown() throws Exception { 096 testingUtility.shutdownMiniCluster(); 097 ((org.apache.logging.log4j.core.Logger) org.apache.logging.log4j.LogManager 098 .getLogger("org.apache.hadoop.hbase")).removeAppender(mockAppender); 099 } 100 101 @Test 102 public void testBasicZookeeperCanaryWorks() throws Exception { 103 final String[] args = { "-t", "10000", "-zookeeper" }; 104 testZookeeperCanaryWithArgs(args); 105 } 106 107 @Test 108 public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception { 109 final String[] args = 110 { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" }; 111 testZookeeperCanaryWithArgs(args); 112 } 113 114 @Test 115 public void testBasicCanaryWorks() throws Exception { 116 final TableName tableName = TableName.valueOf(name.getMethodName()); 117 Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY }); 118 // insert some test rows 119 for (int i = 0; i < 1000; i++) { 120 byte[] iBytes = Bytes.toBytes(i); 121 Put p = new Put(iBytes); 122 p.addColumn(FAMILY, COLUMN, iBytes); 123 table.put(p); 124 } 125 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 126 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 127 CanaryTool canary = new CanaryTool(executor, sink); 128 String[] args = { "-writeSniffing", "-t", "10000", tableName.getNameAsString() }; 129 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 130 assertEquals("verify no read error count", 0, canary.getReadFailures().size()); 131 assertEquals("verify no write error count", 0, canary.getWriteFailures().size()); 132 verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class), 133 isA(ColumnFamilyDescriptor.class), anyLong()); 134 } 135 136 /** 137 * When CanaryTool times out, it should stop scanning and shutdown quickly and gracefully. This 138 * test helps to confirm that threadpools do not continue executing work after the canary 139 * finishes. It also verifies sink behavior and measures correct failure counts in the sink. 140 * @throws Exception if it can't create a table, communicate with minicluster, or run the canary. 141 */ 142 @Test 143 public void testCanaryStopsScanningAfterTimeout() throws Exception { 144 // Prepare a table with multiple regions, and close those regions on the regionserver. 145 // Do not notify HMaster or META. CanaryTool will scan and receive NotServingRegionExceptions. 146 final TableName tableName = TableName.valueOf(name.getMethodName()); 147 // Close the unused Table reference returned by createMultiRegionTable. 148 testingUtility.createMultiRegionTable(tableName, new byte[][] { FAMILY }).close(); 149 List<RegionInfo> regions = testingUtility.getAdmin().getRegions(tableName); 150 assertTrue("verify table has multiple regions", regions.size() > 1); 151 HRegionServer regionserver = testingUtility.getMiniHBaseCluster().getRegionServer(0); 152 for (RegionInfo region : regions) { 153 closeRegion(testingUtility, regionserver, region); 154 } 155 156 // Run CanaryTool with 1 thread. This thread will attempt to scan the first region. 157 // It will use default rpc retries and receive NotServingRegionExceptions for many seconds 158 // according to HConstants.RETRY_BACKOFF. The CanaryTool timeout is set to 4 seconds, so it 159 // will time out before the first region scan is complete. 160 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 161 CanaryTool canary = new CanaryTool(executor); 162 String[] args = { "-t", "4000", tableName.getNameAsString() }; 163 int retCode = ToolRunner.run(testingUtility.getConfiguration(), canary, args); 164 executor.shutdown(); 165 try { 166 if (!executor.awaitTermination(3, TimeUnit.SECONDS)) { 167 executor.shutdownNow(); 168 } 169 } catch (InterruptedException e) { 170 executor.shutdownNow(); 171 } 172 173 CanaryTool.Sink sink = canary.getActiveSink(); 174 assertEquals("verify canary timed out with TIMEOUT_ERROR_EXIT_CODE", 3, retCode); 175 assertEquals("verify only the first region failed", 1, sink.getReadFailureCount()); 176 assertEquals("verify no successful reads", 0, sink.getReadSuccessCount()); 177 assertEquals("verify we were attempting to scan all regions", regions.size(), 178 ((CanaryTool.RegionStdOutSink) sink).getTotalExpectedRegions()); 179 } 180 181 @Test 182 public void testCanaryRegionTaskReadAllCF() throws Exception { 183 final TableName tableName = TableName.valueOf(name.getMethodName()); 184 Table table = testingUtility.createTable(tableName, 185 new byte[][] { Bytes.toBytes("f1"), Bytes.toBytes("f2") }); 186 // insert some test rows 187 for (int i = 0; i < 1000; i++) { 188 byte[] iBytes = Bytes.toBytes(i); 189 Put p = new Put(iBytes); 190 p.addColumn(Bytes.toBytes("f1"), COLUMN, iBytes); 191 p.addColumn(Bytes.toBytes("f2"), COLUMN, iBytes); 192 table.put(p); 193 } 194 Configuration configuration = HBaseConfiguration.create(testingUtility.getConfiguration()); 195 String[] args = { "-t", "10000", "testCanaryRegionTaskReadAllCF" }; 196 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 197 for (boolean readAllCF : new boolean[] { true, false }) { 198 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 199 CanaryTool canary = new CanaryTool(executor, sink); 200 configuration.setBoolean(HConstants.HBASE_CANARY_READ_ALL_CF, readAllCF); 201 assertEquals(0, ToolRunner.run(configuration, canary, args)); 202 // the test table has two column family. If readAllCF set true, 203 // we expect read count is double of region count 204 int expectedReadCount = 205 readAllCF ? 2 * sink.getTotalExpectedRegions() : sink.getTotalExpectedRegions(); 206 assertEquals("canary region success count should equal total expected read count", 207 expectedReadCount, sink.getReadSuccessCount()); 208 Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap(); 209 assertFalse("verify region map has size > 0", regionMap.isEmpty()); 210 211 for (String regionName : regionMap.keySet()) { 212 for (CanaryTool.RegionTaskResult res : regionMap.get(regionName)) { 213 assertNotNull("verify getRegionNameAsString()", regionName); 214 assertNotNull("verify getRegionInfo()", res.getRegionInfo()); 215 assertNotNull("verify getTableName()", res.getTableName()); 216 assertNotNull("verify getTableNameAsString()", res.getTableNameAsString()); 217 assertNotNull("verify getServerName()", res.getServerName()); 218 assertNotNull("verify getServerNameAsString()", res.getServerNameAsString()); 219 assertNotNull("verify getColumnFamily()", res.getColumnFamily()); 220 assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString()); 221 assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess()); 222 assertTrue("read took some time", res.getReadLatency() > -1); 223 } 224 } 225 } 226 } 227 228 @Test 229 public void testCanaryRegionTaskResult() throws Exception { 230 TableName tableName = TableName.valueOf("testCanaryRegionTaskResult"); 231 Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY }); 232 // insert some test rows 233 for (int i = 0; i < 1000; i++) { 234 byte[] iBytes = Bytes.toBytes(i); 235 Put p = new Put(iBytes); 236 p.addColumn(FAMILY, COLUMN, iBytes); 237 table.put(p); 238 } 239 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 240 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 241 CanaryTool canary = new CanaryTool(executor, sink); 242 String[] args = { "-writeSniffing", "-t", "10000", "testCanaryRegionTaskResult" }; 243 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 244 245 assertTrue("canary should expect to scan at least 1 region", 246 sink.getTotalExpectedRegions() > 0); 247 assertTrue("there should be no read failures", sink.getReadFailureCount() == 0); 248 assertTrue("there should be no write failures", sink.getWriteFailureCount() == 0); 249 assertTrue("verify read success count > 0", sink.getReadSuccessCount() > 0); 250 assertTrue("verify write success count > 0", sink.getWriteSuccessCount() > 0); 251 verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class), 252 isA(ColumnFamilyDescriptor.class), anyLong()); 253 verify(sink, atLeastOnce()).publishWriteTiming(isA(ServerName.class), isA(RegionInfo.class), 254 isA(ColumnFamilyDescriptor.class), anyLong()); 255 256 assertEquals("canary region success count should equal total expected regions", 257 sink.getReadSuccessCount() + sink.getWriteSuccessCount(), sink.getTotalExpectedRegions()); 258 Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap(); 259 assertFalse("verify region map has size > 0", regionMap.isEmpty()); 260 261 for (String regionName : regionMap.keySet()) { 262 for (CanaryTool.RegionTaskResult res : regionMap.get(regionName)) { 263 assertNotNull("verify getRegionNameAsString()", regionName); 264 assertNotNull("verify getRegionInfo()", res.getRegionInfo()); 265 assertNotNull("verify getTableName()", res.getTableName()); 266 assertNotNull("verify getTableNameAsString()", res.getTableNameAsString()); 267 assertNotNull("verify getServerName()", res.getServerName()); 268 assertNotNull("verify getServerNameAsString()", res.getServerNameAsString()); 269 assertNotNull("verify getColumnFamily()", res.getColumnFamily()); 270 assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString()); 271 272 if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) { 273 assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess()); 274 assertTrue("write took some time", res.getWriteLatency() > -1); 275 } else { 276 assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess()); 277 assertTrue("read took some time", res.getReadLatency() > -1); 278 } 279 } 280 } 281 } 282 283 // Ignore this test. It fails w/ the below on some mac os x. 284 // [ERROR] Failures: 285 // [ERROR] TestCanaryTool.testReadTableTimeouts:216 286 // Argument(s) are different! Wanted: 287 // mockAppender.doAppend( 288 // <custom argument matcher> 289 // ); 290 // -> at org.apache.hadoop.hbase.tool.TestCanaryTool 291 // .testReadTableTimeouts(TestCanaryTool.java:216) 292 // Actual invocations have different arguments: 293 // mockAppender.doAppend( 294 // org.apache.log4j.spi.LoggingEvent@2055cfc1 295 // ); 296 // ) 297 // ) 298 // 299 @org.junit.Ignore 300 @Test 301 public void testReadTableTimeouts() throws Exception { 302 final TableName[] tableNames = new TableName[] { TableName.valueOf(name.getMethodName() + "1"), 303 TableName.valueOf(name.getMethodName() + "2") }; 304 // Create 2 test tables. 305 for (int j = 0; j < 2; j++) { 306 Table table = testingUtility.createTable(tableNames[j], new byte[][] { FAMILY }); 307 // insert some test rows 308 for (int i = 0; i < 10; i++) { 309 byte[] iBytes = Bytes.toBytes(i + j); 310 Put p = new Put(iBytes); 311 p.addColumn(FAMILY, COLUMN, iBytes); 312 table.put(p); 313 } 314 } 315 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 316 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 317 CanaryTool canary = new CanaryTool(executor, sink); 318 String configuredTimeoutStr = tableNames[0].getNameAsString() + "=" + Long.MAX_VALUE + "," 319 + tableNames[1].getNameAsString() + "=0"; 320 String[] args = { "-readTableTimeouts", configuredTimeoutStr, name.getMethodName() + "1", 321 name.getMethodName() + "2" }; 322 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 323 verify(sink, times(tableNames.length)).initializeAndGetReadLatencyForTable(isA(String.class)); 324 for (int i = 0; i < 2; i++) { 325 assertNotEquals("verify non-null read latency", null, 326 sink.getReadLatencyMap().get(tableNames[i].getNameAsString())); 327 assertNotEquals("verify non-zero read latency", 0L, 328 sink.getReadLatencyMap().get(tableNames[i].getNameAsString())); 329 } 330 // One table's timeout is set for 0 ms and thus, should lead to an error. 331 verify(mockAppender, times(1)) 332 .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() { 333 @Override 334 public boolean matches(org.apache.logging.log4j.core.LogEvent argument) { 335 return argument.getMessage().getFormattedMessage() 336 .contains("exceeded the configured read timeout."); 337 } 338 })); 339 verify(mockAppender, times(2)) 340 .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() { 341 @Override 342 public boolean matches(org.apache.logging.log4j.core.LogEvent argument) { 343 return argument.getMessage().getFormattedMessage().contains("Configured read timeout"); 344 } 345 })); 346 } 347 348 @Test 349 public void testWriteTableTimeout() throws Exception { 350 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 351 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 352 CanaryTool canary = new CanaryTool(executor, sink); 353 String[] args = { "-writeSniffing", "-writeTableTimeout", String.valueOf(Long.MAX_VALUE) }; 354 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 355 assertNotEquals("verify non-null write latency", null, sink.getWriteLatency()); 356 assertNotEquals("verify non-zero write latency", 0L, sink.getWriteLatency()); 357 verify(mockAppender, times(1)) 358 .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() { 359 @Override 360 public boolean matches(org.apache.logging.log4j.core.LogEvent argument) { 361 return argument.getMessage().getFormattedMessage().contains("Configured write timeout"); 362 } 363 })); 364 } 365 366 // no table created, so there should be no regions 367 @Test 368 public void testRegionserverNoRegions() throws Exception { 369 runRegionserverCanary(); 370 verify(mockAppender) 371 .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() { 372 @Override 373 public boolean matches(org.apache.logging.log4j.core.LogEvent argument) { 374 return argument.getMessage().getFormattedMessage() 375 .contains("Regionserver not serving any regions"); 376 } 377 })); 378 } 379 380 // by creating a table, there shouldn't be any region servers not serving any regions 381 @Test 382 public void testRegionserverWithRegions() throws Exception { 383 final TableName tableName = TableName.valueOf(name.getMethodName()); 384 testingUtility.createTable(tableName, new byte[][] { FAMILY }); 385 runRegionserverCanary(); 386 verify(mockAppender, never()) 387 .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() { 388 @Override 389 public boolean matches(org.apache.logging.log4j.core.LogEvent argument) { 390 return argument.getMessage().getFormattedMessage() 391 .contains("Regionserver not serving any regions"); 392 } 393 })); 394 } 395 396 @Test 397 public void testRawScanConfig() throws Exception { 398 final TableName tableName = TableName.valueOf(name.getMethodName()); 399 Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY }); 400 // insert some test rows 401 for (int i = 0; i < 1000; i++) { 402 byte[] iBytes = Bytes.toBytes(i); 403 Put p = new Put(iBytes); 404 p.addColumn(FAMILY, COLUMN, iBytes); 405 table.put(p); 406 } 407 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 408 CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink()); 409 CanaryTool canary = new CanaryTool(executor, sink); 410 String[] args = { "-t", "10000", name.getMethodName() }; 411 org.apache.hadoop.conf.Configuration conf = 412 new org.apache.hadoop.conf.Configuration(testingUtility.getConfiguration()); 413 conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true); 414 assertEquals(0, ToolRunner.run(conf, canary, args)); 415 verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class), 416 isA(ColumnFamilyDescriptor.class), anyLong()); 417 assertEquals("verify no read error count", 0, canary.getReadFailures().size()); 418 } 419 420 private void runRegionserverCanary() throws Exception { 421 ExecutorService executor = new ScheduledThreadPoolExecutor(1); 422 CanaryTool canary = new CanaryTool(executor, new CanaryTool.RegionServerStdOutSink()); 423 String[] args = { "-t", "10000", "-regionserver" }; 424 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 425 assertEquals("verify no read error count", 0, canary.getReadFailures().size()); 426 } 427 428 private void testZookeeperCanaryWithArgs(String[] args) throws Exception { 429 String hostPort = testingUtility.getZkCluster().getAddress().toString(); 430 testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM, hostPort); 431 ExecutorService executor = new ScheduledThreadPoolExecutor(2); 432 CanaryTool.ZookeeperStdOutSink sink = spy(new CanaryTool.ZookeeperStdOutSink()); 433 CanaryTool canary = new CanaryTool(executor, sink); 434 assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args)); 435 436 String baseZnode = testingUtility.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT, 437 HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT); 438 verify(sink, atLeastOnce()).publishReadTiming(eq(baseZnode), eq(hostPort), anyLong()); 439 } 440}