001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.tool;
019
020import static org.apache.hadoop.hbase.regionserver.TestRegionServerNoMaster.closeRegion;
021import static org.junit.Assert.assertEquals;
022import static org.junit.Assert.assertFalse;
023import static org.junit.Assert.assertNotEquals;
024import static org.junit.Assert.assertNotNull;
025import static org.junit.Assert.assertTrue;
026import static org.mockito.ArgumentMatchers.anyLong;
027import static org.mockito.ArgumentMatchers.argThat;
028import static org.mockito.ArgumentMatchers.eq;
029import static org.mockito.ArgumentMatchers.isA;
030import static org.mockito.Mockito.atLeastOnce;
031import static org.mockito.Mockito.mock;
032import static org.mockito.Mockito.never;
033import static org.mockito.Mockito.spy;
034import static org.mockito.Mockito.times;
035import static org.mockito.Mockito.verify;
036import static org.mockito.Mockito.when;
037
038import java.util.List;
039import java.util.Map;
040import java.util.concurrent.ExecutorService;
041import java.util.concurrent.ScheduledThreadPoolExecutor;
042import java.util.concurrent.TimeUnit;
043import org.apache.hadoop.conf.Configuration;
044import org.apache.hadoop.hbase.HBaseClassTestRule;
045import org.apache.hadoop.hbase.HBaseConfiguration;
046import org.apache.hadoop.hbase.HBaseTestingUtil;
047import org.apache.hadoop.hbase.HConstants;
048import org.apache.hadoop.hbase.ServerName;
049import org.apache.hadoop.hbase.TableName;
050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
051import org.apache.hadoop.hbase.client.Put;
052import org.apache.hadoop.hbase.client.RegionInfo;
053import org.apache.hadoop.hbase.client.Table;
054import org.apache.hadoop.hbase.regionserver.HRegionServer;
055import org.apache.hadoop.hbase.testclassification.LargeTests;
056import org.apache.hadoop.hbase.util.Bytes;
057import org.apache.hadoop.util.ToolRunner;
058import org.junit.After;
059import org.junit.Before;
060import org.junit.ClassRule;
061import org.junit.Rule;
062import org.junit.Test;
063import org.junit.experimental.categories.Category;
064import org.junit.rules.TestName;
065import org.mockito.ArgumentMatcher;
066
067@Category({ LargeTests.class })
068public class TestCanaryTool {
069
070  @ClassRule
071  public static final HBaseClassTestRule CLASS_RULE =
072    HBaseClassTestRule.forClass(TestCanaryTool.class);
073
074  private HBaseTestingUtil testingUtility;
075  private static final byte[] FAMILY = Bytes.toBytes("f");
076  private static final byte[] COLUMN = Bytes.toBytes("col");
077
078  @Rule
079  public TestName name = new TestName();
080
081  private org.apache.logging.log4j.core.Appender mockAppender;
082
083  @Before
084  public void setUp() throws Exception {
085    testingUtility = new HBaseTestingUtil();
086    testingUtility.startMiniCluster();
087    mockAppender = mock(org.apache.logging.log4j.core.Appender.class);
088    when(mockAppender.getName()).thenReturn("mockAppender");
089    when(mockAppender.isStarted()).thenReturn(true);
090    ((org.apache.logging.log4j.core.Logger) org.apache.logging.log4j.LogManager
091      .getLogger("org.apache.hadoop.hbase")).addAppender(mockAppender);
092  }
093
094  @After
095  public void tearDown() throws Exception {
096    testingUtility.shutdownMiniCluster();
097    ((org.apache.logging.log4j.core.Logger) org.apache.logging.log4j.LogManager
098      .getLogger("org.apache.hadoop.hbase")).removeAppender(mockAppender);
099  }
100
101  @Test
102  public void testBasicZookeeperCanaryWorks() throws Exception {
103    final String[] args = { "-t", "10000", "-zookeeper" };
104    testZookeeperCanaryWithArgs(args);
105  }
106
107  @Test
108  public void testZookeeperCanaryPermittedFailuresArgumentWorks() throws Exception {
109    final String[] args =
110      { "-t", "10000", "-zookeeper", "-treatFailureAsError", "-permittedZookeeperFailures", "1" };
111    testZookeeperCanaryWithArgs(args);
112  }
113
114  @Test
115  public void testBasicCanaryWorks() throws Exception {
116    final TableName tableName = TableName.valueOf(name.getMethodName());
117    Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY });
118    // insert some test rows
119    for (int i = 0; i < 1000; i++) {
120      byte[] iBytes = Bytes.toBytes(i);
121      Put p = new Put(iBytes);
122      p.addColumn(FAMILY, COLUMN, iBytes);
123      table.put(p);
124    }
125    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
126    CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
127    CanaryTool canary = new CanaryTool(executor, sink);
128    String[] args = { "-writeSniffing", "-t", "10000", tableName.getNameAsString() };
129    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
130    assertEquals("verify no read error count", 0, canary.getReadFailures().size());
131    assertEquals("verify no write error count", 0, canary.getWriteFailures().size());
132    verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
133      isA(ColumnFamilyDescriptor.class), anyLong());
134  }
135
136  /**
137   * When CanaryTool times out, it should stop scanning and shutdown quickly and gracefully. This
138   * test helps to confirm that threadpools do not continue executing work after the canary
139   * finishes. It also verifies sink behavior and measures correct failure counts in the sink.
140   * @throws Exception if it can't create a table, communicate with minicluster, or run the canary.
141   */
142  @Test
143  public void testCanaryStopsScanningAfterTimeout() throws Exception {
144    // Prepare a table with multiple regions, and close those regions on the regionserver.
145    // Do not notify HMaster or META. CanaryTool will scan and receive NotServingRegionExceptions.
146    final TableName tableName = TableName.valueOf(name.getMethodName());
147    // Close the unused Table reference returned by createMultiRegionTable.
148    testingUtility.createMultiRegionTable(tableName, new byte[][] { FAMILY }).close();
149    List<RegionInfo> regions = testingUtility.getAdmin().getRegions(tableName);
150    assertTrue("verify table has multiple regions", regions.size() > 1);
151    HRegionServer regionserver = testingUtility.getMiniHBaseCluster().getRegionServer(0);
152    for (RegionInfo region : regions) {
153      closeRegion(testingUtility, regionserver, region);
154    }
155
156    // Run CanaryTool with 1 thread. This thread will attempt to scan the first region.
157    // It will use default rpc retries and receive NotServingRegionExceptions for many seconds
158    // according to HConstants.RETRY_BACKOFF. The CanaryTool timeout is set to 4 seconds, so it
159    // will time out before the first region scan is complete.
160    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
161    CanaryTool canary = new CanaryTool(executor);
162    String[] args = { "-t", "4000", tableName.getNameAsString() };
163    int retCode = ToolRunner.run(testingUtility.getConfiguration(), canary, args);
164    executor.shutdown();
165    try {
166      if (!executor.awaitTermination(3, TimeUnit.SECONDS)) {
167        executor.shutdownNow();
168      }
169    } catch (InterruptedException e) {
170      executor.shutdownNow();
171    }
172
173    CanaryTool.Sink sink = canary.getActiveSink();
174    assertEquals("verify canary timed out with TIMEOUT_ERROR_EXIT_CODE", 3, retCode);
175    assertEquals("verify only the first region failed", 1, sink.getReadFailureCount());
176    assertEquals("verify no successful reads", 0, sink.getReadSuccessCount());
177    assertEquals("verify we were attempting to scan all regions", regions.size(),
178      ((CanaryTool.RegionStdOutSink) sink).getTotalExpectedRegions());
179  }
180
181  @Test
182  public void testCanaryRegionTaskReadAllCF() throws Exception {
183    final TableName tableName = TableName.valueOf(name.getMethodName());
184    Table table = testingUtility.createTable(tableName,
185      new byte[][] { Bytes.toBytes("f1"), Bytes.toBytes("f2") });
186    // insert some test rows
187    for (int i = 0; i < 1000; i++) {
188      byte[] iBytes = Bytes.toBytes(i);
189      Put p = new Put(iBytes);
190      p.addColumn(Bytes.toBytes("f1"), COLUMN, iBytes);
191      p.addColumn(Bytes.toBytes("f2"), COLUMN, iBytes);
192      table.put(p);
193    }
194    Configuration configuration = HBaseConfiguration.create(testingUtility.getConfiguration());
195    String[] args = { "-t", "10000", "testCanaryRegionTaskReadAllCF" };
196    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
197    for (boolean readAllCF : new boolean[] { true, false }) {
198      CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
199      CanaryTool canary = new CanaryTool(executor, sink);
200      configuration.setBoolean(HConstants.HBASE_CANARY_READ_ALL_CF, readAllCF);
201      assertEquals(0, ToolRunner.run(configuration, canary, args));
202      // the test table has two column family. If readAllCF set true,
203      // we expect read count is double of region count
204      int expectedReadCount =
205        readAllCF ? 2 * sink.getTotalExpectedRegions() : sink.getTotalExpectedRegions();
206      assertEquals("canary region success count should equal total expected read count",
207        expectedReadCount, sink.getReadSuccessCount());
208      Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap();
209      assertFalse("verify region map has size > 0", regionMap.isEmpty());
210
211      for (String regionName : regionMap.keySet()) {
212        for (CanaryTool.RegionTaskResult res : regionMap.get(regionName)) {
213          assertNotNull("verify getRegionNameAsString()", regionName);
214          assertNotNull("verify getRegionInfo()", res.getRegionInfo());
215          assertNotNull("verify getTableName()", res.getTableName());
216          assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
217          assertNotNull("verify getServerName()", res.getServerName());
218          assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
219          assertNotNull("verify getColumnFamily()", res.getColumnFamily());
220          assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString());
221          assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
222          assertTrue("read took some time", res.getReadLatency() > -1);
223        }
224      }
225    }
226  }
227
228  @Test
229  public void testCanaryRegionTaskResult() throws Exception {
230    TableName tableName = TableName.valueOf("testCanaryRegionTaskResult");
231    Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY });
232    // insert some test rows
233    for (int i = 0; i < 1000; i++) {
234      byte[] iBytes = Bytes.toBytes(i);
235      Put p = new Put(iBytes);
236      p.addColumn(FAMILY, COLUMN, iBytes);
237      table.put(p);
238    }
239    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
240    CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
241    CanaryTool canary = new CanaryTool(executor, sink);
242    String[] args = { "-writeSniffing", "-t", "10000", "testCanaryRegionTaskResult" };
243    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
244
245    assertTrue("canary should expect to scan at least 1 region",
246      sink.getTotalExpectedRegions() > 0);
247    assertTrue("there should be no read failures", sink.getReadFailureCount() == 0);
248    assertTrue("there should be no write failures", sink.getWriteFailureCount() == 0);
249    assertTrue("verify read success count > 0", sink.getReadSuccessCount() > 0);
250    assertTrue("verify write success count > 0", sink.getWriteSuccessCount() > 0);
251    verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
252      isA(ColumnFamilyDescriptor.class), anyLong());
253    verify(sink, atLeastOnce()).publishWriteTiming(isA(ServerName.class), isA(RegionInfo.class),
254      isA(ColumnFamilyDescriptor.class), anyLong());
255
256    assertEquals("canary region success count should equal total expected regions",
257      sink.getReadSuccessCount() + sink.getWriteSuccessCount(), sink.getTotalExpectedRegions());
258    Map<String, List<CanaryTool.RegionTaskResult>> regionMap = sink.getRegionMap();
259    assertFalse("verify region map has size > 0", regionMap.isEmpty());
260
261    for (String regionName : regionMap.keySet()) {
262      for (CanaryTool.RegionTaskResult res : regionMap.get(regionName)) {
263        assertNotNull("verify getRegionNameAsString()", regionName);
264        assertNotNull("verify getRegionInfo()", res.getRegionInfo());
265        assertNotNull("verify getTableName()", res.getTableName());
266        assertNotNull("verify getTableNameAsString()", res.getTableNameAsString());
267        assertNotNull("verify getServerName()", res.getServerName());
268        assertNotNull("verify getServerNameAsString()", res.getServerNameAsString());
269        assertNotNull("verify getColumnFamily()", res.getColumnFamily());
270        assertNotNull("verify getColumnFamilyNameAsString()", res.getColumnFamilyNameAsString());
271
272        if (regionName.contains(CanaryTool.DEFAULT_WRITE_TABLE_NAME.getNameAsString())) {
273          assertTrue("write to region " + regionName + " succeeded", res.isWriteSuccess());
274          assertTrue("write took some time", res.getWriteLatency() > -1);
275        } else {
276          assertTrue("read from region " + regionName + " succeeded", res.isReadSuccess());
277          assertTrue("read took some time", res.getReadLatency() > -1);
278        }
279      }
280    }
281  }
282
283  // Ignore this test. It fails w/ the below on some mac os x.
284  // [ERROR] Failures:
285  // [ERROR] TestCanaryTool.testReadTableTimeouts:216
286  // Argument(s) are different! Wanted:
287  // mockAppender.doAppend(
288  // <custom argument matcher>
289  // );
290  // -> at org.apache.hadoop.hbase.tool.TestCanaryTool
291  // .testReadTableTimeouts(TestCanaryTool.java:216)
292  // Actual invocations have different arguments:
293  // mockAppender.doAppend(
294  // org.apache.log4j.spi.LoggingEvent@2055cfc1
295  // );
296  // )
297  // )
298  //
299  @org.junit.Ignore
300  @Test
301  public void testReadTableTimeouts() throws Exception {
302    final TableName[] tableNames = new TableName[] { TableName.valueOf(name.getMethodName() + "1"),
303      TableName.valueOf(name.getMethodName() + "2") };
304    // Create 2 test tables.
305    for (int j = 0; j < 2; j++) {
306      Table table = testingUtility.createTable(tableNames[j], new byte[][] { FAMILY });
307      // insert some test rows
308      for (int i = 0; i < 10; i++) {
309        byte[] iBytes = Bytes.toBytes(i + j);
310        Put p = new Put(iBytes);
311        p.addColumn(FAMILY, COLUMN, iBytes);
312        table.put(p);
313      }
314    }
315    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
316    CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
317    CanaryTool canary = new CanaryTool(executor, sink);
318    String configuredTimeoutStr = tableNames[0].getNameAsString() + "=" + Long.MAX_VALUE + ","
319      + tableNames[1].getNameAsString() + "=0";
320    String[] args = { "-readTableTimeouts", configuredTimeoutStr, name.getMethodName() + "1",
321      name.getMethodName() + "2" };
322    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
323    verify(sink, times(tableNames.length)).initializeAndGetReadLatencyForTable(isA(String.class));
324    for (int i = 0; i < 2; i++) {
325      assertNotEquals("verify non-null read latency", null,
326        sink.getReadLatencyMap().get(tableNames[i].getNameAsString()));
327      assertNotEquals("verify non-zero read latency", 0L,
328        sink.getReadLatencyMap().get(tableNames[i].getNameAsString()));
329    }
330    // One table's timeout is set for 0 ms and thus, should lead to an error.
331    verify(mockAppender, times(1))
332      .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() {
333        @Override
334        public boolean matches(org.apache.logging.log4j.core.LogEvent argument) {
335          return argument.getMessage().getFormattedMessage()
336            .contains("exceeded the configured read timeout.");
337        }
338      }));
339    verify(mockAppender, times(2))
340      .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() {
341        @Override
342        public boolean matches(org.apache.logging.log4j.core.LogEvent argument) {
343          return argument.getMessage().getFormattedMessage().contains("Configured read timeout");
344        }
345      }));
346  }
347
348  @Test
349  public void testWriteTableTimeout() throws Exception {
350    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
351    CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
352    CanaryTool canary = new CanaryTool(executor, sink);
353    String[] args = { "-writeSniffing", "-writeTableTimeout", String.valueOf(Long.MAX_VALUE) };
354    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
355    assertNotEquals("verify non-null write latency", null, sink.getWriteLatency());
356    assertNotEquals("verify non-zero write latency", 0L, sink.getWriteLatency());
357    verify(mockAppender, times(1))
358      .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() {
359        @Override
360        public boolean matches(org.apache.logging.log4j.core.LogEvent argument) {
361          return argument.getMessage().getFormattedMessage().contains("Configured write timeout");
362        }
363      }));
364  }
365
366  // no table created, so there should be no regions
367  @Test
368  public void testRegionserverNoRegions() throws Exception {
369    runRegionserverCanary();
370    verify(mockAppender)
371      .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() {
372        @Override
373        public boolean matches(org.apache.logging.log4j.core.LogEvent argument) {
374          return argument.getMessage().getFormattedMessage()
375            .contains("Regionserver not serving any regions");
376        }
377      }));
378  }
379
380  // by creating a table, there shouldn't be any region servers not serving any regions
381  @Test
382  public void testRegionserverWithRegions() throws Exception {
383    final TableName tableName = TableName.valueOf(name.getMethodName());
384    testingUtility.createTable(tableName, new byte[][] { FAMILY });
385    runRegionserverCanary();
386    verify(mockAppender, never())
387      .append(argThat(new ArgumentMatcher<org.apache.logging.log4j.core.LogEvent>() {
388        @Override
389        public boolean matches(org.apache.logging.log4j.core.LogEvent argument) {
390          return argument.getMessage().getFormattedMessage()
391            .contains("Regionserver not serving any regions");
392        }
393      }));
394  }
395
396  @Test
397  public void testRawScanConfig() throws Exception {
398    final TableName tableName = TableName.valueOf(name.getMethodName());
399    Table table = testingUtility.createTable(tableName, new byte[][] { FAMILY });
400    // insert some test rows
401    for (int i = 0; i < 1000; i++) {
402      byte[] iBytes = Bytes.toBytes(i);
403      Put p = new Put(iBytes);
404      p.addColumn(FAMILY, COLUMN, iBytes);
405      table.put(p);
406    }
407    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
408    CanaryTool.RegionStdOutSink sink = spy(new CanaryTool.RegionStdOutSink());
409    CanaryTool canary = new CanaryTool(executor, sink);
410    String[] args = { "-t", "10000", name.getMethodName() };
411    org.apache.hadoop.conf.Configuration conf =
412      new org.apache.hadoop.conf.Configuration(testingUtility.getConfiguration());
413    conf.setBoolean(HConstants.HBASE_CANARY_READ_RAW_SCAN_KEY, true);
414    assertEquals(0, ToolRunner.run(conf, canary, args));
415    verify(sink, atLeastOnce()).publishReadTiming(isA(ServerName.class), isA(RegionInfo.class),
416      isA(ColumnFamilyDescriptor.class), anyLong());
417    assertEquals("verify no read error count", 0, canary.getReadFailures().size());
418  }
419
420  private void runRegionserverCanary() throws Exception {
421    ExecutorService executor = new ScheduledThreadPoolExecutor(1);
422    CanaryTool canary = new CanaryTool(executor, new CanaryTool.RegionServerStdOutSink());
423    String[] args = { "-t", "10000", "-regionserver" };
424    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
425    assertEquals("verify no read error count", 0, canary.getReadFailures().size());
426  }
427
428  private void testZookeeperCanaryWithArgs(String[] args) throws Exception {
429    String hostPort = testingUtility.getZkCluster().getAddress().toString();
430    testingUtility.getConfiguration().set(HConstants.ZOOKEEPER_QUORUM, hostPort);
431    ExecutorService executor = new ScheduledThreadPoolExecutor(2);
432    CanaryTool.ZookeeperStdOutSink sink = spy(new CanaryTool.ZookeeperStdOutSink());
433    CanaryTool canary = new CanaryTool(executor, sink);
434    assertEquals(0, ToolRunner.run(testingUtility.getConfiguration(), canary, args));
435
436    String baseZnode = testingUtility.getConfiguration().get(HConstants.ZOOKEEPER_ZNODE_PARENT,
437      HConstants.DEFAULT_ZOOKEEPER_ZNODE_PARENT);
438    verify(sink, atLeastOnce()).publishReadTiming(eq(baseZnode), eq(hostPort), anyLong());
439  }
440}