001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.fail;
021
022import java.io.IOException;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.CompatibilityFactory;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtil;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
031import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
037import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
038import org.apache.hadoop.hbase.test.MetricsAssertHelper;
039import org.apache.hadoop.hbase.testclassification.MasterTests;
040import org.apache.hadoop.hbase.testclassification.MediumTests;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.apache.hadoop.hbase.util.TableDescriptorChecker;
043import org.junit.AfterClass;
044import org.junit.BeforeClass;
045import org.junit.ClassRule;
046import org.junit.Rule;
047import org.junit.Test;
048import org.junit.experimental.categories.Category;
049import org.junit.rules.TestName;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053@Category({ MasterTests.class, MediumTests.class })
054public class TestAssignmentManagerMetrics {
055
056  @ClassRule
057  public static final HBaseClassTestRule CLASS_RULE =
058    HBaseClassTestRule.forClass(TestAssignmentManagerMetrics.class);
059
060  private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class);
061  private static final MetricsAssertHelper METRICS_HELPER =
062    CompatibilityFactory.getInstance(MetricsAssertHelper.class);
063
064  private static SingleProcessHBaseCluster CLUSTER;
065  private static HMaster MASTER;
066  private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
067  private static final int MSG_INTERVAL = 1000;
068
069  @Rule
070  public TestName name = new TestName();
071
072  @BeforeClass
073  public static void startCluster() throws Exception {
074    LOG.info("Starting cluster");
075    Configuration conf = TEST_UTIL.getConfiguration();
076
077    // Enable sanity check for coprocessor, so that region reopen fails on the RS
078    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);
079
080    // set RIT stuck warning threshold to a small value
081    conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20);
082
083    // set msgInterval to 1 second
084    conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL);
085
086    // set tablesOnMaster to none
087    conf.set("hbase.balancer.tablesOnMaster", "none");
088
089    // set client sync wait timeout to 5sec
090    conf.setInt("hbase.client.sync.wait.timeout.msec", 5000);
091    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
092    conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500);
093    // set a small interval for updating rit metrics
094    conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL);
095    // set a small assign attempts for avoiding assert when retrying. (HBASE-20533)
096    conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3);
097
098    // keep rs online so it can report the failed opens.
099    conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false);
100    TEST_UTIL.startMiniCluster(1);
101    CLUSTER = TEST_UTIL.getHBaseCluster();
102    MASTER = CLUSTER.getMaster();
103    // Disable sanity check for coprocessor, so that modify table runs on the HMaster
104    MASTER.getConfiguration().setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false);
105  }
106
107  @AfterClass
108  public static void after() throws Exception {
109    LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL);
110    TEST_UTIL.shutdownMiniCluster();
111  }
112
113  @Test
114  public void testRITAssignmentManagerMetrics() throws Exception {
115    final TableName TABLENAME = TableName.valueOf(name.getMethodName());
116    final byte[] FAMILY = Bytes.toBytes("family");
117    try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)) {
118      final byte[] row = Bytes.toBytes("row");
119      final byte[] qualifier = Bytes.toBytes("qualifier");
120      final byte[] value = Bytes.toBytes("value");
121
122      Put put = new Put(row);
123      put.addColumn(FAMILY, qualifier, value);
124      table.put(put);
125
126      // Sleep 3 seconds, wait for doMetrics chore catching up
127      Thread.sleep(MSG_INTERVAL * 3);
128
129      // check the RIT is 0
130      MetricsAssignmentManagerSource amSource =
131        MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource();
132
133      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource);
134      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0,
135        amSource);
136
137      // alter table with a non-existing coprocessor
138      TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
139        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
140        .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver")
141          .setJarPath("hdfs:///foo.jar").setPriority(1001).setProperty("arg1", "1")
142          .setProperty("arg2", "2").build())
143        .build();
144      try {
145        TEST_UTIL.getAdmin().modifyTable(htd);
146        fail("Expected region failed to open");
147      } catch (IOException e) {
148        // expected, the RS will crash and the assignment will spin forever waiting for a RS
149        // to assign the region. the region will not go to FAILED_OPEN because in this case
150        // we have just one RS and it will do one retry.
151        LOG.info("Expected error", e);
152      }
153
154      // Sleep 5 seconds, wait for doMetrics chore catching up
155      // the rit count consists of rit and failed opens. see RegionInTransitionStat#update
156      // Waiting for the completion of rit makes the assert stable.
157      TEST_UTIL.waitUntilNoRegionsInTransition();
158      Thread.sleep(MSG_INTERVAL * 5);
159      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
160      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,
161        amSource);
162      METRICS_HELPER.assertCounter(
163        MetricsAssignmentManagerSource.ASSIGN_METRIC_PREFIX + "SubmittedCount", 2, amSource);
164    }
165  }
166}