001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.fail; 021 022import java.io.IOException; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.CompatibilityFactory; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.MiniHBaseCluster; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 031import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder; 032import org.apache.hadoop.hbase.client.Put; 033import org.apache.hadoop.hbase.client.Table; 034import org.apache.hadoop.hbase.client.TableDescriptor; 035import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 036import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; 037import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 038import org.apache.hadoop.hbase.test.MetricsAssertHelper; 039import org.apache.hadoop.hbase.testclassification.MasterTests; 040import org.apache.hadoop.hbase.testclassification.MediumTests; 041import org.apache.hadoop.hbase.util.Bytes; 042import org.apache.hadoop.hbase.util.TableDescriptorChecker; 043import org.junit.AfterClass; 044import org.junit.BeforeClass; 045import org.junit.ClassRule; 046import org.junit.Rule; 047import org.junit.Test; 048import org.junit.experimental.categories.Category; 049import org.junit.rules.TestName; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053@Category({ MasterTests.class, MediumTests.class }) 054public class TestAssignmentManagerMetrics { 055 056 @ClassRule 057 public static final HBaseClassTestRule CLASS_RULE = 058 HBaseClassTestRule.forClass(TestAssignmentManagerMetrics.class); 059 060 private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class); 061 private static final MetricsAssertHelper METRICS_HELPER = 062 CompatibilityFactory.getInstance(MetricsAssertHelper.class); 063 064 private static MiniHBaseCluster CLUSTER; 065 private static HMaster MASTER; 066 private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 067 private static final int MSG_INTERVAL = 1000; 068 069 @Rule 070 public TestName name = new TestName(); 071 072 @BeforeClass 073 public static void startCluster() throws Exception { 074 LOG.info("Starting cluster"); 075 Configuration conf = TEST_UTIL.getConfiguration(); 076 077 // Enable sanity check for coprocessor, so that region reopen fails on the RS 078 conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true); 079 080 // set RIT stuck warning threshold to a small value 081 conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20); 082 083 // set msgInterval to 1 second 084 conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL); 085 086 // set tablesOnMaster to none 087 conf.set("hbase.balancer.tablesOnMaster", "none"); 088 089 // set client sync wait timeout to 5sec 090 conf.setInt("hbase.client.sync.wait.timeout.msec", 5000); 091 conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); 092 conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500); 093 // set a small interval for updating rit metrics 094 conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL); 095 // set a small assign attempts for avoiding assert when retrying. (HBASE-20533) 096 conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3); 097 098 // keep rs online so it can report the failed opens. 099 conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false); 100 TEST_UTIL.startMiniCluster(1); 101 CLUSTER = TEST_UTIL.getHBaseCluster(); 102 MASTER = CLUSTER.getMaster(); 103 // Disable sanity check for coprocessor, so that modify table runs on the HMaster 104 MASTER.getConfiguration().setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false); 105 } 106 107 @AfterClass 108 public static void after() throws Exception { 109 LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL); 110 TEST_UTIL.shutdownMiniCluster(); 111 } 112 113 @Test 114 public void testRITAssignmentManagerMetrics() throws Exception { 115 final TableName TABLENAME = TableName.valueOf(name.getMethodName()); 116 final byte[] FAMILY = Bytes.toBytes("family"); 117 try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)) { 118 final byte[] row = Bytes.toBytes("row"); 119 final byte[] qualifier = Bytes.toBytes("qualifier"); 120 final byte[] value = Bytes.toBytes("value"); 121 122 Put put = new Put(row); 123 put.addColumn(FAMILY, qualifier, value); 124 table.put(put); 125 126 // Sleep 3 seconds, wait for doMetrics chore catching up 127 Thread.sleep(MSG_INTERVAL * 3); 128 129 // check the RIT is 0 130 MetricsAssignmentManagerSource amSource = 131 MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource(); 132 133 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource); 134 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0, 135 amSource); 136 137 // alter table with a non-existing coprocessor 138 TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME) 139 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) 140 .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver") 141 .setJarPath("hdfs:///foo.jar").setPriority(1001).setProperty("arg1", "1") 142 .setProperty("arg2", "2").build()) 143 .build(); 144 try { 145 TEST_UTIL.getAdmin().modifyTable(htd); 146 fail("Expected region failed to open"); 147 } catch (IOException e) { 148 // expected, the RS will crash and the assignment will spin forever waiting for a RS 149 // to assign the region. the region will not go to FAILED_OPEN because in this case 150 // we have just one RS and it will do one retry. 151 LOG.info("Expected error", e); 152 } 153 154 // Sleep 5 seconds, wait for doMetrics chore catching up 155 // the rit count consists of rit and failed opens. see RegionInTransitionStat#update 156 // Waiting for the completion of rit makes the assert stable. 157 TEST_UTIL.waitUntilNoRegionsInTransition(); 158 Thread.sleep(MSG_INTERVAL * 5); 159 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource); 160 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1, 161 amSource); 162 METRICS_HELPER.assertCounter( 163 MetricsAssignmentManagerSource.ASSIGN_METRIC_PREFIX + "SubmittedCount", 3, amSource); 164 } 165 } 166}