/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.zookeeper.KeeperException;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests handling of meta-carrying region server failover.
 */
@Category(MediumTests.class)
public class TestMetaShutdownHandler {
  private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMetaShutdownHandler.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  static final Configuration conf = TEST_UTIL.getConfiguration();

  /**
   * Starts a mini cluster with three data nodes and three region servers, each region server
   * being a {@link MyRegionServer} so that its abort is artificially delayed.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    StartMiniClusterOption option = StartMiniClusterOption.builder().numRegionServers(3)
      .rsClass(MyRegionServer.class).numDataNodes(3).build();
    TEST_UTIL.startMiniCluster(option);
  }

  /** Shuts down the mini cluster started in {@link #setUpBeforeClass()}. */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * This test will test the expire handling of a meta-carrying region server. After the
   * {@link MiniHBaseCluster} is up, we will delete the ephemeral node of the meta-carrying region
   * server, which will trigger the expire of this region server on the master. On the other hand,
   * we will slow down the abort process on the region server so that it is still up during the
   * master SSH (server shutdown handling). We will check that the master SSH is still
   * successfully done and that meta ends up OPEN on a different server.
   */
  @Test
  public void testExpireMetaRegionServer() throws Exception {
    MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
    HMaster master = cluster.getMaster();
    RegionStates regionStates = master.getAssignmentManager().getRegionStates();
    ServerName metaServerName =
      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
    if (
      metaServerName == null || master.getServerName().equals(metaServerName)
        || !metaServerName.equals(cluster.getServerHoldingMeta())
    ) {
      // Meta location is unknown, is on the master, or disagrees with the cluster's view:
      // move meta onto the first live region server and re-read its location.
      metaServerName =
        cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
      master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
        Bytes.toBytes(metaServerName.getServerName()));
      TEST_UTIL.waitUntilNoRegionsInTransition(60000);
      metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
    }
    RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
    assertNotEquals("Meta is on master!", metaServerName, master.getServerName());
    HRegionServer metaRegionServer = cluster.getRegionServer(metaServerName);
    // Fail with a clear message here rather than with an NPE inside the wait predicate below.
    assertTrue("No region server found for " + metaServerName, metaRegionServer != null);

    // Delete the ephemeral node of the meta-carrying region server.
    // This will trigger the expiration of this region server on the master.
    String rsEphemeralNodePath = ZNodePaths.joinZNode(
      master.getZooKeeper().getZNodePaths().rsZNode, metaServerName.toString());
    ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
    LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");

    // Wait for the region server to stop and for SSH to finish.
    final ServerManager serverManager = master.getServerManager();
    final ServerName priorMetaServerName = metaServerName;
    TEST_UTIL.waitFor(60000, 100, () -> metaRegionServer.isStopped());
    Waiter.Predicate<Exception> serverShutdownHandled =
      () -> !serverManager.isServerOnline(priorMetaServerName)
        && !serverManager.areDeadServersInProgress();
    TEST_UTIL.waitFor(120000, 200, serverShutdownHandled);
    LOG.info("Past wait on SSH; waiting on RIT");
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    LOG.info("Past wait on RIT");

    // Now, make sure meta is assigned
    assertTrue("Meta should be assigned",
      regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
    // Now, make sure meta is registered in zk
    metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
    assertEquals("Meta should not be in transition", RegionState.State.OPEN, metaState.getState());
    assertEquals("Meta should be assigned", metaState.getServerName(),
      regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
    assertNotEquals("Meta should be assigned on a different server", metaState.getServerName(),
      metaServerName);
  }

  /**
   * Region server whose {@link #abort(String, Throwable)} sleeps for 30 seconds before delegating
   * to the superclass, so the server is still running while the master handles its expiration.
   */
  public static class MyRegionServer extends MiniHBaseClusterRegionServer {

    public MyRegionServer(Configuration conf)
      throws IOException, KeeperException, InterruptedException {
      super(conf);
    }

    /**
     * Sleeps for 30 seconds and then performs the regular abort. If the sleep is interrupted the
     * abort is skipped, and the interrupt status is restored for the caller.
     * @param reason why the server is aborting; passed through to the superclass
     * @param cause  the triggering throwable; passed through to the superclass
     */
    @Override
    public void abort(String reason, Throwable cause) {
      // sleep to slow down the region server abort
      try {
        Thread.sleep(30 * 1000);
      } catch (InterruptedException e) {
        // Do not swallow the interrupt: restore the flag so callers can observe it.
        Thread.currentThread().interrupt();
        return;
      }
      super.abort(reason, cause);
    }
  }
}