001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.junit.Assert.assertEquals; 021import static org.junit.Assert.fail; 022 023import java.io.IOException; 024import java.util.Arrays; 025import java.util.List; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.hbase.HBaseClassTestRule; 028import org.apache.hadoop.hbase.HBaseTestingUtil; 029import org.apache.hadoop.hbase.HConstants; 030import org.apache.hadoop.hbase.TableName; 031import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; 032import org.apache.hadoop.hbase.YouAreDeadException; 033import org.apache.hadoop.hbase.client.Get; 034import org.apache.hadoop.hbase.client.Put; 035import org.apache.hadoop.hbase.client.Table; 036import org.apache.hadoop.hbase.testclassification.LargeTests; 037import org.apache.hadoop.hbase.testclassification.RegionServerTests; 038import org.apache.hadoop.hbase.util.Bytes; 039import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; 040import org.apache.hadoop.hbase.wal.AsyncFSWALProvider; 041import org.apache.hadoop.hbase.wal.FSHLogProvider; 042import org.apache.hadoop.hbase.wal.WALFactory; 043import org.apache.hadoop.hbase.wal.WALProvider; 044import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 045import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 046import org.junit.After; 047import org.junit.Before; 048import org.junit.ClassRule; 049import org.junit.Test; 050import org.junit.experimental.categories.Category; 051import org.junit.runner.RunWith; 052import org.junit.runners.Parameterized; 053import org.junit.runners.Parameterized.Parameter; 054import org.junit.runners.Parameterized.Parameters; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057 058/** 059 * This testcase is used to ensure that the compaction marker will fail a compaction if the RS is 060 * already dead. It can not eliminate FNFE when scanning but it does reduce the possibility a lot. 061 */ 062@RunWith(Parameterized.class) 063@Category({ RegionServerTests.class, LargeTests.class }) 064public class TestCompactionInDeadRegionServer { 065 066 @ClassRule 067 public static final HBaseClassTestRule CLASS_RULE = 068 HBaseClassTestRule.forClass(TestCompactionInDeadRegionServer.class); 069 070 private static final Logger LOG = LoggerFactory.getLogger(TestCompactionInDeadRegionServer.class); 071 072 private static final HBaseTestingUtil UTIL = new HBaseTestingUtil(); 073 074 private static final TableName TABLE_NAME = TableName.valueOf("test"); 075 076 private static final byte[] CF = Bytes.toBytes("cf"); 077 078 private static final byte[] CQ = Bytes.toBytes("cq"); 079 080 public static final class IgnoreYouAreDeadRS extends HRegionServer { 081 082 public IgnoreYouAreDeadRS(Configuration conf) throws IOException, InterruptedException { 083 super(conf); 084 } 085 086 @Override 087 protected void tryRegionServerReport(long reportStartTime, long reportEndTime) 088 throws IOException { 089 try { 090 super.tryRegionServerReport(reportStartTime, reportEndTime); 091 } catch (YouAreDeadException e) { 092 // ignore, do not abort 093 } 094 } 095 } 096 097 @Parameter 098 public Class<? extends WALProvider> walProvider; 099 100 @Parameters(name = "{index}: wal={0}") 101 public static List<Object[]> params() { 102 return Arrays.asList(new Object[] { FSHLogProvider.class }, 103 new Object[] { AsyncFSWALProvider.class }); 104 } 105 106 @Before 107 public void setUp() throws Exception { 108 UTIL.getConfiguration().setClass(WALFactory.WAL_PROVIDER, walProvider, WALProvider.class); 109 UTIL.getConfiguration().setInt(HConstants.ZK_SESSION_TIMEOUT, 2000); 110 UTIL.getConfiguration().setClass(HConstants.REGION_SERVER_IMPL, IgnoreYouAreDeadRS.class, 111 HRegionServer.class); 112 UTIL.startMiniCluster(2); 113 Table table = UTIL.createTable(TABLE_NAME, CF); 114 for (int i = 0; i < 10; i++) { 115 table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i))); 116 } 117 UTIL.getAdmin().flush(TABLE_NAME); 118 for (int i = 10; i < 20; i++) { 119 table.put(new Put(Bytes.toBytes(i)).addColumn(CF, CQ, Bytes.toBytes(i))); 120 } 121 UTIL.getAdmin().flush(TABLE_NAME); 122 } 123 124 @After 125 public void tearDown() throws Exception { 126 UTIL.shutdownMiniCluster(); 127 } 128 129 @Test 130 public void test() throws Exception { 131 HRegionServer regionSvr = UTIL.getRSForFirstRegionInTable(TABLE_NAME); 132 HRegion region = regionSvr.getRegions(TABLE_NAME).get(0); 133 String regName = region.getRegionInfo().getEncodedName(); 134 List<HRegion> metaRegs = regionSvr.getRegions(TableName.META_TABLE_NAME); 135 if (metaRegs != null && !metaRegs.isEmpty()) { 136 LOG.info("meta is on the same server: " + regionSvr); 137 // when region is on same server as hbase:meta, reassigning meta would abort the server 138 // since WAL is broken. 139 // so the region is moved to a different server 140 HRegionServer otherRs = UTIL.getOtherRegionServer(regionSvr); 141 UTIL.moveRegionAndWait(region.getRegionInfo(), otherRs.getServerName()); 142 LOG.info("Moved region: " + regName + " to " + otherRs.getServerName()); 143 } 144 HRegionServer rsToSuspend = UTIL.getRSForFirstRegionInTable(TABLE_NAME); 145 region = rsToSuspend.getRegions(TABLE_NAME).get(0); 146 147 ZKWatcher watcher = UTIL.getZooKeeperWatcher(); 148 watcher.getRecoverableZooKeeper().delete( 149 ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, rsToSuspend.getServerName().toString()), 150 -1); 151 LOG.info("suspending " + rsToSuspend); 152 UTIL.waitFor(60000, 1000, new ExplainingPredicate<Exception>() { 153 154 @Override 155 public boolean evaluate() throws Exception { 156 for (RegionServerThread thread : UTIL.getHBaseCluster().getRegionServerThreads()) { 157 HRegionServer rs = thread.getRegionServer(); 158 if (rs != rsToSuspend) { 159 return !rs.getRegions(TABLE_NAME).isEmpty(); 160 } 161 } 162 return false; 163 } 164 165 @Override 166 public String explainFailure() throws Exception { 167 return "The region for " + TABLE_NAME + " is still on " + rsToSuspend.getServerName(); 168 } 169 }); 170 try { 171 region.compact(true); 172 fail("Should fail as our wal file has already been closed, " 173 + "and walDir has also been renamed"); 174 } catch (Exception e) { 175 LOG.debug("expected exception: ", e); 176 } 177 Table table = UTIL.getConnection().getTable(TABLE_NAME); 178 // should not hit FNFE 179 for (int i = 0; i < 20; i++) { 180 assertEquals(i, Bytes.toInt(table.get(new Get(Bytes.toBytes(i))).getValue(CF, CQ))); 181 } 182 } 183}