001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.wal; 019 020import static org.junit.Assert.assertTrue; 021import static org.junit.Assert.fail; 022 023import java.io.IOException; 024import java.util.List; 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.fs.FileSystem; 027import org.apache.hadoop.fs.Path; 028import org.apache.hadoop.hbase.Abortable; 029import org.apache.hadoop.hbase.HBaseClassTestRule; 030import org.apache.hadoop.hbase.HBaseTestingUtil; 031import org.apache.hadoop.hbase.HConstants; 032import org.apache.hadoop.hbase.StartTestingClusterOption; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 035import org.apache.hadoop.hbase.client.Put; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.client.TableDescriptor; 038import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 039import org.apache.hadoop.hbase.io.asyncfs.monitor.StreamSlowMonitor; 040import org.apache.hadoop.hbase.regionserver.HRegion; 041import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 042import org.apache.hadoop.hbase.regionserver.HRegionServer; 043import org.apache.hadoop.hbase.regionserver.RegionServerServices; 044import org.apache.hadoop.hbase.regionserver.regionreplication.RegionReplicationSink; 045import org.apache.hadoop.hbase.testclassification.LargeTests; 046import org.apache.hadoop.hbase.testclassification.RegionServerTests; 047import org.apache.hadoop.hbase.util.Bytes; 048import org.apache.hadoop.hbase.util.CommonFSUtils; 049import org.apache.hadoop.hbase.wal.AsyncFSWALProvider; 050import org.apache.hadoop.hbase.wal.WAL; 051import org.apache.hadoop.hbase.wal.WALFactory; 052import org.apache.hadoop.hbase.wal.WALProvider; 053import org.junit.AfterClass; 054import org.junit.BeforeClass; 055import org.junit.ClassRule; 056import org.junit.Test; 057import org.junit.experimental.categories.Category; 058 059import org.apache.hbase.thirdparty.io.netty.channel.Channel; 060import org.apache.hbase.thirdparty.io.netty.channel.EventLoopGroup; 061 062@Category({ RegionServerTests.class, LargeTests.class }) 063public class TestWALSyncTimeoutException { 064 065 @ClassRule 066 public static final HBaseClassTestRule CLASS_RULE = 067 HBaseClassTestRule.forClass(TestWALSyncTimeoutException.class); 068 069 private static final byte[] FAMILY = Bytes.toBytes("family_test"); 070 071 private static final byte[] QUAL = Bytes.toBytes("qualifier_test"); 072 073 private static final HBaseTestingUtil HTU = new HBaseTestingUtil(); 074 075 private static TableName tableName = TableName.valueOf("TestWALSyncTimeoutException"); 076 private static volatile boolean testWALTimout = false; 077 private static final long timeoutMIlliseconds = 3000; 078 private static final String USER_THREAD_NAME = tableName.getNameAsString(); 079 080 @BeforeClass 081 public static void setUp() throws Exception { 082 Configuration conf = HTU.getConfiguration(); 083 conf.setClass(HConstants.REGION_IMPL, HRegionForTest.class, HRegion.class); 084 conf.setInt(RegionReplicationSink.RETRIES_NUMBER, 1); 085 conf.setLong(RegionReplicationSink.RPC_TIMEOUT_MS, 10 * 60 * 1000); 086 conf.setLong(RegionReplicationSink.OPERATION_TIMEOUT_MS, 20 * 60 * 1000); 087 conf.setLong(RegionReplicationSink.META_EDIT_RPC_TIMEOUT_MS, 10 * 60 * 1000); 088 conf.setLong(RegionReplicationSink.META_EDIT_OPERATION_TIMEOUT_MS, 20 * 60 * 1000); 089 conf.setClass(WALFactory.WAL_PROVIDER, SlowAsyncFSWALProvider.class, WALProvider.class); 090 conf.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, timeoutMIlliseconds); 091 HTU.startMiniCluster(StartTestingClusterOption.builder().numRegionServers(1).build()); 092 093 } 094 095 @AfterClass 096 public static void tearDown() throws Exception { 097 HTU.shutdownMiniCluster(); 098 } 099 100 /** 101 * This test is for HBASE-27230. When {@link WAL#sync} timeout, it would throws 102 * {@link WALSyncTimeoutIOException},and when {@link HRegion#doWALAppend} catches this exception 103 * it aborts the RegionServer. 104 */ 105 @Test 106 public void testWALSyncWriteException() throws Exception { 107 final HRegionForTest region = this.createTable(); 108 109 String oldThreadName = Thread.currentThread().getName(); 110 Thread.currentThread().setName(USER_THREAD_NAME); 111 try { 112 byte[] rowKey1 = Bytes.toBytes(1); 113 byte[] value1 = Bytes.toBytes(3); 114 Thread.sleep(2000); 115 testWALTimout = true; 116 117 /** 118 * The {@link WAL#sync} would timeout and throws {@link WALSyncTimeoutIOException},when 119 * {@link HRegion#doWALAppend} catches this exception it aborts the RegionServer. 120 */ 121 try { 122 region.put(new Put(rowKey1).addColumn(FAMILY, QUAL, value1)); 123 fail(); 124 } catch (WALSyncTimeoutIOException e) { 125 assertTrue(e != null); 126 } 127 assertTrue(region.getRSServices().isAborted()); 128 } finally { 129 Thread.currentThread().setName(oldThreadName); 130 testWALTimout = false; 131 } 132 } 133 134 private HRegionForTest createTable() throws Exception { 135 TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName) 136 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).build(); 137 HTU.getAdmin().createTable(tableDescriptor); 138 HRegionServer rs = HTU.getMiniHBaseCluster().getRegionServer(0); 139 return (HRegionForTest) rs.getRegions(tableName).get(0); 140 } 141 142 public static final class HRegionForTest extends HRegion { 143 144 public HRegionForTest(HRegionFileSystem fs, WAL wal, Configuration confParam, 145 TableDescriptor htd, RegionServerServices rsServices) { 146 super(fs, wal, confParam, htd, rsServices); 147 } 148 149 @SuppressWarnings("deprecation") 150 public HRegionForTest(Path tableDir, WAL wal, FileSystem fs, Configuration confParam, 151 RegionInfo regionInfo, TableDescriptor htd, RegionServerServices rsServices) { 152 super(tableDir, wal, fs, confParam, regionInfo, htd, rsServices); 153 } 154 155 public RegionServerServices getRSServices() { 156 return this.rsServices; 157 } 158 159 } 160 161 public static class SlowAsyncFSWAL extends AsyncFSWAL { 162 163 public SlowAsyncFSWAL(FileSystem fs, Abortable abortable, Path rootDir, String logDir, 164 String archiveDir, Configuration conf, List<WALActionsListener> listeners, 165 boolean failIfWALExists, String prefix, String suffix, EventLoopGroup eventLoopGroup, 166 Class<? extends Channel> channelClass, StreamSlowMonitor monitor) 167 throws FailedLogCloseException, IOException { 168 super(fs, abortable, rootDir, logDir, archiveDir, conf, listeners, failIfWALExists, prefix, 169 suffix, null, null, eventLoopGroup, channelClass, monitor); 170 } 171 172 @Override 173 protected void atHeadOfRingBufferEventHandlerAppend() { 174 if (testWALTimout) { 175 try { 176 Thread.sleep(timeoutMIlliseconds + 1000); 177 } catch (InterruptedException e) { 178 throw new RuntimeException(e); 179 } 180 } 181 super.atHeadOfRingBufferEventHandlerAppend(); 182 } 183 184 } 185 186 public static class SlowAsyncFSWALProvider extends AsyncFSWALProvider { 187 188 @Override 189 protected AsyncFSWAL createWAL() throws IOException { 190 return new SlowAsyncFSWAL(CommonFSUtils.getWALFileSystem(conf), this.abortable, 191 CommonFSUtils.getWALRootDir(conf), getWALDirectoryName(factory.getFactoryId()), 192 getWALArchiveDirectoryName(conf, factory.getFactoryId()), conf, listeners, true, logPrefix, 193 META_WAL_PROVIDER_ID.equals(providerId) ? META_WAL_PROVIDER_ID : null, eventLoopGroup, 194 channelClass, factory.getExcludeDatanodeManager().getStreamSlowMonitor(providerId)); 195 } 196 197 } 198}