/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.exceptions.base.MockitoAssertionError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Testing sync/append failures. Copied from TestHRegion.
 */
@Category({ SmallTests.class })
public class TestFailedAppendAndSync {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);
  @Rule
  public TestName name = new TestName();

  private static final String COLUMN_FAMILY = "MyCF";
  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);

  // Never assigned in this class; the test method below works on its own local region.
  HRegion region = null;
  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
  // Both statics are re-created by setup() before every test method.
  private static HBaseTestingUtil TEST_UTIL;
  public static Configuration CONF;
  private String dir;

  // Test names
  protected TableName tableName;

  /**
   * Builds a fresh testing utility and configuration for each test: block cache disabled and a
   * WAL sync timeout of 10000 ms.
   */
  @Before
  public void setup() throws IOException {
    TEST_UTIL = new HBaseTestingUtil();
    CONF = TEST_UTIL.getConfiguration();
    // Disable block cache.
    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    CONF.setLong(AbstractFSWAL.WAL_SYNC_TIMEOUT_MS, 10000);
    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
    tableName = TableName.valueOf(name.getMethodName());
  }

  /**
   * Resets the environment edge and removes the per-test data directory.
   */
  @After
  public void tearDown() throws Exception {
    EnvironmentEdgeManagerTestHelper.reset();
    LOG.info("Cleaning test directory: {}", TEST_UTIL.getDataTestDir());
    TEST_UTIL.cleanupTestDir();
  }

  /** Returns the name of the currently running test method. */
  String getName() {
    return name.getMethodName();
  }

  /**
   * Dodgy WAL. Will throw exceptions when flags set. The flags are volatile because the test
   * thread flips them while WAL work may be happening on other threads.
   */
  class DodgyFSLog extends FSHLog {
    /** When true, every {@code sync} on a writer created by this WAL throws. */
    volatile boolean throwSyncException = false;
    /** When true, every {@code append} on a writer created by this WAL throws. */
    volatile boolean throwAppendException = false;
    /** When true, {@link #archiveLogFile(Path)} throws. */
    volatile boolean throwArchiveException = false;

    /** Number of {@link #rollWriter(boolean)} calls that returned normally. */
    final AtomicLong rolls = new AtomicLong(0);

    public DodgyFSLog(FileSystem fs, Server server, Path root, String logDir, Configuration conf)
      throws IOException {
      super(fs, server, root, logDir, conf);
    }

    /**
     * Delegates to the real roll and counts it afterwards, so a roll that throws is not counted.
     */
    @Override
    public Map<byte[], List<byte[]>> rollWriter(boolean force)
      throws FailedLogCloseException, IOException {
      Map<byte[], List<byte[]>> regions = super.rollWriter(force);
      rolls.getAndIncrement();
      return regions;
    }

    /**
     * Replaces real archiving: throws when {@link #throwArchiveException} is set and otherwise
     * does nothing (the superclass implementation is intentionally not invoked).
     */
    @Override
    protected void archiveLogFile(Path p) throws IOException {
      if (throwArchiveException) {
        throw new IOException("throw archival exception");
      }
    }

    /**
     * Pure pass-through to the superclass. Re-declaring the protected method in this class lets
     * the enclosing test, which lives in a different package than {@link FSHLog}, call it.
     */
    @Override
    protected void archive(Pair<Path, Long> localLogsToArchive) {
      super.archive(localLogsToArchive);
    }

    /**
     * Wraps the real writer in one that fails {@code sync}/{@code append} on demand and forwards
     * everything else untouched.
     */
    @Override
    protected Writer createWriterInstance(FileSystem fs, Path path) throws IOException {
      final Writer w = super.createWriterInstance(fs, path);
      return new Writer() {
        @Override
        public void close() throws IOException {
          w.close();
        }

        @Override
        public void sync(boolean forceSync) throws IOException {
          if (throwSyncException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          w.sync(forceSync);
        }

        @Override
        public void append(Entry entry) throws IOException {
          if (throwAppendException) {
            throw new IOException("FAKE! Failed to replace a bad datanode...");
          }
          w.append(entry);
        }

        @Override
        public long getLength() {
          return w.getLength();
        }

        @Override
        public long getSyncedLength() {
          return w.getSyncedLength();
        }
      };
    }
  }

  /**
   * Reproduce locking up that happens when we get an exception appending and syncing. See
   * HBASE-14317. First I need to set up some mocks for Server and RegionServerServices. I also need
   * to set up a dodgy WAL that will throw an exception when we go to append to it.
   * <p>
   * The scenario runs in stages: a healthy put, a put with append failing, a put with append and
   * sync both failing, a put with only sync failing (expected to abort the server), and finally a
   * failing archive (expected to trigger one more abort).
   */
  @Test
  public void testLockupAroundBadAssignSync() throws IOException {
    // Make up mocked server and services.
    RegionServerServices services = mock(RegionServerServices.class);
    when(services.getConfiguration()).thenReturn(CONF);
    when(services.isStopped()).thenReturn(false);
    when(services.isAborted()).thenReturn(false);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, (Server) services, rootDir, getName(), CONF);
    dodgyWAL.init();
    LogRoller logRoller = new LogRoller(services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();

    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;

    HRegion region = null;
    try {
      // Created inside the try so that a failure here still stops the roller and closes the WAL.
      region = initHRegion(tableName, null, null, CONF, dodgyWAL);
      // Get some random bytes.
      byte[] value = Bytes.toBytes(getName());
      try {
        // First get something into memstore
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
        region.put(put);
      } catch (IOException ioe) {
        // Keep the stack trace in the log; fail() itself only carries a message.
        LOG.error("Put against a healthy WAL failed", ioe);
        fail("Put against a healthy WAL should have succeeded, but threw " + ioe);
      }
      long rollsCount = dodgyWAL.rolls.get();
      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = false;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
        region.put(put);
      } catch (IOException ioe) {
        threwOnAppend = true;
      }
      // Wait for the WAL to be rolled after the failed append.
      while (rollsCount == dodgyWAL.rolls.get()) {
        Threads.sleep(100);
      }
      rollsCount = dodgyWAL.rolls.get();

      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
      // appends to sync. We should be able to continue.

      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
        region.put(put);
      } catch (IOException ioe) {
        threwOnBoth = true;
      }
      while (rollsCount == dodgyWAL.rolls.get()) {
        Threads.sleep(100);
      }

      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
      // to just continue.

      // So, should be no abort at this stage. Verify.
      // NOTE(review): atLeast(0) is satisfied by any number of calls, so this line documents the
      // intent but cannot fail. Tightening it to never() would enforce it -- confirm that no
      // abort can legitimately happen before this point first.
      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));
      try {
        dodgyWAL.throwAppendException = false;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
        region.rsServices = services;
        region.put(put);
      } catch (IOException ioe) {
        threwOnSync = true;
      }

      region.rsServices = null;
      // An append in the WAL but the sync failed is a server abort condition. That is our
      // current semantic. Verify.
      verify(services, atLeast(1)).abort(anyString(), any());
      try {
        dodgyWAL.throwAppendException = false;
        dodgyWAL.throwSyncException = false;
        dodgyWAL.throwArchiveException = true;
        Pair<Path, Long> pair = new Pair<>();
        pair.setFirst(new Path("/a/b/"));
        pair.setSecond(100L);
        dodgyWAL.archive(pair);
      } catch (Throwable t) {
        // Best effort: whether or not archive() itself throws, the check that matters is the
        // abort verification below.
        LOG.info("Archive attempt threw; continuing to abort verification", t);
      }
      while (true) {
        try {
          // one more abort needs to be called
          verify(services, atLeast(2)).abort(anyString(), any());
          break;
        } catch (MockitoAssertionError notYet) {
          // With a wanted count above one, Mockito reports a shortfall through a
          // MockitoAssertionError subtype other than WantedButNotInvoked, so the common base type
          // is caught here to make the retry effective.
          Threads.sleep(1);
        }
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      when(services.isStopped()).thenReturn(true);
      logRoller.close();
      if (region != null) {
        try {
          region.close(true);
        } catch (DroppedSnapshotException e) {
          LOG.info("On way out; expected!", e);
        }
      }
      dodgyWAL.close();
    }
    // Asserted after cleanup rather than inside finally, so that an exception thrown by the test
    // body is reported as-is instead of being masked by one of these assertion failures.
    assertTrue("The regionserver should have thrown an exception", threwOnBoth);
    assertTrue("The regionserver should have thrown an exception", threwOnAppend);
    assertTrue("The regionserver should have thrown an exception", threwOnSync);
  }

  /**
   * Initializes the chunk creator with default sizes and creates a local region with a single
   * column family that writes to the supplied WAL using {@link Durability#SYNC_WAL}.
   * @return A region on which you must call {@link HBaseTestingUtil#closeRegionAndWAL(HRegion)}
   *         when done.
   */
  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
    Configuration conf, WAL wal) throws IOException {
    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
  }
}