/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.DroppedSnapshotException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.FSHLog;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManagerTestHelper;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALProvider.Writer;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.mockito.exceptions.verification.WantedButNotInvoked;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Testing sync/append failures. Copied from TestHRegion.
 * <p>
 * The single test drives a region through a WAL whose writer can be told to fail on append, on
 * sync, or on archiving, and checks how the (mocked) region server services are asked to abort.
 */
@Category({ SmallTests.class })
public class TestFailedAppendAndSync {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFailedAppendAndSync.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFailedAppendAndSync.class);

  /** Supplies the running test method's name; used for the table name and the WAL paths. */
  @Rule
  public TestName name = new TestName();

  private static final String COLUMN_FAMILY = "MyCF";
  private static final byte[] COLUMN_FAMILY_BYTES = Bytes.toBytes(COLUMN_FAMILY);

  // Never assigned in this class; the test below works on a local region instead.
  HRegion region = null;
  // Do not run unit tests in parallel (? Why not? It don't work? Why not? St.Ack)
  // Both statics are re-created by setup() before every test method.
  private static HBaseTestingUtility TEST_UTIL;
  public static Configuration CONF;
  // Base directory string for this test's data; the method name is appended to it as-is.
  private String dir;

  // Test names
  protected TableName tableName;

  /**
   * Creates a fresh local testing utility and configuration, disables the block cache and
   * derives the data directory and table name for the current test method.
   * @throws IOException declared for the setup calls; none of them is caught here
   */
  @Before
  public void setup() throws IOException {
    TEST_UTIL = HBaseTestingUtility.createLocalHTU();
    CONF = TEST_UTIL.getConfiguration();
    // Disable block cache.
    CONF.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    // The "TestHRegion" directory name is a leftover from the class this test was copied from.
    dir = TEST_UTIL.getDataTestDir("TestHRegion").toString();
    tableName = TableName.valueOf(name.getMethodName());
  }

  /**
   * Resets the environment edge and removes the test data directory.
   */
  @After
  public void tearDown() throws Exception {
    EnvironmentEdgeManagerTestHelper.reset();
    LOG.info("Cleaning test directory: {}", TEST_UTIL.getDataTestDir());
    TEST_UTIL.cleanupTestDir();
  }

  /**
   * @return the name of the currently running test method
   */
  String getName() {
    return name.getMethodName();
  }

  /**
   * Reproduces the lock-up that happens when we get exceptions while appending and syncing. See
   * HBASE-14317. Sets up a mocked {@link RegionServerServices} and a dodgy WAL that throws when
   * told to, then walks through four failure modes:
   * <ol>
   * <li>append fails, sync is fine: the put must throw and the WAL must roll;</li>
   * <li>append and sync both fail: the put must throw and the WAL must roll again;</li>
   * <li>append succeeds but sync fails: the put must throw and the services must be asked to
   * abort;</li>
   * <li>archiving fails: the services must be asked to abort once more.</li>
   * </ol>
   * The waits are unbounded polls; if an expected roll or abort never happens, the test relies on
   * the surrounding test timeout.
   * @throws IOException if setting up or tearing down the WAL/region fails unexpectedly
   */
  @Test
  public void testLockupAroundBadAssignSync() throws IOException {
    final AtomicLong rolls = new AtomicLong(0);
    // Dodgy WAL. Will throw exceptions when flags set.
    class DodgyFSLog extends FSHLog {
      // Flags are flipped by the test thread and read wherever the WAL invokes its writer.
      volatile boolean throwSyncException = false;
      volatile boolean throwAppendException = false;
      volatile boolean throwArchiveException = false;

      public DodgyFSLog(FileSystem fs, Abortable abortable, Path root, String logDir,
        Configuration conf) throws IOException {
        super(fs, abortable, root, logDir, conf);
      }

      /** Delegates to the real roll and counts each completed roll in {@code rolls}. */
      @Override
      public Map<byte[], List<byte[]>> rollWriter(boolean force)
        throws FailedLogCloseException, IOException {
        Map<byte[], List<byte[]>> regions = super.rollWriter(force);
        rolls.getAndIncrement();
        return regions;
      }

      /** Never archives anything; only throws when {@code throwArchiveException} is set. */
      @Override
      protected void archiveLogFile(Path p) throws IOException {
        if (throwArchiveException) {
          throw new IOException("throw archival exception");
        }
      }

      /**
       * Pure pass-through to the superclass. Presumably re-declared only so that the test body
       * below, which lives in a different package than {@link FSHLog}, can call it directly.
       */
      @Override
      protected void archive(Pair<Path, Long> localLogsToArchive) {
        super.archive(localLogsToArchive);
      }

      /** Wraps the real writer so that sync and append can be made to fail on demand. */
      @Override
      protected Writer createWriterInstance(Path path) throws IOException {
        final Writer w = super.createWriterInstance(path);
        return new Writer() {
          @Override
          public void close() throws IOException {
            w.close();
          }

          @Override
          public void sync(boolean forceSync) throws IOException {
            if (throwSyncException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...");
            }
            w.sync(forceSync);
          }

          @Override
          public void append(Entry entry) throws IOException {
            if (throwAppendException) {
              throw new IOException("FAKE! Failed to replace a bad datanode...");
            }
            w.append(entry);
          }

          @Override
          public long getLength() {
            return w.getLength();
          }

          @Override
          public long getSyncedLength() {
            return w.getSyncedLength();
          }
        };
      }
    }

    // Make up mocked server and services.
    RegionServerServices services = mock(RegionServerServices.class);
    when(services.getConfiguration()).thenReturn(CONF);
    when(services.isStopped()).thenReturn(false);
    when(services.isAborted()).thenReturn(false);
    // OK. Now I have my mocked up Server and RegionServerServices and my dodgy WAL, go ahead with
    // the test.
    FileSystem fs = FileSystem.get(CONF);
    Path rootDir = new Path(dir + getName());
    DodgyFSLog dodgyWAL = new DodgyFSLog(fs, services, rootDir, getName(), CONF);
    dodgyWAL.init();
    LogRoller logRoller = new LogRoller(services);
    logRoller.addWAL(dodgyWAL);
    logRoller.start();

    boolean threwOnSync = false;
    boolean threwOnAppend = false;
    boolean threwOnBoth = false;

    // Created inside the try so that a failure to open the region still stops the roller and
    // closes the WAL. Named differently from the (unused) field to avoid shadowing it.
    HRegion testRegion = null;
    try {
      testRegion = initHRegion(tableName, null, null, CONF, dodgyWAL);
      // Get some random bytes.
      byte[] value = Bytes.toBytes(getName());
      try {
        // First get something into memstore
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("1"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        // No failure flag is set yet, so this put is expected to succeed. Keep the cause visible.
        LOG.error("Initial put with a healthy WAL failed", ioe);
        fail("Initial put with a healthy WAL failed: " + ioe);
      }
      long rollsCount = rolls.get();
      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = false;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("3"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnAppend = true;
      }
      // Wait for the roll that is expected to follow the failed append.
      while (rollsCount == rolls.get()) {
        Threads.sleep(100);
      }
      rollsCount = rolls.get();

      // When we get to here.. we should be ok. A new WAL has been put in place. There were no
      // appends to sync. We should be able to continue.

      try {
        dodgyWAL.throwAppendException = true;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("4"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnBoth = true;
      }
      while (rollsCount == rolls.get()) {
        Threads.sleep(100);
      }

      // Again, all should be good. New WAL and no outstanding unsync'd edits so we should be able
      // to just continue.

      // So, should be no abort at this stage. Verify.
      // NOTE(review): atLeast(0) is satisfied by any number of calls, so this line cannot fail
      // and does not actually enforce "no abort". Confirm whether never() is intended here.
      verify(services, atLeast(0)).abort(anyString(), any(Throwable.class));
      try {
        dodgyWAL.throwAppendException = false;
        dodgyWAL.throwSyncException = true;
        Put put = new Put(value);
        put.addColumn(COLUMN_FAMILY_BYTES, Bytes.toBytes("2"), value);
        testRegion.put(put);
      } catch (IOException ioe) {
        threwOnSync = true;
      }
      // An append in the WAL but the sync failed is a server abort condition. That is our
      // current semantic. Verify. It takes a while for abort to be called. Just hang here till it
      // happens. If it don't we'll timeout the whole test. That is fine.
      while (true) {
        try {
          verify(services, atLeast(1)).abort(anyString(), any(Throwable.class));
          break;
        } catch (WantedButNotInvoked t) {
          Threads.sleep(1);
        }
      }

      try {
        dodgyWAL.throwAppendException = false;
        dodgyWAL.throwSyncException = false;
        dodgyWAL.throwArchiveException = true;
        Pair<Path, Long> pair = new Pair<>();
        pair.setFirst(new Path("/a/b/"));
        pair.setSecond(100L);
        dodgyWAL.archive(pair);
      } catch (Throwable ignored) {
        // Deliberately best-effort: only the abort count checked below matters.
        LOG.info("Ignoring failure from archive(); abort count is verified next", ignored);
      }
      while (true) {
        try {
          // one more abort needs to be called
          verify(services, atLeast(2)).abort(anyString(), any());
          break;
        } catch (AssertionError t) {
          // With exactly one abort recorded so far, Mockito reports "too few invocations" using
          // a verification error that is not a WantedButNotInvoked. Catch the AssertionError
          // supertype so that case keeps polling instead of failing on the first check.
          Threads.sleep(1);
        }
      }
    } finally {
      // To stop logRoller, its server has to say it is stopped.
      when(services.isStopped()).thenReturn(true);
      // Nested so that a failure closing one resource does not skip closing the others.
      try {
        logRoller.close();
      } finally {
        try {
          if (testRegion != null) {
            testRegion.close(true);
          }
        } catch (DroppedSnapshotException e) {
          LOG.info("On way out; expected!", e);
        } finally {
          dodgyWAL.close();
        }
      }
    }
    // Checked after cleanup rather than inside the finally block, so that an exception thrown by
    // the test body is reported as itself instead of being replaced by one of these assertions.
    assertTrue("The regionserver should have thrown an exception", threwOnBoth);
    assertTrue("The regionserver should have thrown an exception", threwOnAppend);
    assertTrue("The regionserver should have thrown an exception", threwOnSync);
  }

  /**
   * Initializes the chunk creator and opens a local region on the given WAL with a single column
   * family and {@link Durability#SYNC_WAL}.
   * @param tableName table the region belongs to
   * @param startKey  region start key, passed through unchanged (the test passes {@code null})
   * @param stopKey   region stop key, passed through unchanged (the test passes {@code null})
   * @param conf      configuration handed to the region
   * @param wal       WAL the region writes to
   * @return A region on which you must call {@link HBaseTestingUtility#closeRegionAndWAL(HRegion)}
   *         when done.
   * @throws IOException if the region cannot be created
   */
  public static HRegion initHRegion(TableName tableName, byte[] startKey, byte[] stopKey,
    Configuration conf, WAL wal) throws IOException {
    ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
      MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);
    return TEST_UTIL.createLocalHRegion(tableName, startKey, stopKey, conf, false,
      Durability.SYNC_WAL, wal, COLUMN_FAMILY_BYTES);
  }
}