001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.junit.Assert.assertNotEquals; 021import static org.junit.Assert.fail; 022 023import java.io.IOException; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.TableName; 028import org.apache.hadoop.hbase.client.SnapshotDescription; 029import org.apache.hadoop.hbase.client.SnapshotType; 030import org.apache.hadoop.hbase.client.Table; 031import org.apache.hadoop.hbase.master.HMaster; 032import org.apache.hadoop.hbase.master.snapshot.TakeSnapshotHandler; 033import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; 034import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils; 035import org.apache.hadoop.hbase.testclassification.MasterTests; 036import org.apache.hadoop.hbase.testclassification.MediumTests; 037import org.apache.hadoop.hbase.util.Bytes; 038import org.apache.hadoop.hbase.util.RegionSplitter; 039import org.junit.After; 040import org.junit.Before; 041import org.junit.ClassRule; 042import org.junit.Test; 043import org.junit.experimental.categories.Category; 044import org.slf4j.Logger; 045import org.slf4j.LoggerFactory; 046 047import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 048import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 049import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos; 050 051/** 052 * Snapshot creation with master lock timeout test. 053 */ 054@Category({ MasterTests.class, MediumTests.class }) 055public class TestSnapshotProcedureWithLockTimeout { 056 057 private static final Logger LOG = 058 LoggerFactory.getLogger(TestSnapshotProcedureWithLockTimeout.class); 059 060 @ClassRule 061 public static final HBaseClassTestRule CLASS_RULE = 062 HBaseClassTestRule.forClass(TestSnapshotProcedureWithLockTimeout.class); 063 064 private static HBaseTestingUtility TEST_UTIL; 065 private HMaster master; 066 private TableName TABLE_NAME; 067 private byte[] CF; 068 private String SNAPSHOT_NAME; 069 070 @Before 071 public void setup() throws Exception { 072 TEST_UTIL = new HBaseTestingUtility(); 073 Configuration config = TEST_UTIL.getConfiguration(); 074 config.setInt("hbase.snapshot.remote.verify.threshold", 1); 075 config.setLong(TakeSnapshotHandler.HBASE_SNAPSHOT_MASTER_LOCK_ACQUIRE_TIMEOUT, 1L); 076 TEST_UTIL.startMiniCluster(3); 077 master = TEST_UTIL.getHBaseCluster().getMaster(); 078 TABLE_NAME = TableName.valueOf(Bytes.toBytes("TestSnapshotProcedureWithLockTimeout")); 079 CF = Bytes.toBytes("cf"); 080 SNAPSHOT_NAME = "SnapshotProcLockTimeout"; 081 final byte[][] splitKeys = new RegionSplitter.HexStringSplit().split(10); 082 Table table = TEST_UTIL.createTable(TABLE_NAME, CF, splitKeys); 083 TEST_UTIL.loadTable(table, CF, false); 084 } 085 086 @After 087 public void teardown() throws Exception { 088 if (this.master != null) { 089 ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(master.getMasterProcedureExecutor(), 090 false); 091 } 092 TEST_UTIL.shutdownMiniCluster(); 093 } 094 095 @Test 096 public void testTakeZkCoordinatedSnapshot() { 097 for (int i = 0; i < 10; i++) { 098 try { 099 // Verify that snapshot creation is not possible because lock could not be 100 // acquired on time. This can be flaky behavior because even though we provide 1ms 101 // as lock timeout, it could still be fast enough and eventually lead to successful 102 // snapshot creation. If that happens, retry again. 103 testTakeZkCoordinatedSnapshot(i); 104 break; 105 } catch (Exception e) { 106 LOG.error("Error because of faster lock acquisition. retrying....", e); 107 } 108 assertNotEquals("Retries exhausted", 9, i); 109 } 110 } 111 112 private void testTakeZkCoordinatedSnapshot(int i) throws Exception { 113 SnapshotDescription snapshotOnSameTable = 114 new SnapshotDescription(SNAPSHOT_NAME + i, TABLE_NAME, SnapshotType.SKIPFLUSH); 115 SnapshotProtos.SnapshotDescription snapshotOnSameTableProto = 116 ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotOnSameTable); 117 Thread second = new Thread("zk-snapshot") { 118 @Override 119 public void run() { 120 try { 121 master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto); 122 } catch (IOException e) { 123 LOG.error("zk snapshot failed", e); 124 fail("zk snapshot failed"); 125 } 126 } 127 }; 128 second.start(); 129 130 Thread.sleep(5000); 131 boolean snapshotCreated = false; 132 try { 133 SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME, 134 CF); 135 snapshotCreated = true; 136 } catch (AssertionError e) { 137 LOG.error("Assertion error..", e); 138 if ( 139 e.getMessage() != null && e.getMessage().contains("target snapshot directory") 140 && e.getMessage().contains("doesn't exist.") 141 ) { 142 LOG.debug("Expected behaviour - snapshot could not be created"); 143 } else { 144 throw new IOException(e); 145 } 146 } 147 148 if (snapshotCreated) { 149 throw new IOException("Snapshot created successfully"); 150 } 151 152 // ensure all scheduled procedures are successfully completed 153 TEST_UTIL.waitFor(4000, 400, 154 () -> master.getMasterProcedureExecutor().getProcedures().stream() 155 .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState() 156 == ProcedureProtos.ProcedureState.SUCCESS) 157 .count() == master.getMasterProcedureExecutor().getProcedures().size()); 158 } 159}