001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertNotEquals;
021import static org.junit.Assert.fail;
022
023import java.io.IOException;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.SnapshotDescription;
029import org.apache.hadoop.hbase.client.SnapshotType;
030import org.apache.hadoop.hbase.client.Table;
031import org.apache.hadoop.hbase.master.HMaster;
032import org.apache.hadoop.hbase.master.snapshot.TakeSnapshotHandler;
033import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
034import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;
035import org.apache.hadoop.hbase.testclassification.MasterTests;
036import org.apache.hadoop.hbase.testclassification.MediumTests;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.hadoop.hbase.util.RegionSplitter;
039import org.junit.After;
040import org.junit.Before;
041import org.junit.ClassRule;
042import org.junit.Test;
043import org.junit.experimental.categories.Category;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
049import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
050
051/**
052 * Snapshot creation with master lock timeout test.
053 */
054@Category({ MasterTests.class, MediumTests.class })
055public class TestSnapshotProcedureWithLockTimeout {
056
057  private static final Logger LOG =
058    LoggerFactory.getLogger(TestSnapshotProcedureWithLockTimeout.class);
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestSnapshotProcedureWithLockTimeout.class);
063
064  private static HBaseTestingUtility TEST_UTIL;
065  private HMaster master;
066  private TableName TABLE_NAME;
067  private byte[] CF;
068  private String SNAPSHOT_NAME;
069
070  @Before
071  public void setup() throws Exception {
072    TEST_UTIL = new HBaseTestingUtility();
073    Configuration config = TEST_UTIL.getConfiguration();
074    config.setInt("hbase.snapshot.remote.verify.threshold", 1);
075    config.setLong(TakeSnapshotHandler.HBASE_SNAPSHOT_MASTER_LOCK_ACQUIRE_TIMEOUT, 1L);
076    TEST_UTIL.startMiniCluster(3);
077    master = TEST_UTIL.getHBaseCluster().getMaster();
078    TABLE_NAME = TableName.valueOf(Bytes.toBytes("TestSnapshotProcedureWithLockTimeout"));
079    CF = Bytes.toBytes("cf");
080    SNAPSHOT_NAME = "SnapshotProcLockTimeout";
081    final byte[][] splitKeys = new RegionSplitter.HexStringSplit().split(10);
082    Table table = TEST_UTIL.createTable(TABLE_NAME, CF, splitKeys);
083    TEST_UTIL.loadTable(table, CF, false);
084  }
085
086  @After
087  public void teardown() throws Exception {
088    if (this.master != null) {
089      ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(master.getMasterProcedureExecutor(),
090        false);
091    }
092    TEST_UTIL.shutdownMiniCluster();
093  }
094
095  @Test
096  public void testTakeZkCoordinatedSnapshot() {
097    for (int i = 0; i < 10; i++) {
098      try {
099        // Verify that snapshot creation is not possible because lock could not be
100        // acquired on time. This can be flaky behavior because even though we provide 1ms
101        // as lock timeout, it could still be fast enough and eventually lead to successful
102        // snapshot creation. If that happens, retry again.
103        testTakeZkCoordinatedSnapshot(i);
104        break;
105      } catch (Exception e) {
106        LOG.error("Error because of faster lock acquisition. retrying....", e);
107      }
108      assertNotEquals("Retries exhausted", 9, i);
109    }
110  }
111
112  private void testTakeZkCoordinatedSnapshot(int i) throws Exception {
113    SnapshotDescription snapshotOnSameTable =
114      new SnapshotDescription(SNAPSHOT_NAME + i, TABLE_NAME, SnapshotType.SKIPFLUSH);
115    SnapshotProtos.SnapshotDescription snapshotOnSameTableProto =
116      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshotOnSameTable);
117    Thread second = new Thread("zk-snapshot") {
118      @Override
119      public void run() {
120        try {
121          master.getSnapshotManager().takeSnapshot(snapshotOnSameTableProto);
122        } catch (IOException e) {
123          LOG.error("zk snapshot failed", e);
124          fail("zk snapshot failed");
125        }
126      }
127    };
128    second.start();
129
130    Thread.sleep(5000);
131    boolean snapshotCreated = false;
132    try {
133      SnapshotTestingUtils.confirmSnapshotValid(TEST_UTIL, snapshotOnSameTableProto, TABLE_NAME,
134        CF);
135      snapshotCreated = true;
136    } catch (AssertionError e) {
137      LOG.error("Assertion error..", e);
138      if (
139        e.getMessage() != null && e.getMessage().contains("target snapshot directory")
140          && e.getMessage().contains("doesn't exist.")
141      ) {
142        LOG.debug("Expected behaviour - snapshot could not be created");
143      } else {
144        throw new IOException(e);
145      }
146    }
147
148    if (snapshotCreated) {
149      throw new IOException("Snapshot created successfully");
150    }
151
152    // ensure all scheduled procedures are successfully completed
153    TEST_UTIL.waitFor(4000, 400,
154      () -> master.getMasterProcedureExecutor().getProcedures().stream()
155        .filter(masterProcedureEnvProcedure -> masterProcedureEnvProcedure.getState()
156            == ProcedureProtos.ProcedureState.SUCCESS)
157        .count() == master.getMasterProcedureExecutor().getProcedures().size());
158  }
159}