001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022
023import java.io.IOException;
024import java.util.List;
025import java.util.Set;
026import java.util.stream.Collectors;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.HBaseClassTestRule;
029import org.apache.hadoop.hbase.HBaseTestingUtility;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.master.RegionState.State;
033import org.apache.hadoop.hbase.master.ServerManager;
034import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
035import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
036import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
037import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
038import org.apache.hadoop.hbase.testclassification.MasterTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.Bytes;
041import org.junit.AfterClass;
042import org.junit.BeforeClass;
043import org.junit.ClassRule;
044import org.junit.Test;
045import org.junit.experimental.categories.Category;
046
047import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
048
049/**
050 * Confirm that we will batch region reopens when reopening all table regions. This can avoid the
051 * pain associated with reopening too many regions at once.
052 */
053@Category({ MasterTests.class, MediumTests.class })
054public class TestReopenTableRegionsProcedureBatching {
055
056  @ClassRule
057  public static final HBaseClassTestRule CLASS_RULE =
058    HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureBatching.class);
059
060  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
061  private static final int BACKOFF_MILLIS_PER_RS = 0;
062  private static final int REOPEN_BATCH_SIZE_MAX = 1;
063
064  private static TableName TABLE_NAME = TableName.valueOf("Batching");
065
066  private static byte[] CF = Bytes.toBytes("cf");
067
068  @BeforeClass
069  public static void setUp() throws Exception {
070    Configuration conf = UTIL.getConfiguration();
071    conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
072    UTIL.startMiniCluster(1);
073    UTIL.createMultiRegionTable(TABLE_NAME, CF);
074  }
075
076  @AfterClass
077  public static void tearDown() throws Exception {
078    UTIL.shutdownMiniCluster();
079  }
080
081  @Test
082  public void testSmallMaxBatchSize() throws IOException {
083    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
084    ProcedureExecutor<MasterProcedureEnv> procExec =
085      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
086    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
087    assertTrue(2 <= regions.size());
088    Set<StuckRegion> stuckRegions =
089      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
090    ReopenTableRegionsProcedure proc =
091      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, REOPEN_BATCH_SIZE_MAX);
092    procExec.submitProcedure(proc);
093    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
094
095    // the first batch should be small
096    confirmBatchSize(1, stuckRegions, proc);
097    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
098
099    // other batches should also be small
100    assertTrue(proc.getBatchesProcessed() >= regions.size());
101
102    // all regions should only be opened once
103    assertEquals(proc.getRegionsReopened(), regions.size());
104  }
105
106  @Test
107  public void testDefaultMaxBatchSize() throws IOException {
108    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
109    ProcedureExecutor<MasterProcedureEnv> procExec =
110      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
111    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
112    assertTrue(2 <= regions.size());
113    Set<StuckRegion> stuckRegions =
114      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
115    ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
116    procExec.submitProcedure(proc);
117    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
118
119    // the first batch should be large
120    confirmBatchSize(regions.size(), stuckRegions, proc);
121    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
122
123    // all regions should only be opened once
124    assertEquals(proc.getRegionsReopened(), regions.size());
125  }
126
127  @Test
128  public void testNegativeBatchSizeDoesNotBreak() throws IOException {
129    AssignmentManager am = UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
130    ProcedureExecutor<MasterProcedureEnv> procExec =
131      UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor();
132    List<RegionInfo> regions = UTIL.getAdmin().getRegions(TABLE_NAME);
133    assertTrue(2 <= regions.size());
134    Set<StuckRegion> stuckRegions =
135      regions.stream().map(r -> stickRegion(am, procExec, r)).collect(Collectors.toSet());
136    ReopenTableRegionsProcedure proc =
137      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, -100);
138    procExec.submitProcedure(proc);
139    UTIL.waitFor(10000, () -> proc.getState() == ProcedureState.WAITING_TIMEOUT);
140
141    // the first batch should be small
142    confirmBatchSize(1, stuckRegions, proc);
143    ProcedureSyncWait.waitForProcedureToComplete(procExec, proc, 60_000);
144
145    // other batches should also be small
146    assertTrue(proc.getBatchesProcessed() >= regions.size());
147
148    // all regions should only be opened once
149    assertEquals(proc.getRegionsReopened(), regions.size());
150  }
151
152  @Test
153  public void testBatchSizeDoesNotOverflow() {
154    ReopenTableRegionsProcedure proc =
155      new ReopenTableRegionsProcedure(TABLE_NAME, BACKOFF_MILLIS_PER_RS, Integer.MAX_VALUE);
156    int currentBatchSize = 1;
157    while (currentBatchSize < Integer.MAX_VALUE) {
158      currentBatchSize = proc.progressBatchSize();
159      assertTrue(currentBatchSize > 0);
160    }
161  }
162
163  private void confirmBatchSize(int expectedBatchSize, Set<StuckRegion> stuckRegions,
164    ReopenTableRegionsProcedure proc) {
165    while (true) {
166      if (proc.getBatchesProcessed() == 0) {
167        continue;
168      }
169      stuckRegions.forEach(this::unstickRegion);
170      UTIL.waitFor(5000, () -> expectedBatchSize == proc.getRegionsReopened());
171      break;
172    }
173  }
174
175  static class StuckRegion {
176    final TransitRegionStateProcedure trsp;
177    final RegionStateNode regionNode;
178    final long openSeqNum;
179
180    public StuckRegion(TransitRegionStateProcedure trsp, RegionStateNode regionNode,
181      long openSeqNum) {
182      this.trsp = trsp;
183      this.regionNode = regionNode;
184      this.openSeqNum = openSeqNum;
185    }
186  }
187
188  private StuckRegion stickRegion(AssignmentManager am,
189    ProcedureExecutor<MasterProcedureEnv> procExec, RegionInfo regionInfo) {
190    RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
191    TransitRegionStateProcedure trsp =
192      TransitRegionStateProcedure.unassign(procExec.getEnvironment(), regionInfo);
193    regionNode.lock();
194    long openSeqNum;
195    try {
196      openSeqNum = regionNode.getOpenSeqNum();
197      regionNode.setState(State.OPENING);
198      regionNode.setOpenSeqNum(-1L);
199      regionNode.setProcedure(trsp);
200    } finally {
201      regionNode.unlock();
202    }
203    return new StuckRegion(trsp, regionNode, openSeqNum);
204  }
205
206  private void unstickRegion(StuckRegion stuckRegion) {
207    stuckRegion.regionNode.lock();
208    try {
209      stuckRegion.regionNode.setState(State.OPEN);
210      stuckRegion.regionNode.setOpenSeqNum(stuckRegion.openSeqNum);
211      stuckRegion.regionNode.unsetProcedure(stuckRegion.trsp);
212    } finally {
213      stuckRegion.regionNode.unlock();
214    }
215  }
216}