001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import com.google.errorprone.annotations.RestrictedApi;
021import java.io.IOException;
022import java.util.Optional;
023import org.apache.hadoop.hbase.ServerName;
024import org.apache.hadoop.hbase.TableName;
025import org.apache.hadoop.hbase.client.RegionInfo;
026import org.apache.hadoop.hbase.master.RegionState;
027import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
028import org.apache.hadoop.hbase.master.assignment.RegionStates;
029import org.apache.hadoop.hbase.master.assignment.ServerState;
030import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
031import org.apache.hadoop.hbase.procedure2.Procedure;
032import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
033import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
034import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
035import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
036import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
037import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
038import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure;
039import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
040import org.apache.hadoop.hbase.regionserver.SnapshotRegionCallable;
041import org.apache.hadoop.hbase.util.RetryCounter;
042import org.apache.yetus.audience.InterfaceAudience;
043import org.slf4j.Logger;
044import org.slf4j.LoggerFactory;
045
046import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotRegionProcedureStateData;
049import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState;
050import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
051
052/**
053 * A remote procedure which is used to send region snapshot request to region server. The basic
054 * logic of SnapshotRegionProcedure is similar like {@link ServerRemoteProcedure}, only with a
055 * little difference, when {@link FailedRemoteDispatchException} was thrown, SnapshotRegionProcedure
056 * will sleep some time and continue retrying until success.
057 */
058@InterfaceAudience.Private
059public class SnapshotRegionProcedure extends Procedure<MasterProcedureEnv>
060  implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> {
061  private static final Logger LOG = LoggerFactory.getLogger(SnapshotRegionProcedure.class);
062
063  private SnapshotDescription snapshot;
064  private ProcedureEvent<?> event;
065  private RegionInfo region;
066  private boolean dispatched;
067  private boolean succ;
068  private RetryCounter retryCounter;
069
070  public SnapshotRegionProcedure() {
071  }
072
073  public SnapshotRegionProcedure(SnapshotDescription snapshot, RegionInfo region) {
074    this.snapshot = snapshot;
075    this.region = region;
076  }
077
078  @Override
079  protected LockState acquireLock(final MasterProcedureEnv env) {
080    if (env.getProcedureScheduler().waitRegions(this, getTableName(), region)) {
081      return LockState.LOCK_EVENT_WAIT;
082    }
083    return LockState.LOCK_ACQUIRED;
084  }
085
086  @Override
087  protected void releaseLock(final MasterProcedureEnv env) {
088    env.getProcedureScheduler().wakeRegions(this, getTableName(), region);
089  }
090
091  @Override
092  protected boolean holdLock(MasterProcedureEnv env) {
093    return false;
094  }
095
096  @Override
097  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
098    return Optional.of(new RSProcedureDispatcher.ServerOperation(this, getProcId(),
099      SnapshotRegionCallable.class, MasterProcedureProtos.SnapshotRegionParameter.newBuilder()
100        .setRegion(ProtobufUtil.toRegionInfo(region)).setSnapshot(snapshot).build().toByteArray()));
101  }
102
103  @Override
104  public void remoteCallFailed(MasterProcedureEnv env, ServerName serverName, IOException e) {
105    complete(env, e);
106  }
107
108  @Override
109  public void remoteOperationCompleted(MasterProcedureEnv env) {
110    complete(env, null);
111  }
112
113  @Override
114  public void remoteOperationFailed(MasterProcedureEnv env, RemoteProcedureException e) {
115    complete(env, e);
116  }
117
118  // keep retrying until success
119  private void complete(MasterProcedureEnv env, Throwable error) {
120    if (isFinished()) {
121      LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId());
122      return;
123    }
124    if (event == null) {
125      LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery",
126        getProcId());
127      return;
128    }
129    if (error == null) {
130      LOG.info("finish snapshot {} on region {}", snapshot.getName(), region.getEncodedName());
131      succ = true;
132    }
133
134    event.wake(env.getProcedureScheduler());
135    event = null;
136  }
137
138  @Override
139  public TableName getTableName() {
140    return region.getTable();
141  }
142
143  @Override
144  public TableOperationType getTableOperationType() {
145    return TableOperationType.REGION_SNAPSHOT;
146  }
147
148  @Override
149  protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
150    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
151    if (dispatched) {
152      if (succ) {
153        return null;
154      }
155      dispatched = false;
156    }
157
158    RegionStates regionStates = env.getAssignmentManager().getRegionStates();
159    RegionStateNode regionNode = regionStates.getRegionStateNode(region);
160    regionNode.lock();
161    try {
162      if (regionNode.getProcedure() != null) {
163        setTimeoutForSuspend(env, String.format("region %s has a TRSP attached %s",
164          region.getRegionNameAsString(), regionNode.getProcedure()));
165        throw new ProcedureSuspendedException();
166      }
167      if (!regionNode.isInState(RegionState.State.OPEN)) {
168        setTimeoutForSuspend(env, String.format("region state of %s is %s",
169          region.getRegionNameAsString(), regionNode.getState()));
170        throw new ProcedureSuspendedException();
171      }
172      ServerName targetServer = regionNode.getRegionLocation();
173      if (targetServer == null) {
174        setTimeoutForSuspend(env,
175          String.format("target server of region %s is null", region.getRegionNameAsString()));
176        throw new ProcedureSuspendedException();
177      }
178      ServerState serverState = regionStates.getServerNode(targetServer).getState();
179      if (serverState != ServerState.ONLINE) {
180        setTimeoutForSuspend(env, String.format("target server of region %s %s is in state %s",
181          region.getRegionNameAsString(), targetServer, serverState));
182        throw new ProcedureSuspendedException();
183      }
184      try {
185        env.getRemoteDispatcher().addOperationToNode(targetServer, this);
186        dispatched = true;
187        event = new ProcedureEvent<>(this);
188        event.suspendIfNotReady(this);
189        throw new ProcedureSuspendedException();
190      } catch (FailedRemoteDispatchException e) {
191        setTimeoutForSuspend(env, "Failed send request to " + targetServer);
192        throw new ProcedureSuspendedException();
193      }
194    } finally {
195      regionNode.unlock();
196    }
197  }
198
199  @Override
200  protected void rollback(MasterProcedureEnv env) {
201    throw new UnsupportedOperationException();
202  }
203
204  private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) {
205    if (retryCounter == null) {
206      retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
207    }
208    long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
209    LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff);
210    setTimeout(Math.toIntExact(backoff));
211    setState(ProcedureState.WAITING_TIMEOUT);
212    skipPersistence();
213  }
214
215  @Override
216  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
217    setState(ProcedureState.RUNNABLE);
218    env.getProcedureScheduler().addFront(this);
219    return false;
220  }
221
222  @Override
223  protected boolean abort(MasterProcedureEnv env) {
224    return false;
225  }
226
227  @Override
228  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
229    SnapshotRegionProcedureStateData.Builder builder =
230      SnapshotRegionProcedureStateData.newBuilder();
231    builder.setSnapshot(snapshot);
232    builder.setRegion(ProtobufUtil.toRegionInfo(region));
233    serializer.serialize(builder.build());
234  }
235
236  @Override
237  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
238    SnapshotRegionProcedureStateData data =
239      serializer.deserialize(SnapshotRegionProcedureStateData.class);
240    this.snapshot = data.getSnapshot();
241    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
242  }
243
244  @Override
245  public String getProcName() {
246    return getClass().getSimpleName() + " " + region.getEncodedName();
247  }
248
249  @Override
250  protected void toStringClassDetails(StringBuilder builder) {
251    builder.append(getProcName());
252  }
253
254  @Override
255  protected boolean waitInitialized(MasterProcedureEnv env) {
256    return env.waitInitialized(this);
257  }
258
259  public RegionInfo getRegion() {
260    return region;
261  }
262
263  @RestrictedApi(explanation = "Should only be called in tests", link = "",
264      allowedOnPath = ".*(/src/test/.*|TestSnapshotProcedure).java")
265  boolean inRetrying() {
266    return retryCounter != null;
267  }
268}