001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.Optional;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.TableName;
024import org.apache.hadoop.hbase.client.RegionInfo;
025import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
026import org.apache.hadoop.hbase.procedure2.Procedure;
027import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
028import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
029import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
030import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
031import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
032import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
033import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable;
034import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
035import org.apache.hadoop.hbase.util.ForeignExceptionUtil;
036import org.apache.hadoop.hbase.util.RetryCounter;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
042import org.apache.hadoop.hbase.shaded.protobuf.generated.ErrorHandlingProtos;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData;
046import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
047import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
048
/**
 * A remote procedure used to send a snapshot verification request to a region server.
 */
052@InterfaceAudience.Private
053public class SnapshotVerifyProcedure extends ServerRemoteProcedure
054  implements TableProcedureInterface {
055  private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class);
056
057  private SnapshotDescription snapshot;
058  private RegionInfo region;
059
060  private RetryCounter retryCounter;
061
062  public SnapshotVerifyProcedure() {
063  }
064
065  public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) {
066    this.snapshot = snapshot;
067    this.region = region;
068  }
069
070  @Override
071  protected void rollback(MasterProcedureEnv env) {
072    // nothing to rollback
073  }
074
075  @Override
076  protected boolean abort(MasterProcedureEnv env) {
077    return false;
078  }
079
080  @Override
081  protected synchronized boolean complete(MasterProcedureEnv env, Throwable error) {
082    boolean isProcedureCompleted = false;
083    try {
084      if (error != null) {
085        if (error instanceof RemoteProcedureException) {
086          // remote operation failed
087          Throwable remoteEx = unwrapRemoteProcedureException((RemoteProcedureException) error);
088          if (remoteEx instanceof CorruptedSnapshotException) {
089            // snapshot is corrupted, will touch a flag file and finish the procedure
090            isProcedureCompleted = true;
091            SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
092              .getProcedure(SnapshotProcedure.class, getParentProcId());
093            if (parent != null) {
094              parent.markSnapshotCorrupted();
095            }
096          } // else unexpected exception in remote server, will retry on other servers,
097            // procedureCompleted will stay false
098        } // else the mostly like thing is that remote call failed, will retry on other servers,
099          // procedureCompleted will stay false
100      } else {
101        // remote operation finished without error
102        isProcedureCompleted = true;
103      }
104    } catch (IOException e) {
105      // if we can't create the flag file, then mark the current procedure as FAILED
106      // and rollback the whole snapshot procedure stack.
107      LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}",
108        snapshot.getName(), region, e);
109      setFailure("verify-snapshot", e);
110    } finally {
111      // release the worker
112      env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer,
113        env.getProcedureScheduler());
114    }
115    return isProcedureCompleted;
116  }
117
118  // we will wrap remote exception into a RemoteProcedureException,
119  // here we try to unwrap it
120  private Throwable unwrapRemoteProcedureException(RemoteProcedureException e) {
121    return e.getCause();
122  }
123
124  @Override
125  protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env)
126    throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException {
127    try {
128      // if we've already known the snapshot is corrupted, then stop scheduling
129      // the new procedures and the undispatched procedures
130      if (
131        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
132      ) {
133        SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor()
134          .getProcedure(SnapshotProcedure.class, getParentProcId());
135        if (parent != null && parent.isSnapshotCorrupted()) {
136          return null;
137        }
138      }
139      // acquire a worker
140      if (
141        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
142          && targetServer == null
143      ) {
144        targetServer =
145          env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this);
146      }
147      // send remote request
148      Procedure<MasterProcedureEnv>[] res = super.execute(env);
149      // retry if necessary
150      if (
151        state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH
152      ) {
153        // the mostly like thing is that a FailedRemoteDispatchException is thrown.
154        // we need to retry on another remote server
155        targetServer = null;
156        throw new FailedRemoteDispatchException("Failed sent request");
157      } else {
158        // the request was successfully dispatched
159        return res;
160      }
161    } catch (IOException e) {
162      // there are some cases we need to retry:
163      // 1. we can't get response from hdfs
164      // 2. the remote server crashed
165      if (retryCounter == null) {
166        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
167      }
168      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
169      LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", backoff, e);
170      setTimeout(Math.toIntExact(backoff));
171      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
172      skipPersistence();
173      throw new ProcedureSuspendedException();
174    }
175  }
176
177  @Override
178  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
179    setState(ProcedureProtos.ProcedureState.RUNNABLE);
180    env.getProcedureScheduler().addFront(this);
181    return false;
182  }
183
184  @Override
185  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
186    SnapshotVerifyProcedureStateData.Builder builder =
187      SnapshotVerifyProcedureStateData.newBuilder();
188    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)).setState(state);
189    if (targetServer != null) {
190      builder.setTargetServer(ProtobufUtil.toServerName(targetServer));
191    }
192    if (this.remoteError != null) {
193      ErrorHandlingProtos.ForeignExceptionMessage fem =
194        ForeignExceptionUtil.toProtoForeignException(remoteError);
195      builder.setError(fem);
196    }
197    serializer.serialize(builder.build());
198  }
199
200  @Override
201  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
202    SnapshotVerifyProcedureStateData data =
203      serializer.deserialize(SnapshotVerifyProcedureStateData.class);
204    this.snapshot = data.getSnapshot();
205    this.region = ProtobufUtil.toRegionInfo(data.getRegion());
206    this.state = data.getState();
207    if (data.hasTargetServer()) {
208      this.targetServer = ProtobufUtil.toServerName(data.getTargetServer());
209    }
210    if (data.hasError()) {
211      this.remoteError = ForeignExceptionUtil.toException(data.getError());
212    }
213  }
214
215  @Override
216  protected void toStringClassDetails(StringBuilder builder) {
217    builder.append(getClass().getSimpleName()).append(", snapshot=").append(snapshot.getName());
218    if (targetServer != null) {
219      builder.append(", targetServer=").append(targetServer);
220    }
221  }
222
223  @Override
224  public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) {
225    SnapshotVerifyParameter.Builder builder = SnapshotVerifyParameter.newBuilder();
226    builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region));
227    return Optional
228      .of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), SnapshotVerifyCallable.class,
229        builder.build().toByteArray(), env.getMasterServices().getMasterActiveTime()));
230  }
231
232  @Override
233  public TableName getTableName() {
234    return TableName.valueOf(snapshot.getTable());
235  }
236
237  @Override
238  public TableOperationType getTableOperationType() {
239    return TableOperationType.SNAPSHOT;
240  }
241
242  public ServerName getServerName() {
243    return targetServer;
244  }
245}