001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import java.io.IOException; 021import java.util.Optional; 022import org.apache.hadoop.hbase.ServerName; 023import org.apache.hadoop.hbase.TableName; 024import org.apache.hadoop.hbase.client.RegionInfo; 025import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; 026import org.apache.hadoop.hbase.procedure2.Procedure; 027import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 028import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 029import org.apache.hadoop.hbase.procedure2.ProcedureUtil; 030import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 031import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; 032import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; 033import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable; 034import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; 035import org.apache.hadoop.hbase.util.ForeignExceptionUtil; 036import org.apache.hadoop.hbase.util.RetryCounter; 037import org.apache.yetus.audience.InterfaceAudience; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 042import org.apache.hadoop.hbase.shaded.protobuf.generated.ErrorHandlingProtos; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter; 045import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData; 046import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 047import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 048 049/** 050 * A remote procedure which is used to send verify snapshot request to region server. 051 */ 052@InterfaceAudience.Private 053public class SnapshotVerifyProcedure extends ServerRemoteProcedure 054 implements TableProcedureInterface { 055 private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class); 056 057 private SnapshotDescription snapshot; 058 private RegionInfo region; 059 060 private RetryCounter retryCounter; 061 062 public SnapshotVerifyProcedure() { 063 } 064 065 public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) { 066 this.snapshot = snapshot; 067 this.region = region; 068 } 069 070 @Override 071 protected void rollback(MasterProcedureEnv env) { 072 // nothing to rollback 073 } 074 075 @Override 076 protected boolean abort(MasterProcedureEnv env) { 077 return false; 078 } 079 080 @Override 081 protected synchronized boolean complete(MasterProcedureEnv env, Throwable error) { 082 boolean isProcedureCompleted = false; 083 try { 084 if (error != null) { 085 if (error instanceof RemoteProcedureException) { 086 // remote operation failed 087 Throwable remoteEx = unwrapRemoteProcedureException((RemoteProcedureException) error); 088 if (remoteEx instanceof CorruptedSnapshotException) { 089 // snapshot is corrupted, will touch a flag file and finish the procedure 090 isProcedureCompleted = true; 091 SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor() 092 .getProcedure(SnapshotProcedure.class, getParentProcId()); 093 if (parent != null) { 094 parent.markSnapshotCorrupted(); 095 } 096 } // else unexpected exception in remote server, will retry on other servers, 097 // procedureCompleted will stay false 098 } // else the mostly like thing is that remote call failed, will retry on other servers, 099 // procedureCompleted will stay false 100 } else { 101 // remote operation finished without error 102 isProcedureCompleted = true; 103 } 104 } catch (IOException e) { 105 // if we can't create the flag file, then mark the current procedure as FAILED 106 // and rollback the whole snapshot procedure stack. 107 LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}", 108 snapshot.getName(), region, e); 109 setFailure("verify-snapshot", e); 110 } finally { 111 // release the worker 112 env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer, 113 env.getProcedureScheduler()); 114 } 115 return isProcedureCompleted; 116 } 117 118 // we will wrap remote exception into a RemoteProcedureException, 119 // here we try to unwrap it 120 private Throwable unwrapRemoteProcedureException(RemoteProcedureException e) { 121 return e.getCause(); 122 } 123 124 @Override 125 protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) 126 throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { 127 try { 128 // if we've already known the snapshot is corrupted, then stop scheduling 129 // the new procedures and the undispatched procedures 130 if ( 131 state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH 132 ) { 133 SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor() 134 .getProcedure(SnapshotProcedure.class, getParentProcId()); 135 if (parent != null && parent.isSnapshotCorrupted()) { 136 return null; 137 } 138 } 139 // acquire a worker 140 if ( 141 state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH 142 && targetServer == null 143 ) { 144 targetServer = 145 env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this); 146 } 147 // send remote request 148 Procedure<MasterProcedureEnv>[] res = super.execute(env); 149 // retry if necessary 150 if ( 151 state == MasterProcedureProtos.ServerRemoteProcedureState.SERVER_REMOTE_PROCEDURE_DISPATCH 152 ) { 153 // the mostly like thing is that a FailedRemoteDispatchException is thrown. 154 // we need to retry on another remote server 155 targetServer = null; 156 throw new FailedRemoteDispatchException("Failed sent request"); 157 } else { 158 // the request was successfully dispatched 159 return res; 160 } 161 } catch (IOException e) { 162 // there are some cases we need to retry: 163 // 1. we can't get response from hdfs 164 // 2. the remote server crashed 165 if (retryCounter == null) { 166 retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); 167 } 168 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 169 LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", backoff, e); 170 setTimeout(Math.toIntExact(backoff)); 171 setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); 172 skipPersistence(); 173 throw new ProcedureSuspendedException(); 174 } 175 } 176 177 @Override 178 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 179 setState(ProcedureProtos.ProcedureState.RUNNABLE); 180 env.getProcedureScheduler().addFront(this); 181 return false; 182 } 183 184 @Override 185 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 186 SnapshotVerifyProcedureStateData.Builder builder = 187 SnapshotVerifyProcedureStateData.newBuilder(); 188 builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)).setState(state); 189 if (targetServer != null) { 190 builder.setTargetServer(ProtobufUtil.toServerName(targetServer)); 191 } 192 if (this.remoteError != null) { 193 ErrorHandlingProtos.ForeignExceptionMessage fem = 194 ForeignExceptionUtil.toProtoForeignException(remoteError); 195 builder.setError(fem); 196 } 197 serializer.serialize(builder.build()); 198 } 199 200 @Override 201 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 202 SnapshotVerifyProcedureStateData data = 203 serializer.deserialize(SnapshotVerifyProcedureStateData.class); 204 this.snapshot = data.getSnapshot(); 205 this.region = ProtobufUtil.toRegionInfo(data.getRegion()); 206 this.state = data.getState(); 207 if (data.hasTargetServer()) { 208 this.targetServer = ProtobufUtil.toServerName(data.getTargetServer()); 209 } 210 if (data.hasError()) { 211 this.remoteError = ForeignExceptionUtil.toException(data.getError()); 212 } 213 } 214 215 @Override 216 protected void toStringClassDetails(StringBuilder builder) { 217 builder.append(getClass().getSimpleName()).append(", snapshot=").append(snapshot.getName()); 218 if (targetServer != null) { 219 builder.append(", targetServer=").append(targetServer); 220 } 221 } 222 223 @Override 224 public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) { 225 SnapshotVerifyParameter.Builder builder = SnapshotVerifyParameter.newBuilder(); 226 builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)); 227 return Optional 228 .of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), SnapshotVerifyCallable.class, 229 builder.build().toByteArray(), env.getMasterServices().getMasterActiveTime())); 230 } 231 232 @Override 233 public TableName getTableName() { 234 return TableName.valueOf(snapshot.getTable()); 235 } 236 237 @Override 238 public TableOperationType getTableOperationType() { 239 return TableOperationType.SNAPSHOT; 240 } 241 242 public ServerName getServerName() { 243 return targetServer; 244 } 245}