/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.Optional;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureException;
import org.apache.hadoop.hbase.regionserver.SnapshotVerifyCallable;
import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

040import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 041import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyParameter; 042import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotVerifyProcedureStateData; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 044import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 045 046/** 047 * A remote procedure which is used to send verify snapshot request to region server. 048 */ 049@InterfaceAudience.Private 050public class SnapshotVerifyProcedure extends ServerRemoteProcedure 051 implements TableProcedureInterface { 052 private static final Logger LOG = LoggerFactory.getLogger(SnapshotVerifyProcedure.class); 053 054 private SnapshotDescription snapshot; 055 private RegionInfo region; 056 057 private RetryCounter retryCounter; 058 059 public SnapshotVerifyProcedure() { 060 } 061 062 public SnapshotVerifyProcedure(SnapshotDescription snapshot, RegionInfo region) { 063 this.snapshot = snapshot; 064 this.region = region; 065 } 066 067 @Override 068 protected void rollback(MasterProcedureEnv env) { 069 // nothing to rollback 070 } 071 072 @Override 073 protected boolean abort(MasterProcedureEnv env) { 074 return false; 075 } 076 077 @Override 078 protected synchronized void complete(MasterProcedureEnv env, Throwable error) { 079 try { 080 if (error != null) { 081 if (error instanceof RemoteProcedureException) { 082 // remote operation failed 083 Throwable remoteEx = unwrapRemoteProcedureException((RemoteProcedureException) error); 084 if (remoteEx instanceof CorruptedSnapshotException) { 085 // snapshot is corrupted, will touch a flag file and finish the procedure 086 succ = true; 087 SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor() 088 .getProcedure(SnapshotProcedure.class, getParentProcId()); 089 if (parent != null) { 090 parent.markSnapshotCorrupted(); 
091 } 092 } else { 093 // unexpected exception in remote server, will retry on other servers 094 succ = false; 095 } 096 } else { 097 // the mostly like thing is that remote call failed, will retry on other servers 098 succ = false; 099 } 100 } else { 101 // remote operation finished without error 102 succ = true; 103 } 104 } catch (IOException e) { 105 // if we can't create the flag file, then mark the current procedure as FAILED 106 // and rollback the whole snapshot procedure stack. 107 LOG.warn("Failed create corrupted snapshot flag file for snapshot={}, region={}", 108 snapshot.getName(), region, e); 109 setFailure("verify-snapshot", e); 110 } finally { 111 // release the worker 112 env.getMasterServices().getSnapshotManager().releaseSnapshotVerifyWorker(this, targetServer, 113 env.getProcedureScheduler()); 114 } 115 } 116 117 // we will wrap remote exception into a RemoteProcedureException, 118 // here we try to unwrap it 119 private Throwable unwrapRemoteProcedureException(RemoteProcedureException e) { 120 return e.getCause(); 121 } 122 123 @Override 124 protected synchronized Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) 125 throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { 126 try { 127 // if we've already known the snapshot is corrupted, then stop scheduling 128 // the new procedures and the undispatched procedures 129 if (!dispatched) { 130 SnapshotProcedure parent = env.getMasterServices().getMasterProcedureExecutor() 131 .getProcedure(SnapshotProcedure.class, getParentProcId()); 132 if (parent != null && parent.isSnapshotCorrupted()) { 133 return null; 134 } 135 } 136 // acquire a worker 137 if (!dispatched && targetServer == null) { 138 targetServer = 139 env.getMasterServices().getSnapshotManager().acquireSnapshotVerifyWorker(this); 140 } 141 // send remote request 142 Procedure<MasterProcedureEnv>[] res = super.execute(env); 143 // retry if necessary 144 if (!dispatched) { 145 // the mostly like 
thing is that a FailedRemoteDispatchException is thrown. 146 // we need to retry on another remote server 147 targetServer = null; 148 throw new FailedRemoteDispatchException("Failed sent request"); 149 } else { 150 // the request was successfully dispatched 151 return res; 152 } 153 } catch (IOException e) { 154 // there are some cases we need to retry: 155 // 1. we can't get response from hdfs 156 // 2. the remote server crashed 157 if (retryCounter == null) { 158 retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); 159 } 160 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 161 LOG.warn("Failed to get snapshot verify result , wait {} ms to retry", backoff, e); 162 setTimeout(Math.toIntExact(backoff)); 163 setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); 164 skipPersistence(); 165 throw new ProcedureSuspendedException(); 166 } 167 } 168 169 @Override 170 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 171 setState(ProcedureProtos.ProcedureState.RUNNABLE); 172 env.getProcedureScheduler().addFront(this); 173 return false; 174 } 175 176 @Override 177 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 178 SnapshotVerifyProcedureStateData.Builder builder = 179 SnapshotVerifyProcedureStateData.newBuilder(); 180 builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)); 181 if (targetServer != null) { 182 builder.setTargetServer(ProtobufUtil.toServerName(targetServer)); 183 } 184 serializer.serialize(builder.build()); 185 } 186 187 @Override 188 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 189 SnapshotVerifyProcedureStateData data = 190 serializer.deserialize(SnapshotVerifyProcedureStateData.class); 191 this.snapshot = data.getSnapshot(); 192 this.region = ProtobufUtil.toRegionInfo(data.getRegion()); 193 if (data.hasTargetServer()) { 194 this.targetServer = 
ProtobufUtil.toServerName(data.getTargetServer()); 195 } 196 } 197 198 @Override 199 protected void toStringClassDetails(StringBuilder builder) { 200 builder.append(getClass().getSimpleName()).append(", snapshot=").append(snapshot.getName()); 201 if (targetServer != null) { 202 builder.append(", targetServer=").append(targetServer); 203 } 204 } 205 206 @Override 207 public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) { 208 SnapshotVerifyParameter.Builder builder = SnapshotVerifyParameter.newBuilder(); 209 builder.setSnapshot(snapshot).setRegion(ProtobufUtil.toRegionInfo(region)); 210 return Optional.of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), 211 SnapshotVerifyCallable.class, builder.build().toByteArray())); 212 } 213 214 @Override 215 public TableName getTableName() { 216 return TableName.valueOf(snapshot.getTable()); 217 } 218 219 @Override 220 public TableOperationType getTableOperationType() { 221 return TableOperationType.SNAPSHOT; 222 } 223 224 public ServerName getServerName() { 225 return targetServer; 226 } 227}