001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import com.google.errorprone.annotations.RestrictedApi; 021import java.io.IOException; 022import java.util.Optional; 023import org.apache.hadoop.hbase.ServerName; 024import org.apache.hadoop.hbase.TableName; 025import org.apache.hadoop.hbase.client.RegionInfo; 026import org.apache.hadoop.hbase.master.RegionState; 027import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 028import org.apache.hadoop.hbase.master.assignment.RegionStates; 029import org.apache.hadoop.hbase.master.assignment.ServerState; 030import org.apache.hadoop.hbase.procedure2.FailedRemoteDispatchException; 031import org.apache.hadoop.hbase.procedure2.Procedure; 032import org.apache.hadoop.hbase.procedure2.ProcedureEvent; 033import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 034import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 035import org.apache.hadoop.hbase.procedure2.ProcedureUtil; 036import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 037import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; 038import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteProcedure; 039import org.apache.hadoop.hbase.procedure2.RemoteProcedureException; 040import org.apache.hadoop.hbase.regionserver.SnapshotRegionCallable; 041import org.apache.hadoop.hbase.util.RetryCounter; 042import org.apache.yetus.audience.InterfaceAudience; 043import org.slf4j.Logger; 044import org.slf4j.LoggerFactory; 045 046import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 047import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 048import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SnapshotRegionProcedureStateData; 049import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; 050import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 051 052/** 053 * A remote procedure which is used to send region snapshot request to region server. The basic 054 * logic of SnapshotRegionProcedure is similar like {@link ServerRemoteProcedure}, only with a 055 * little difference, when {@link FailedRemoteDispatchException} was thrown, SnapshotRegionProcedure 056 * will sleep some time and continue retrying until success. 057 */ 058@InterfaceAudience.Private 059public class SnapshotRegionProcedure extends Procedure<MasterProcedureEnv> 060 implements TableProcedureInterface, RemoteProcedure<MasterProcedureEnv, ServerName> { 061 private static final Logger LOG = LoggerFactory.getLogger(SnapshotRegionProcedure.class); 062 063 private SnapshotDescription snapshot; 064 private ProcedureEvent<?> event; 065 private RegionInfo region; 066 private boolean dispatched; 067 private boolean succ; 068 private RetryCounter retryCounter; 069 070 public SnapshotRegionProcedure() { 071 } 072 073 public SnapshotRegionProcedure(SnapshotDescription snapshot, RegionInfo region) { 074 this.snapshot = snapshot; 075 this.region = region; 076 } 077 078 @Override 079 protected LockState acquireLock(final MasterProcedureEnv env) { 080 if (env.getProcedureScheduler().waitRegions(this, getTableName(), region)) { 081 return LockState.LOCK_EVENT_WAIT; 082 } 083 return LockState.LOCK_ACQUIRED; 084 } 085 086 @Override 087 protected void releaseLock(final MasterProcedureEnv env) { 088 env.getProcedureScheduler().wakeRegions(this, getTableName(), region); 089 } 090 091 @Override 092 protected boolean holdLock(MasterProcedureEnv env) { 093 return false; 094 } 095 096 @Override 097 public Optional<RemoteOperation> remoteCallBuild(MasterProcedureEnv env, ServerName serverName) { 098 return Optional.of(new RSProcedureDispatcher.ServerOperation(this, getProcId(), 099 SnapshotRegionCallable.class, MasterProcedureProtos.SnapshotRegionParameter.newBuilder() 100 .setRegion(ProtobufUtil.toRegionInfo(region)).setSnapshot(snapshot).build().toByteArray())); 101 } 102 103 @Override 104 public void remoteCallFailed(MasterProcedureEnv env, ServerName serverName, IOException e) { 105 complete(env, e); 106 } 107 108 @Override 109 public void remoteOperationCompleted(MasterProcedureEnv env) { 110 complete(env, null); 111 } 112 113 @Override 114 public void remoteOperationFailed(MasterProcedureEnv env, RemoteProcedureException e) { 115 complete(env, e); 116 } 117 118 // keep retrying until success 119 private void complete(MasterProcedureEnv env, Throwable error) { 120 if (isFinished()) { 121 LOG.info("This procedure {} is already finished, skip the rest processes", this.getProcId()); 122 return; 123 } 124 if (event == null) { 125 LOG.warn("procedure event for {} is null, maybe the procedure is created when recovery", 126 getProcId()); 127 return; 128 } 129 if (error == null) { 130 LOG.info("finish snapshot {} on region {}", snapshot.getName(), region.getEncodedName()); 131 succ = true; 132 } 133 134 event.wake(env.getProcedureScheduler()); 135 event = null; 136 } 137 138 @Override 139 public TableName getTableName() { 140 return region.getTable(); 141 } 142 143 @Override 144 public TableOperationType getTableOperationType() { 145 return TableOperationType.REGION_SNAPSHOT; 146 } 147 148 @Override 149 protected Procedure<MasterProcedureEnv>[] execute(MasterProcedureEnv env) 150 throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { 151 if (dispatched) { 152 if (succ) { 153 return null; 154 } 155 dispatched = false; 156 } 157 158 RegionStates regionStates = env.getAssignmentManager().getRegionStates(); 159 RegionStateNode regionNode = regionStates.getRegionStateNode(region); 160 regionNode.lock(); 161 try { 162 if (regionNode.getProcedure() != null) { 163 setTimeoutForSuspend(env, String.format("region %s has a TRSP attached %s", 164 region.getRegionNameAsString(), regionNode.getProcedure())); 165 throw new ProcedureSuspendedException(); 166 } 167 if (!regionNode.isInState(RegionState.State.OPEN)) { 168 setTimeoutForSuspend(env, String.format("region state of %s is %s", 169 region.getRegionNameAsString(), regionNode.getState())); 170 throw new ProcedureSuspendedException(); 171 } 172 ServerName targetServer = regionNode.getRegionLocation(); 173 if (targetServer == null) { 174 setTimeoutForSuspend(env, 175 String.format("target server of region %s is null", region.getRegionNameAsString())); 176 throw new ProcedureSuspendedException(); 177 } 178 ServerState serverState = regionStates.getServerNode(targetServer).getState(); 179 if (serverState != ServerState.ONLINE) { 180 setTimeoutForSuspend(env, String.format("target server of region %s %s is in state %s", 181 region.getRegionNameAsString(), targetServer, serverState)); 182 throw new ProcedureSuspendedException(); 183 } 184 try { 185 env.getRemoteDispatcher().addOperationToNode(targetServer, this); 186 dispatched = true; 187 event = new ProcedureEvent<>(this); 188 event.suspendIfNotReady(this); 189 throw new ProcedureSuspendedException(); 190 } catch (FailedRemoteDispatchException e) { 191 setTimeoutForSuspend(env, "Failed send request to " + targetServer); 192 throw new ProcedureSuspendedException(); 193 } 194 } finally { 195 regionNode.unlock(); 196 } 197 } 198 199 @Override 200 protected void rollback(MasterProcedureEnv env) { 201 throw new UnsupportedOperationException(); 202 } 203 204 private void setTimeoutForSuspend(MasterProcedureEnv env, String reason) { 205 if (retryCounter == null) { 206 retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); 207 } 208 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 209 LOG.warn("{} can not run currently because {}, wait {} ms to retry", this, reason, backoff); 210 setTimeout(Math.toIntExact(backoff)); 211 setState(ProcedureState.WAITING_TIMEOUT); 212 skipPersistence(); 213 } 214 215 @Override 216 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 217 setState(ProcedureState.RUNNABLE); 218 env.getProcedureScheduler().addFront(this); 219 return false; 220 } 221 222 @Override 223 protected boolean abort(MasterProcedureEnv env) { 224 return false; 225 } 226 227 @Override 228 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 229 SnapshotRegionProcedureStateData.Builder builder = 230 SnapshotRegionProcedureStateData.newBuilder(); 231 builder.setSnapshot(snapshot); 232 builder.setRegion(ProtobufUtil.toRegionInfo(region)); 233 serializer.serialize(builder.build()); 234 } 235 236 @Override 237 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 238 SnapshotRegionProcedureStateData data = 239 serializer.deserialize(SnapshotRegionProcedureStateData.class); 240 this.snapshot = data.getSnapshot(); 241 this.region = ProtobufUtil.toRegionInfo(data.getRegion()); 242 } 243 244 @Override 245 public String getProcName() { 246 return getClass().getSimpleName() + " " + region.getEncodedName(); 247 } 248 249 @Override 250 protected void toStringClassDetails(StringBuilder builder) { 251 builder.append(getProcName()); 252 } 253 254 @Override 255 protected boolean waitInitialized(MasterProcedureEnv env) { 256 return env.waitInitialized(this); 257 } 258 259 public RegionInfo getRegion() { 260 return region; 261 } 262 263 @RestrictedApi(explanation = "Should only be called in tests", link = "", 264 allowedOnPath = ".*(/src/test/.*|TestSnapshotProcedure).java") 265 boolean inRetrying() { 266 return retryCounter != null; 267 } 268}