/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE;
import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY;
import static org.apache.hadoop.hbase.master.LoadBalancer.BOGUS_SERVER_NAME;
import static org.apache.hadoop.hbase.master.assignment.AssignmentManager.FORCE_REGION_RETAINMENT;

import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.master.MetricsAssignmentManager;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionType;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;

/**
 * The procedure to deal with the state transition of a region. A region with a TRSP in place is
 * called RIT, i.e, RegionInTransition.
 * <p/>
 * It can be used to assign/unassign/reopen/move a region, and for
 * {@link #unassign(MasterProcedureEnv, RegionInfo)} and
 * {@link #reopen(MasterProcedureEnv, RegionInfo)}, you do not need to specify a target server, and
 * for {@link #assign(MasterProcedureEnv, RegionInfo, ServerName)} and
 * {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you want to you can provide a
 * target server. And for {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you do not
 * specify a targetServer, we will select one randomly.
 * <p/>
 * The typical state transition for assigning a region is:
 *
 * <pre>
 * GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
 * </pre>
 *
 * Notice that, if there are failures we may go back to the {@code GET_ASSIGN_CANDIDATE} state to
 * try again.
 * <p/>
 * The typical state transition for unassigning a region is:
 *
 * <pre>
 * CLOSE -----> CONFIRM_CLOSED
 * </pre>
 *
 * Here things go a bit different, if there are failures, especially that if there is a server
 * crash, we will go to the {@code GET_ASSIGN_CANDIDATE} state to bring the region online first, and
 * then go through the normal way to unassign it.
 * <p/>
 * The typical state transition for reopening/moving a region is:
 *
 * <pre>
 * CLOSE -----> CONFIRM_CLOSED -----> GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
 * </pre>
 *
 * The retry logic is the same with the above assign/unassign.
 * <p/>
 * Notice that, although we allow specifying a target server, it just acts as a candidate, we do
 * not guarantee that the region will finally be on the target server. If this is important for
 * you, you should check whether the region is on the target server after the procedure is
 * finished.
 * <p/>
 * Alternatively, for trying retaining assignments, the
 * <b>hbase.master.scp.retain.assignment.force</b> option can be used together with
 * <b>hbase.master.scp.retain.assignment</b>.
 * <p/>
 * When you want to schedule a TRSP, please check whether there is still one for this region, and
 * the check should be under the RegionStateNode lock. We will remove the TRSP from a
 * RegionStateNode when we are done, see the code in {@code reportTransition} method below. There
 * could be at most one TRSP for a given region.
 */
@InterfaceAudience.Private
public class TransitRegionStateProcedure
  extends AbstractStateMachineRegionProcedure<RegionStateTransitionState> {

  private static final Logger LOG = LoggerFactory.getLogger(TransitRegionStateProcedure.class);

  // the kind of transition (assign/unassign/move/reopen) this procedure performs
  private TransitionType type;

  // first state of the state machine, derived from type in setInitialAndLastState
  private RegionStateTransitionState initialState;

  // the state at which the procedure finishes, derived from type in setInitialAndLastState
  private RegionStateTransitionState lastState;

  // the candidate where we want to assign the region to.
  private ServerName assignCandidate;

  // when true, queueAssign clears the region location instead of reusing a candidate/last host
  private boolean forceNewPlan;

  // backoff counter for the IOException retry path in executeFromState; reset to null on success
  private RetryCounter retryCounter;

  // the currently attached remote (open/close) sub procedure, or null if there is none
  private RegionRemoteProcedureBase remoteProc;

  // passed to CloseRegionProcedure for non-split closes; read from EVICT_BLOCKS_ON_CLOSE_KEY
  private boolean evictCache;

  // true when created through unassignSplitMerge; makes closeRegion pass 'true' for evict cache
  private boolean isSplit;

  // Retry counter used while waiting for the previous host when force retainment is enabled.
  // Not part of the serialized state, so it may be null on a deserialized instance.
  private RetryCounter forceRetainmentRetryCounter;

  // accumulated wait (ms) spent in checkAndWaitForOriginalServer, only used for logging
  private long forceRetainmentTotalWait;

  /**
   * No-arg constructor; all state is expected to be filled in later by
   * {@link #deserializeStateData(ProcedureStateSerializer)}.
   */
  public TransitRegionStateProcedure() {
  }

  /**
   * Derives {@link #initialState} and {@link #lastState} from {@link #type}.
   * @throws IllegalArgumentException if the type is not one of the known transition types
   */
  private void setInitialAndLastState() {
    switch (type) {
      case ASSIGN:
        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE;
        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED;
        break;
      case UNASSIGN:
        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE;
        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED;
        break;
      case MOVE:
      case REOPEN:
        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE;
        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED;
        break;
      default:
        throw new IllegalArgumentException("Unknown TransitionType: " + type);
    }
  }

  /**
   * Creates a procedure for the given region and transition type.
   * @param env             the master procedure environment
   * @param hri             the region to transit
   * @param assignCandidate the preferred server to assign to, may be null; for REOPEN it is
   *                        replaced with the current location of the region state node
   * @param forceNewPlan    whether to ignore the candidate/last host when assigning
   * @param type            the transition type
   */
  protected TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
    ServerName assignCandidate, boolean forceNewPlan, TransitionType type) {
    super(env, hri);
    this.assignCandidate = assignCandidate;
    this.forceNewPlan = forceNewPlan;
    this.type = type;
    setInitialAndLastState();

    // when do reopen TRSP, let the rs know the targetServer so it can keep some info on close
    if (type == TransitionType.REOPEN) {
      this.assignCandidate = getRegionStateNode(env).getRegionLocation();
    }
    evictCache =
      env.getMasterConfiguration().getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE);
    initForceRetainmentRetryCounter(env);
  }

  /**
   * (Re)creates {@link #forceRetainmentRetryCounter} and resets
   * {@link #forceRetainmentTotalWait} when force region retainment is enabled on the assignment
   * manager; does nothing otherwise.
   */
  private void initForceRetainmentRetryCounter(MasterProcedureEnv env) {
    if (env.getAssignmentManager().isForceRegionRetainment()) {
      forceRetainmentRetryCounter =
        new RetryCounter(env.getAssignmentManager().getForceRegionRetainmentRetries(),
          env.getAssignmentManager().getForceRegionRetainmentWaitInterval(), TimeUnit.MILLISECONDS);
      forceRetainmentTotalWait = 0;
    }
  }

  /**
   * Same as the five argument constructor, additionally recording whether this transition is part
   * of a split/merge (see {@link #unassignSplitMerge(MasterProcedureEnv, RegionInfo)}).
   */
  protected TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
    ServerName assignCandidate, boolean forceNewPlan, TransitionType type, boolean isSplit) {
    this(env, hri, assignCandidate, forceNewPlan, type);
    this.isSplit = isSplit;
  }

  @Override
  public TableOperationType getTableOperationType() {
    // TODO: maybe we should make another type here, REGION_TRANSITION?
    return TableOperationType.REGION_EDIT;
  }

  /**
   * For the meta table this always returns {@code false}; for any other table it delegates to
   * {@code waitMetaLoaded} and {@code waitMetaAssigned} of the assignment manager.
   */
  @Override
  protected boolean waitInitialized(MasterProcedureEnv env) {
    if (TableName.isMetaTableName(getTableName())) {
      return false;
    }
    // First we need meta to be loaded, and second, if meta is not online then we will likely to
    // fail when updating meta so we wait until it is assigned.
    AssignmentManager am = env.getAssignmentManager();
    return am.waitMetaLoaded(this) || am.waitMetaAssigned(this, getRegion());
  }

  /**
   * When force region retainment is on, suspends this procedure (with the backoff given by
   * {@link #forceRetainmentRetryCounter}) while the server matching {@code lastHost} is not in the
   * destination server list and retries remain. Once the host is online or the retries are used
   * up, logs the outcome and re-initializes the counter.
   * @param env      the master procedure environment
   * @param lastHost the server the region is meant to be retained on
   * @throws ProcedureSuspendedException to make the procedure wait for the backoff timeout
   */
  private void checkAndWaitForOriginalServer(MasterProcedureEnv env, ServerName lastHost)
    throws ProcedureSuspendedException {
    ServerManager serverManager = env.getMasterServices().getServerManager();
    ServerName newNameForServer = serverManager.findServerWithSameHostnamePortWithLock(lastHost);
    boolean isOnline = serverManager.createDestinationServersList().contains(newNameForServer);

    if (forceRetainmentRetryCounter == null) {
      // The counter is only created by the constructors taking an env; neither the no-arg
      // constructor nor deserializeStateData sets it, so create it lazily here instead of hitting
      // a NullPointerException below.
      initForceRetainmentRetryCounter(env);
    }
    if (
      !isOnline && forceRetainmentRetryCounter != null
        && forceRetainmentRetryCounter.shouldRetry()
    ) {
      int backoff =
        Math.toIntExact(forceRetainmentRetryCounter.getBackoffTimeAndIncrementAttempts());
      forceRetainmentTotalWait += backoff;
      LOG.info(
        "Suspending the TRSP PID={} for {}ms because {} is true and previous host {} "
          + "for region is not yet online.",
        this.getProcId(), backoff, FORCE_REGION_RETAINMENT, lastHost);
      setTimeout(backoff);
      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
      throw new ProcedureSuspendedException();
    }
    LOG.info(
      "{} is true. TRSP PID={} waited {}ms for host {} to come back online. "
        + "Did host come back online? {}",
      FORCE_REGION_RETAINMENT, this.getProcId(), forceRetainmentTotalWait, lastHost, isOnline);
    initForceRetainmentRetryCounter(env);
  }

  /**
   * Handles the GET_ASSIGN_CANDIDATE state: decides the region location to try (none when
   * {@link #forceNewPlan} is set, otherwise the assign candidate or the last host), optionally
   * waits for the original server when force retainment is enabled, queues the region for
   * assignment and moves to the OPEN state.
   * @throws ProcedureSuspendedException if we need to wait for the original server, or if the
   *                                     region node's procedure event is not ready yet
   */
  private void queueAssign(MasterProcedureEnv env, RegionStateNode regionNode)
    throws ProcedureSuspendedException {
    boolean retain = false;
    if (forceNewPlan) {
      // set the region location to null if forceNewPlan is true
      regionNode.setRegionLocation(null);
    } else {
      if (assignCandidate != null) {
        retain = assignCandidate.equals(regionNode.getLastHost());
        regionNode.setRegionLocation(assignCandidate);
      } else if (regionNode.getLastHost() != null) {
        retain = true;
        LOG.info("Setting lastHost {} as the location for region {}", regionNode.getLastHost(),
          regionNode.getRegionInfo().getEncodedName());
        regionNode.setRegionLocation(regionNode.getLastHost());
      }
      if (
        regionNode.getRegionLocation() != null
          && env.getAssignmentManager().isForceRegionRetainment()
      ) {
        LOG.warn("{} is set to true. This may delay regions re-assignment "
          + "upon RegionServers crashes or restarts.", FORCE_REGION_RETAINMENT);
        checkAndWaitForOriginalServer(env, regionNode.getRegionLocation());
      }
    }
    LOG.info("Starting {}; {}; forceNewPlan={}, retain={}", this, regionNode.toShortString(),
      forceNewPlan, retain);
    env.getAssignmentManager().queueAssign(regionNode);
    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_OPEN);
    if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
      throw new ProcedureSuspendedException();
    }
  }

  /**
   * Handles the OPEN state: if the region node has a usable location, marks the region as opening,
   * schedules an {@link OpenRegionProcedure} child and moves to CONFIRM_OPENED.
   * @throws IOException if the location is null or the bogus server; the next state is set back to
   *                     GET_ASSIGN_CANDIDATE before throwing
   */
  private void openRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
    ServerName loc = regionNode.getRegionLocation();
    if (loc == null || BOGUS_SERVER_NAME.equals(loc)) {
      LOG.warn("No location specified for {}, jump back to state {} to get one", getRegion(),
        RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
      throw new HBaseIOException("Failed to open region, the location is null or bogus.");
    }
    env.getAssignmentManager().regionOpening(regionNode);
    addChildProcedure(new OpenRegionProcedure(this, getRegion(), loc));
    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED);
  }

  /**
   * Handles the CONFIRM_OPENED state. If the region is OPEN we either finish (when this is the
   * last state) or continue with CLOSE. Otherwise the open failed: we count the failure, give up
   * once the max attempts are reached, or go back to GET_ASSIGN_CANDIDATE with a forced new plan.
   * @return whether the state machine has more states to run
   * @throws IOException to trigger a backoff retry once the failure count exceeds the "retry
   *                     immediately" threshold
   */
  private Flow confirmOpened(MasterProcedureEnv env, RegionStateNode regionNode)
    throws IOException {
    if (regionNode.isInState(State.OPEN)) {
      retryCounter = null;
      if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
        // we are the last state, finish
        regionNode.unsetProcedure(this);
        ServerCrashProcedure.updateProgress(env, getParentProcId());
        return Flow.NO_MORE_STATE;
      }
      // It is possible that we arrive here but confirm opened is not the last state, for example,
      // when merging or splitting a region, we unassign the region from a RS and the RS is crashed,
      // then there will be recovered edits for this region, we'd better make the region online
      // again and then unassign it, otherwise we have to fail the merge/split procedure as we may
      // lose data.
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
      return Flow.HAS_MORE_STATE;
    }

    int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
      .incrementAndGetRetries();
    int maxAttempts = env.getAssignmentManager().getAssignMaxAttempts();
    LOG.info("Retry={} of max={}; {}; {}", retries, maxAttempts, this, regionNode.toShortString());

    if (retries >= maxAttempts) {
      env.getAssignmentManager().regionFailedOpen(regionNode, true);
      setFailure(getClass().getSimpleName(), new RetriesExhaustedException(
        "Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
      regionNode.unsetProcedure(this);
      return Flow.NO_MORE_STATE;
    }

    env.getAssignmentManager().regionFailedOpen(regionNode, false);
    // we failed to assign the region, force a new plan
    forceNewPlan = true;
    regionNode.setRegionLocation(null);
    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);

    if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
      // Throw exception to backoff and retry when failed open too many times
      throw new HBaseIOException(
        "Failed confirm OPEN of " + regionNode + " (remote log may yield more detail on why).");
    } else {
      // Here we do not throw exception because we want the region to be online ASAP
      return Flow.HAS_MORE_STATE;
    }
  }

  /**
   * Handles the CLOSE state. If the region is in OPEN, CLOSING, MERGING or SPLITTING state,
   * schedules a {@link CloseRegionProcedure} child and moves to CONFIRM_CLOSED; otherwise forces a
   * new plan and goes to GET_ASSIGN_CANDIDATE.
   */
  private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
    if (regionNode.isInState(State.OPEN, State.CLOSING, State.MERGING, State.SPLITTING)) {
      // this is the normal case
      env.getAssignmentManager().regionClosing(regionNode);
      // for a split/merge close the last argument is always true, otherwise it is evictCache
      CloseRegionProcedure closeProc = isSplit
        ? new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(),
          assignCandidate, true)
        : new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(),
          assignCandidate, evictCache);
      addChildProcedure(closeProc);
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED);
    } else {
      forceNewPlan = true;
      regionNode.setRegionLocation(null);
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
    }
  }

  /**
   * Handles the CONFIRM_CLOSED state. Finishes or continues with GET_ASSIGN_CANDIDATE when the
   * region is CLOSED, retries the close when it is still CLOSING, and otherwise (abnormally
   * closed) reopens the region, except for a non-default replica whose last state is
   * CONFIRM_CLOSED.
   * @return whether the state machine has more states to run
   * @throws IOException if the region is still in CLOSING state, to retry with backoff
   */
  private Flow confirmClosed(MasterProcedureEnv env, RegionStateNode regionNode)
    throws IOException {
    if (regionNode.isInState(State.CLOSED)) {
      retryCounter = null;
      if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
        // we are the last state, finish
        regionNode.unsetProcedure(this);
        return Flow.NO_MORE_STATE;
      }
      // This means we need to open the region again, should be a move or reopen
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
      return Flow.HAS_MORE_STATE;
    }
    if (regionNode.isInState(State.CLOSING)) {
      // This is possible, think the target RS crashes and restarts immediately, the close region
      // operation will return a NotServingRegionException soon, we can only recover after SCP takes
      // care of this RS. So here we throw an IOException to let upper layer to retry with backoff.
      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
      throw new HBaseIOException("Failed to close region");
    }
    // abnormally closed, need to reopen it, no matter what is the last state, see the comment in
    // confirmOpened for more details that why we need to reopen the region first even if we just
    // want to close it.
    // The only exception is for non-default replica, where we do not need to deal with recovered
    // edits. Notice that the region will remain in ABNORMALLY_CLOSED state, the upper layer need to
    // deal with this state. For non-default replica, this is usually the same with CLOSED.
    assert regionNode.isInState(State.ABNORMALLY_CLOSED);
    if (
      !RegionReplicaUtil.isDefaultReplica(getRegion())
        && lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED
    ) {
      regionNode.unsetProcedure(this);
      return Flow.NO_MORE_STATE;
    }
    retryCounter = null;
    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
    return Flow.HAS_MORE_STATE;
  }

  // Override to lock RegionStateNode
  @SuppressWarnings("rawtypes")
  @Override
  protected Procedure[] execute(MasterProcedureEnv env)
    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    RegionStateNode regionNode =
      env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
    regionNode.lock();
    try {
      return super.execute(env);
    } finally {
      regionNode.unlock();
    }
  }

  /** Returns the region state node for our region, creating it if it does not exist yet. */
  private RegionStateNode getRegionStateNode(MasterProcedureEnv env) {
    return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
  }

  /**
   * Dispatches to the handler of the given state. Any {@link IOException} thrown by a handler is
   * turned into a timed suspension using {@link #retryCounter}, so that the state chosen by the
   * handler is executed again after the backoff.
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, RegionStateTransitionState state)
    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    RegionStateNode regionNode = getRegionStateNode(env);
    try {
      switch (state) {
        case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
          // Need to do some sanity check for replica region, if the region does not exist at
          // master, do not try to assign the replica region, log error and return.
          if (!RegionReplicaUtil.isDefaultReplica(regionNode.getRegionInfo())) {
            RegionInfo defaultRI =
              RegionReplicaUtil.getRegionInfoForDefaultReplica(regionNode.getRegionInfo());
            if (
              env.getMasterServices().getAssignmentManager().getRegionStates()
                .getRegionStateNode(defaultRI) == null
            ) {
              LOG.error(
                "Cannot assign replica region {} because its primary region {} does not exist.",
                regionNode.getRegionInfo(), defaultRI);
              regionNode.unsetProcedure(this);
              return Flow.NO_MORE_STATE;
            }
          }
          queueAssign(env, regionNode);
          return Flow.HAS_MORE_STATE;
        case REGION_STATE_TRANSITION_OPEN:
          openRegion(env, regionNode);
          return Flow.HAS_MORE_STATE;
        case REGION_STATE_TRANSITION_CONFIRM_OPENED:
          return confirmOpened(env, regionNode);
        case REGION_STATE_TRANSITION_CLOSE:
          closeRegion(env, regionNode);
          return Flow.HAS_MORE_STATE;
        case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
          return confirmClosed(env, regionNode);
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      if (retryCounter == null) {
        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
      }
      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
      LOG.warn(
        "Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed "
          + "by other Procedure or operator intervention",
        backoff / 1000, this, regionNode.toShortString(), e);
      setTimeout(Math.toIntExact(backoff));
      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
      skipPersistence();
      throw new ProcedureSuspendedException();
    }
  }

  /**
   * At end of timeout, wake ourselves up so we run again.
   */
  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    setState(ProcedureProtos.ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    return false; // 'false' means that this procedure handled the timeout
  }

  /**
   * Forwards a region state transition report to the attached remote procedure. The report is
   * ignored (with a warning) when there is no attached remote procedure, or when a non-negative
   * {@code procId} does not match the attached remote procedure's id.
   * <p/>
   * Should be called with RegionStateNode locked.
   */
  public void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode,
    ServerName serverName, TransitionCode code, long seqId, long procId) throws IOException {
    if (remoteProc == null) {
      LOG.warn(
        "There is no outstanding remote region procedure for {}, serverName={}, code={},"
          + " seqId={}, proc={}, should be a retry, ignore",
        regionNode, serverName, code, seqId, this);
      return;
    }
    // The procId could be -1 if it is from an old region server, we need to deal with it so that we
    // can do rolling upgrading.
    if (procId >= 0 && remoteProc.getProcId() != procId) {
      LOG.warn(
        "The pid of remote region procedure for {} is {}, the reported pid={}, serverName={},"
          + " code={}, seqId={}, proc={}, should be a retry, ignore",
        regionNode, remoteProc.getProcId(), procId, serverName, code, seqId, this);
      return;
    }
    remoteProc.reportTransition(env, regionNode, serverName, code, seqId);
  }

  /**
   * Notifies this procedure that the given server has crashed. Records the requested
   * {@code forceNewPlan} and either forwards the crash to the attached remote procedure or, if
   * there is none, marks the region as abnormally closed.
   * <p/>
   * Should be called with RegionStateNode locked.
   */
  public void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode,
    ServerName serverName, boolean forceNewPlan) throws IOException {
    this.forceNewPlan = forceNewPlan;
    if (remoteProc != null) {
      // this means we are waiting for the sub procedure, so wake it up
      remoteProc.serverCrashed(env, regionNode, serverName);
    } else {
      // we are in RUNNING state, just update the region state, and we will process it later.
      env.getAssignmentManager().regionClosedAbnormally(regionNode);
    }
  }

  /** Records {@code proc} as the currently outstanding remote procedure. */
  void attachRemoteProc(RegionRemoteProcedureBase proc) {
    this.remoteProc = proc;
  }

  /** Clears the outstanding remote procedure, which is expected to be {@code proc}. */
  void unattachRemoteProc(RegionRemoteProcedureBase proc) {
    assert this.remoteProc == proc;
    this.remoteProc = null;
  }

  // will be called after we finish loading the meta entry for this region.
  // used to change the state of the region node if we have a sub procedure, as we may not persist
  // the state to meta yet. See the code in RegionRemoteProcedureBase.execute for more details.
  void stateLoaded(AssignmentManager am, RegionStateNode regionNode) {
    if (remoteProc != null) {
      remoteProc.stateLoaded(am, regionNode);
    }
  }

  @Override
  protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
    throws IOException, InterruptedException {
    // no rollback
    throw new UnsupportedOperationException();
  }

  @Override
  protected RegionStateTransitionState getState(int stateId) {
    return RegionStateTransitionState.forNumber(stateId);
  }

  @Override
  protected int getStateId(RegionStateTransitionState state) {
    return state.getNumber();
  }

  @Override
  protected RegionStateTransitionState getInitialState() {
    return initialState;
  }

  /** Converts the protobuf transition type to the in-memory {@link TransitionType}. */
  private static TransitionType convert(RegionTransitionType type) {
    switch (type) {
      case ASSIGN:
        return TransitionType.ASSIGN;
      case UNASSIGN:
        return TransitionType.UNASSIGN;
      case MOVE:
        return TransitionType.MOVE;
      case REOPEN:
        return TransitionType.REOPEN;
      default:
        throw new IllegalArgumentException("Unknown RegionTransitionType: " + type);
    }
  }

  /** Converts the in-memory {@link TransitionType} to the protobuf transition type. */
  private static RegionTransitionType convert(TransitionType type) {
    switch (type) {
      case ASSIGN:
        return RegionTransitionType.ASSIGN;
      case UNASSIGN:
        return RegionTransitionType.UNASSIGN;
      case MOVE:
        return RegionTransitionType.MOVE;
      case REOPEN:
        return RegionTransitionType.REOPEN;
      default:
        throw new IllegalArgumentException("Unknown TransitionType: " + type);
    }
  }

  /**
   * Serializes the type, forceNewPlan, evictCache, isSplit and (if set) the assign candidate.
   * The retry counters and the attached remote procedure are not serialized.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);
    RegionStateTransitionStateData.Builder builder =
      RegionStateTransitionStateData.newBuilder().setType(convert(type))
        .setForceNewPlan(forceNewPlan).setEvictCache(evictCache).setIsSplit(isSplit);
    if (assignCandidate != null) {
      builder.setAssignCandidate(ProtobufUtil.toServerName(assignCandidate));
    }
    serializer.serialize(builder.build());
  }

  /**
   * Restores the fields written by {@link #serializeStateData(ProcedureStateSerializer)} and
   * recomputes the initial/last state from the restored type.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);
    RegionStateTransitionStateData data =
      serializer.deserialize(RegionStateTransitionStateData.class);
    type = convert(data.getType());
    setInitialAndLastState();
    forceNewPlan = data.getForceNewPlan();
    if (data.hasAssignCandidate()) {
      assignCandidate = ProtobufUtil.toServerName(data.getAssignCandidate());
    }
    evictCache = data.getEvictCache();
    isSplit = data.getIsSplit();
  }

  @Override
  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
    MetricsAssignmentManager metrics = env.getAssignmentManager().getAssignmentManagerMetrics();
    switch (type) {
      case ASSIGN:
        return metrics.getAssignProcMetrics();
      case UNASSIGN:
        return metrics.getUnassignProcMetrics();
      case MOVE:
        return metrics.getMoveProcMetrics();
      case REOPEN:
        return metrics.getReopenProcMetrics();
      default:
        throw new IllegalArgumentException("Unknown transition type: " + type);
    }
  }

  /**
   * Appends a label derived from the initial/last state rather than from {@link #type}, which is
   * why REOPEN and MOVE share one label.
   */
  @Override
  public void toStringClassDetails(StringBuilder sb) {
    super.toStringClassDetails(sb);
    if (initialState == RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE) {
      sb.append(", ASSIGN");
    } else if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
      sb.append(", UNASSIGN");
    } else {
      sb.append(", REOPEN/MOVE");
    }
  }

  /** Sets the owner of {@code proc} to the short name of the request user and returns it. */
  private static TransitRegionStateProcedure setOwner(MasterProcedureEnv env,
    TransitRegionStateProcedure proc) {
    proc.setOwner(env.getRequestUser().getShortName());
    return proc;
  }

  /** The kinds of region transition this procedure can perform. */
  public enum TransitionType {
    ASSIGN,
    UNASSIGN,
    MOVE,
    REOPEN
  }

  // Be careful that, when you call the factory methods below, you need to manually attach the
  // returned procedure with the RegionStateNode, otherwise the procedure will quit immediately
  // without doing anything. See the comment in executeFromState to find out why we need this
  // assumption.

  /** Creates an ASSIGN procedure with {@code forceNewPlan} set to false. */
  public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region,
    @Nullable ServerName targetServer) {
    return assign(env, region, false, targetServer);
  }

  /** Creates an ASSIGN procedure with the given {@code forceNewPlan} and candidate server. */
  public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region,
    boolean forceNewPlan, @Nullable ServerName targetServer) {
    return setOwner(env, new TransitRegionStateProcedure(env, region, targetServer, forceNewPlan,
      TransitionType.ASSIGN));
  }

  /** Creates an UNASSIGN procedure. */
  public static TransitRegionStateProcedure unassign(MasterProcedureEnv env, RegionInfo region) {
    return setOwner(env,
      new TransitRegionStateProcedure(env, region, null, false, TransitionType.UNASSIGN));
  }

  /** Creates an UNASSIGN procedure flagged as being part of a split/merge. */
  public static TransitRegionStateProcedure unassignSplitMerge(MasterProcedureEnv env,
    RegionInfo region) {
    return setOwner(env,
      new TransitRegionStateProcedure(env, region, null, false, TransitionType.UNASSIGN, true));
  }

  /** Creates a REOPEN procedure. */
  public static TransitRegionStateProcedure reopen(MasterProcedureEnv env, RegionInfo region) {
    return setOwner(env,
      new TransitRegionStateProcedure(env, region, null, false, TransitionType.REOPEN));
  }

  /**
   * Creates a MOVE procedure; {@code forceNewPlan} is set to true when no target server is given.
   */
  public static TransitRegionStateProcedure move(MasterProcedureEnv env, RegionInfo region,
    @Nullable ServerName targetServer) {
    return setOwner(env, new TransitRegionStateProcedure(env, region, targetServer,
      targetServer == null, TransitionType.MOVE));
  }
}