001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import static org.apache.hadoop.hbase.HConstants.DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK; 021import static org.apache.hadoop.hbase.HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK; 022 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.Arrays; 026import java.util.List; 027import java.util.concurrent.CompletableFuture; 028import org.apache.hadoop.hbase.DoNotRetryIOException; 029import org.apache.hadoop.hbase.ServerName; 030import org.apache.hadoop.hbase.client.RegionInfo; 031import org.apache.hadoop.hbase.client.RegionInfoBuilder; 032import org.apache.hadoop.hbase.client.RegionReplicaUtil; 033import org.apache.hadoop.hbase.client.TableState; 034import org.apache.hadoop.hbase.master.MasterServices; 035import org.apache.hadoop.hbase.master.MasterWalManager; 036import org.apache.hadoop.hbase.master.SplitWALManager; 037import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 038import org.apache.hadoop.hbase.master.assignment.RegionStateNode; 039import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; 040import org.apache.hadoop.hbase.master.replication.AssignReplicationQueuesProcedure; 041import org.apache.hadoop.hbase.master.replication.MigrateReplicationQueueFromZkToTableProcedure; 042import org.apache.hadoop.hbase.monitoring.MonitoredTask; 043import org.apache.hadoop.hbase.monitoring.TaskMonitor; 044import org.apache.hadoop.hbase.procedure2.Procedure; 045import org.apache.hadoop.hbase.procedure2.ProcedureFutureUtil; 046import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 047import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 048import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 049import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 050import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 056import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 057import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ServerCrashState; 058import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 059 060/** 061 * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called 062 * ServerShutdownHandler. 063 * <p> 064 * The procedure flow varies dependent on whether meta is assigned and if we are to split logs. 065 * <p> 066 * We come in here after ServerManager has noticed a server has expired. Procedures queued on the 067 * rpc should have been notified about fail and should be concurrently getting themselves ready to 068 * assign elsewhere. 069 */ 070@InterfaceAudience.Private 071public class ServerCrashProcedure extends 072 StateMachineProcedure<MasterProcedureEnv, ServerCrashState> implements ServerProcedureInterface { 073 private static final Logger LOG = LoggerFactory.getLogger(ServerCrashProcedure.class); 074 075 /** 076 * Configuration parameter to enable/disable the retain region assignment during 077 * ServerCrashProcedure. 078 * <p> 079 * By default retain assignment is disabled which makes the failover faster and improve the 080 * availability; useful for cloud scenario where region block locality is not important. Enable 081 * this when RegionServers are deployed on same host where Datanode are running, this will improve 082 * read performance due to local read. 083 * <p> 084 * see HBASE-24900 for more details. 085 */ 086 public static final String MASTER_SCP_RETAIN_ASSIGNMENT = "hbase.master.scp.retain.assignment"; 087 /** Default value of {@link #MASTER_SCP_RETAIN_ASSIGNMENT} */ 088 public static final boolean DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT = false; 089 090 /** 091 * Name of the crashed server to process. 092 */ 093 private ServerName serverName; 094 095 /** 096 * Whether DeadServer knows that we are processing it. 097 */ 098 private boolean notifiedDeadServer = false; 099 100 /** 101 * Regions that were on the crashed server. 102 */ 103 private List<RegionInfo> regionsOnCrashedServer; 104 105 private boolean carryingMeta = false; 106 private boolean shouldSplitWal; 107 private MonitoredTask status; 108 // currentRunningState is updated when ServerCrashProcedure get scheduled, child procedures update 109 // progress will not update the state because the actual state is overwritten by its next state 110 private ServerCrashState currentRunningState = getInitialState(); 111 112 private CompletableFuture<Void> updateMetaFuture; 113 114 private int processedRegions = 0; 115 116 /** 117 * Call this constructor queuing up a Procedure. 118 * @param serverName Name of the crashed server. 119 * @param shouldSplitWal True if we should split WALs as part of crashed server processing. 120 * @param carryingMeta True if carrying hbase:meta table region. 121 */ 122 public ServerCrashProcedure(final MasterProcedureEnv env, final ServerName serverName, 123 final boolean shouldSplitWal, final boolean carryingMeta) { 124 this.serverName = serverName; 125 this.shouldSplitWal = shouldSplitWal; 126 this.carryingMeta = carryingMeta; 127 this.setOwner(env.getRequestUser()); 128 } 129 130 /** 131 * Used when deserializing from a procedure store; we'll construct one of these then call 132 * #deserializeStateData(InputStream). Do not use directly. 133 */ 134 public ServerCrashProcedure() { 135 } 136 137 public boolean isInRecoverMetaState() { 138 return getCurrentState() == ServerCrashState.SERVER_CRASH_PROCESS_META; 139 } 140 141 @Override 142 protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state) 143 throws ProcedureSuspendedException, ProcedureYieldException { 144 final MasterServices services = env.getMasterServices(); 145 final AssignmentManager am = env.getAssignmentManager(); 146 updateProgress(true); 147 // HBASE-14802 If we have not yet notified that we are processing a dead server, do so now. 148 // This adds server to the DeadServer processing list but not to the DeadServers list. 149 // Server gets removed from processing list below on procedure successful finish. 150 if (!notifiedDeadServer) { 151 notifiedDeadServer = true; 152 } 153 154 switch (state) { 155 case SERVER_CRASH_START: 156 case SERVER_CRASH_SPLIT_META_LOGS: 157 case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR: 158 case SERVER_CRASH_ASSIGN_META: 159 break; 160 default: 161 // If hbase:meta is not assigned, yield. 162 if (env.getAssignmentManager().waitMetaLoaded(this)) { 163 throw new ProcedureSuspendedException(); 164 } 165 } 166 try { 167 switch (state) { 168 case SERVER_CRASH_START: 169 LOG.info("Start " + this); 170 // If carrying meta, process it first. Else, get list of regions on crashed server. 171 if (this.carryingMeta) { 172 setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS); 173 } else { 174 setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS); 175 } 176 break; 177 case SERVER_CRASH_SPLIT_META_LOGS: 178 if ( 179 env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, 180 DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK) 181 ) { 182 zkCoordinatedSplitMetaLogs(env); 183 setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META); 184 } else { 185 am.getRegionStates().metaLogSplitting(serverName); 186 addChildProcedure(createSplittingWalProcedures(env, true)); 187 setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR); 188 } 189 break; 190 case SERVER_CRASH_DELETE_SPLIT_META_WALS_DIR: 191 if (isSplittingDone(env, true)) { 192 setNextState(ServerCrashState.SERVER_CRASH_ASSIGN_META); 193 am.getRegionStates().metaLogSplit(serverName); 194 } else { 195 setNextState(ServerCrashState.SERVER_CRASH_SPLIT_META_LOGS); 196 } 197 break; 198 case SERVER_CRASH_ASSIGN_META: 199 assignRegions(env, Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO)); 200 setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS); 201 break; 202 case SERVER_CRASH_GET_REGIONS: 203 this.regionsOnCrashedServer = getRegionsOnCrashedServer(env); 204 // Where to go next? Depends on whether we should split logs at all or 205 // if we should do distributed log splitting. 206 if (regionsOnCrashedServer != null) { 207 LOG.info("{} had {} regions", serverName, regionsOnCrashedServer.size()); 208 if (LOG.isTraceEnabled()) { 209 this.regionsOnCrashedServer.stream().forEach(ri -> LOG.trace(ri.getShortNameToLog())); 210 } 211 } 212 if (!this.shouldSplitWal) { 213 setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); 214 } else { 215 setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS); 216 } 217 break; 218 case SERVER_CRASH_SPLIT_LOGS: 219 if ( 220 env.getMasterConfiguration().getBoolean(HBASE_SPLIT_WAL_COORDINATED_BY_ZK, 221 DEFAULT_HBASE_SPLIT_COORDINATED_BY_ZK) 222 ) { 223 zkCoordinatedSplitLogs(env); 224 setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); 225 } else { 226 am.getRegionStates().logSplitting(this.serverName); 227 addChildProcedure(createSplittingWalProcedures(env, false)); 228 setNextState(ServerCrashState.SERVER_CRASH_DELETE_SPLIT_WALS_DIR); 229 } 230 break; 231 case SERVER_CRASH_DELETE_SPLIT_WALS_DIR: 232 if (isSplittingDone(env, false)) { 233 cleanupSplitDir(env); 234 setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); 235 am.getRegionStates().logSplit(this.serverName); 236 } else { 237 setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS); 238 } 239 break; 240 case SERVER_CRASH_ASSIGN: 241 // If no regions to assign, skip assign and skip to the finish. 242 // Filter out meta regions. Those are handled elsewhere in this procedure. 243 // Filter changes this.regionsOnCrashedServer. 244 if (filterDefaultMetaRegions()) { 245 if (LOG.isTraceEnabled()) { 246 LOG.trace("Assigning regions " + RegionInfo.getShortNameToLog(regionsOnCrashedServer) 247 + ", " + this + "; cycles=" + getCycles()); 248 } 249 assignRegions(env, regionsOnCrashedServer); 250 } 251 // If there is no replication peer, we do not need to enter the claim queues stage. 252 // This is also very important that now we will later initialize ReplicationQueueStorage 253 // so if there is no replication peer added yet, the storage can not be accessed. 254 // And there will be no race because: 255 // 1. For adding replication peer, if the peer storage has not been updated yet, the crash 256 // region server will not have any replication queues for this peer, so it is safe to skip 257 // claiming. 258 // 2. For removing replication peer, it it has already updated the peer storage, then 259 // there is no way to rollback and region servers are already started to close and delete 260 // replication queues, so it is also safe to skip claiming. 261 if (env.getReplicationPeerManager().listPeers(null).isEmpty()) { 262 setNextState(ServerCrashState.SERVER_CRASH_FINISH); 263 } else { 264 setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES); 265 } 266 break; 267 case SERVER_CRASH_HANDLE_RIT2: 268 // Noop. Left in place because we used to call handleRIT here for a second time 269 // but no longer necessary since HBASE-20634. 270 if (env.getReplicationPeerManager().listPeers(null).isEmpty()) { 271 setNextState(ServerCrashState.SERVER_CRASH_FINISH); 272 } else { 273 setNextState(ServerCrashState.SERVER_CRASH_CLAIM_REPLICATION_QUEUES); 274 } 275 break; 276 case SERVER_CRASH_CLAIM_REPLICATION_QUEUES: 277 if ( 278 env.getMasterServices().getProcedures().stream() 279 .filter(p -> p instanceof MigrateReplicationQueueFromZkToTableProcedure) 280 .anyMatch(p -> !p.isFinished()) 281 ) { 282 LOG.info("There is a pending {}, will retry claim replication queue later", 283 MigrateReplicationQueueFromZkToTableProcedure.class.getSimpleName()); 284 suspend(10_000, true); 285 return Flow.NO_MORE_STATE; 286 } 287 addChildProcedure(new AssignReplicationQueuesProcedure(serverName)); 288 setNextState(ServerCrashState.SERVER_CRASH_FINISH); 289 break; 290 case SERVER_CRASH_FINISH: 291 LOG.info("removed crashed server {} after splitting done", serverName); 292 services.getAssignmentManager().getRegionStates().removeServer(serverName); 293 updateProgress(true); 294 return Flow.NO_MORE_STATE; 295 default: 296 throw new UnsupportedOperationException("unhandled state=" + state); 297 } 298 } catch (IOException e) { 299 LOG.warn("Failed state=" + state + ", retry " + this + "; cycles=" + getCycles(), e); 300 } 301 return Flow.HAS_MORE_STATE; 302 } 303 304 /** Returns List of Regions on crashed server. */ 305 List<RegionInfo> getRegionsOnCrashedServer(MasterProcedureEnv env) { 306 return env.getMasterServices().getAssignmentManager().getRegionsOnServer(serverName); 307 } 308 309 private void cleanupSplitDir(MasterProcedureEnv env) { 310 SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager(); 311 try { 312 if (!this.carryingMeta) { 313 // If we are NOT carrying hbase:meta, check if any left-over hbase:meta WAL files from an 314 // old hbase:meta tenancy on this server; clean these up if any before trying to remove the 315 // WAL directory of this server or we will fail. See archiveMetaLog comment for more details 316 // on this condition. 317 env.getMasterServices().getMasterWalManager().archiveMetaLog(this.serverName); 318 } 319 splitWALManager.deleteWALDir(serverName); 320 } catch (IOException e) { 321 LOG.info("Remove WAL directory for {} failed, ignore...{}", serverName, e.getMessage()); 322 } 323 } 324 325 private boolean isSplittingDone(MasterProcedureEnv env, boolean splitMeta) { 326 SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager(); 327 try { 328 int wals = splitWALManager.getWALsToSplit(serverName, splitMeta).size(); 329 LOG.debug("Check if {} WAL splitting is done? wals={}, meta={}", serverName, wals, splitMeta); 330 return wals == 0; 331 } catch (IOException e) { 332 LOG.warn("Get WALs of {} failed, retry...", serverName, e); 333 return false; 334 } 335 } 336 337 private Procedure[] createSplittingWalProcedures(MasterProcedureEnv env, boolean splitMeta) 338 throws IOException { 339 LOG.info("Splitting WALs {}, isMeta: {}", this, splitMeta); 340 SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager(); 341 List<Procedure> procedures = splitWALManager.splitWALs(serverName, splitMeta); 342 return procedures.toArray(new Procedure[procedures.size()]); 343 } 344 345 private boolean filterDefaultMetaRegions() { 346 if (regionsOnCrashedServer == null) { 347 return false; 348 } 349 regionsOnCrashedServer.removeIf(this::isDefaultMetaRegion); 350 return !regionsOnCrashedServer.isEmpty(); 351 } 352 353 private boolean isDefaultMetaRegion(RegionInfo hri) { 354 return hri.isMetaRegion() && RegionReplicaUtil.isDefaultReplica(hri); 355 } 356 357 /** 358 * Split hbase:meta logs using 'classic' zk-based coordination. Superceded by procedure-based WAL 359 * splitting. 360 * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean) 361 */ 362 private void zkCoordinatedSplitMetaLogs(MasterProcedureEnv env) throws IOException { 363 LOG.debug("Splitting meta WALs {}", this); 364 MasterWalManager mwm = env.getMasterServices().getMasterWalManager(); 365 AssignmentManager am = env.getMasterServices().getAssignmentManager(); 366 am.getRegionStates().metaLogSplitting(serverName); 367 mwm.splitMetaLog(serverName); 368 am.getRegionStates().metaLogSplit(serverName); 369 LOG.debug("Done splitting meta WALs {}", this); 370 } 371 372 /** 373 * Split logs using 'classic' zk-based coordination. Superceded by procedure-based WAL splitting. 374 * @see #createSplittingWalProcedures(MasterProcedureEnv, boolean) 375 */ 376 private void zkCoordinatedSplitLogs(final MasterProcedureEnv env) throws IOException { 377 LOG.debug("Splitting WALs {}", this); 378 MasterWalManager mwm = env.getMasterServices().getMasterWalManager(); 379 AssignmentManager am = env.getMasterServices().getAssignmentManager(); 380 // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running. 381 // PROBLEM!!! WE BLOCK HERE. Can block for hours if hundreds of WALs to split and hundreds 382 // of SCPs running because big cluster crashed down. 383 am.getRegionStates().logSplitting(this.serverName); 384 mwm.splitLog(this.serverName); 385 if (!carryingMeta) { 386 mwm.archiveMetaLog(this.serverName); 387 } 388 am.getRegionStates().logSplit(this.serverName); 389 LOG.debug("Done splitting WALs {}", this); 390 } 391 392 void updateProgress(boolean updateState) { 393 String msg = "Processing ServerCrashProcedure of " + serverName; 394 if (status == null) { 395 status = TaskMonitor.get().createStatus(msg); 396 return; 397 } 398 if (currentRunningState == ServerCrashState.SERVER_CRASH_FINISH) { 399 status.markComplete(msg + " done"); 400 return; 401 } 402 if (updateState) { 403 currentRunningState = getCurrentState(); 404 } 405 int childrenLatch = getChildrenLatch(); 406 status.setStatus(msg + " current State " + currentRunningState 407 + (childrenLatch > 0 408 ? "; remaining num of running child procedures = " + childrenLatch 409 : "")); 410 } 411 412 @Override 413 protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) throws IOException { 414 // Can't rollback. 415 throw new UnsupportedOperationException("unhandled state=" + state); 416 } 417 418 @Override 419 protected ServerCrashState getState(int stateId) { 420 return ServerCrashState.forNumber(stateId); 421 } 422 423 @Override 424 protected int getStateId(ServerCrashState state) { 425 return state.getNumber(); 426 } 427 428 @Override 429 protected ServerCrashState getInitialState() { 430 return ServerCrashState.SERVER_CRASH_START; 431 } 432 433 @Override 434 protected boolean abort(MasterProcedureEnv env) { 435 // TODO 436 return false; 437 } 438 439 @Override 440 protected LockState acquireLock(final MasterProcedureEnv env) { 441 if (env.getProcedureScheduler().waitServerExclusiveLock(this, getServerName())) { 442 return LockState.LOCK_EVENT_WAIT; 443 } 444 return LockState.LOCK_ACQUIRED; 445 } 446 447 @Override 448 protected void releaseLock(final MasterProcedureEnv env) { 449 env.getProcedureScheduler().wakeServerExclusiveLock(this, getServerName()); 450 } 451 452 @Override 453 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 454 setState(ProcedureProtos.ProcedureState.RUNNABLE); 455 env.getProcedureScheduler().addFront(this); 456 return false; 457 } 458 459 @Override 460 public void toStringClassDetails(StringBuilder sb) { 461 sb.append(getProcName()); 462 sb.append(", splitWal="); 463 sb.append(shouldSplitWal); 464 sb.append(", meta="); 465 sb.append(carryingMeta); 466 } 467 468 @Override 469 public String getProcName() { 470 return getClass().getSimpleName() + " " + this.serverName; 471 } 472 473 @Override 474 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 475 super.serializeStateData(serializer); 476 477 MasterProcedureProtos.ServerCrashStateData.Builder state = 478 MasterProcedureProtos.ServerCrashStateData.newBuilder() 479 .setServerName(ProtobufUtil.toServerName(this.serverName)) 480 .setCarryingMeta(this.carryingMeta).setShouldSplitWal(this.shouldSplitWal); 481 if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) { 482 for (RegionInfo hri : this.regionsOnCrashedServer) { 483 state.addRegionsOnCrashedServer(ProtobufUtil.toRegionInfo(hri)); 484 } 485 } 486 serializer.serialize(state.build()); 487 } 488 489 @Override 490 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 491 super.deserializeStateData(serializer); 492 493 MasterProcedureProtos.ServerCrashStateData state = 494 serializer.deserialize(MasterProcedureProtos.ServerCrashStateData.class); 495 this.serverName = ProtobufUtil.toServerName(state.getServerName()); 496 this.carryingMeta = state.hasCarryingMeta() ? state.getCarryingMeta() : false; 497 // shouldSplitWAL has a default over in pb so this invocation will always work. 498 this.shouldSplitWal = state.getShouldSplitWal(); 499 int size = state.getRegionsOnCrashedServerCount(); 500 if (size > 0) { 501 this.regionsOnCrashedServer = new ArrayList<>(size); 502 for (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.RegionInfo ri : state 503 .getRegionsOnCrashedServerList()) { 504 this.regionsOnCrashedServer.add(ProtobufUtil.toRegionInfo(ri)); 505 } 506 } 507 updateProgress(false); 508 } 509 510 @Override 511 public ServerName getServerName() { 512 return this.serverName; 513 } 514 515 @Override 516 public boolean hasMetaTableRegion() { 517 return this.carryingMeta; 518 } 519 520 @Override 521 public ServerOperationType getServerOperationType() { 522 return ServerOperationType.CRASH_HANDLER; 523 } 524 525 @Override 526 protected boolean shouldWaitClientAck(MasterProcedureEnv env) { 527 // The operation is triggered internally on the server 528 // the client does not know about this procedure. 529 return false; 530 } 531 532 /** 533 * Moved out here so can be overridden by the HBCK fix-up SCP to be less strict about what it will 534 * tolerate as a 'match'. 535 * @return True if the region location in <code>rsn</code> matches that of this crashed server. 536 */ 537 protected boolean isMatchingRegionLocation(RegionStateNode rsn) { 538 return this.serverName.equals(rsn.getRegionLocation()); 539 } 540 541 private CompletableFuture<Void> getUpdateMetaFuture() { 542 return updateMetaFuture; 543 } 544 545 private void setUpdateMetaFuture(CompletableFuture<Void> f) { 546 updateMetaFuture = f; 547 } 548 549 /** 550 * Assign the regions on the crashed RS to other Rses. 551 * <p/> 552 * In this method we will go through all the RegionStateNodes of the give regions to find out 553 * whether there is already an TRSP for the region, if so we interrupt it and let it retry on 554 * other server, otherwise we will schedule a TRSP to bring the region online. 555 * <p/> 556 * We will also check whether the table for a region is enabled, if not, we will skip assigning 557 * it. 558 */ 559 private void assignRegions(MasterProcedureEnv env, List<RegionInfo> regions) 560 throws IOException, ProcedureSuspendedException { 561 AssignmentManager am = env.getMasterServices().getAssignmentManager(); 562 boolean retainAssignment = env.getMasterConfiguration().getBoolean(MASTER_SCP_RETAIN_ASSIGNMENT, 563 DEFAULT_MASTER_SCP_RETAIN_ASSIGNMENT); 564 // Since we may suspend in the middle of this loop, so here we use processedRegions to record 565 // the progress, so next time we can locate the correct region 566 // We do not need to persist the processedRegions when serializing the procedure, as when master 567 // restarts, the sub procedure list will be cleared when rescheduling this SCP again, so we need 568 // to start from beginning. 569 for (int n = regions.size(); processedRegions < n; processedRegions++) { 570 RegionInfo region = regions.get(processedRegions); 571 RegionStateNode regionNode = am.getRegionStates().getOrCreateRegionStateNode(region); 572 // There are two possible ways where we have already hold the lock here 573 // 1. We have already hold the lock and we suspend while updating meta, so after being woken 574 // up, we should skip lock again. 575 // 2. We suspend the procedure while trying to hold the lock, and finally it is our turn to 576 // hold the lock and we schedule the procedure again, this time we should have already hold 577 // the lock, so we do not need to lock again 578 if (!regionNode.isLockedBy(this)) { 579 regionNode.lock(this, () -> ProcedureFutureUtil.wakeUp(this, env)); 580 } 581 try { 582 if ( 583 ProcedureFutureUtil.checkFuture(this, this::getUpdateMetaFuture, 584 this::setUpdateMetaFuture, () -> { 585 }) 586 ) { 587 continue; 588 } 589 // This is possible, as when a server is dead, TRSP will fail to schedule a RemoteProcedure 590 // and then try to assign the region to a new RS. And before it has updated the region 591 // location to the new RS, we may have already called the am.getRegionsOnServer so we will 592 // consider the region is still on this crashed server. Then before we arrive here, the 593 // TRSP could have updated the region location, or even finished itself, so the region is 594 // no longer on this crashed server any more. We should not try to assign it again. Please 595 // see HBASE-23594 for more details. 596 // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get 597 // in the way of our clearing out 'Unknown Servers'. 598 if (!isMatchingRegionLocation(regionNode)) { 599 // See HBASE-24117, though we have already changed the shutdown order, it is still worth 600 // double checking here to confirm that we do not skip assignment incorrectly. 601 if (!am.isRunning()) { 602 throw new DoNotRetryIOException( 603 "AssignmentManager has been stopped, can not process assignment any more"); 604 } 605 LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...", 606 this, regionNode, serverName); 607 continue; 608 } 609 if (regionNode.getProcedure() != null) { 610 LOG.info("{} found RIT {}; {}", this, regionNode.getProcedure(), regionNode); 611 ProcedureFutureUtil.suspendIfNecessary(this, this::setUpdateMetaFuture, regionNode 612 .getProcedure().serverCrashed(env, regionNode, getServerName(), !retainAssignment), env, 613 () -> { 614 }); 615 continue; 616 } 617 if ( 618 env.getMasterServices().getTableStateManager().isTableState(regionNode.getTable(), 619 TableState.State.DISABLED) 620 ) { 621 // This should not happen, table disabled but has regions on server. 622 LOG.warn("Found table disabled for region {}, procDetails: {}", regionNode, this); 623 continue; 624 } 625 TransitRegionStateProcedure proc = 626 TransitRegionStateProcedure.assign(env, region, !retainAssignment, null); 627 regionNode.setProcedure(proc); 628 // It is OK to still use addChildProcedure even if we suspend in the middle of this loop, as 629 // the subProcList will only be cleared when we successfully returned from the 630 // executeFromState method. This means we will submit all the TRSPs after we successfully 631 // finished this loop 632 addChildProcedure(proc); 633 } finally { 634 if (updateMetaFuture == null) { 635 regionNode.unlock(this); 636 } 637 } 638 } 639 // we will call this method two times if the region server carries meta, so we need to reset it 640 // to 0 after successfully finished the above loop 641 processedRegions = 0; 642 } 643 644 @Override 645 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 646 return env.getMasterServices().getMasterMetrics().getServerCrashProcMetrics(); 647 } 648 649 @Override 650 protected boolean holdLock(MasterProcedureEnv env) { 651 return true; 652 } 653 654 public static void updateProgress(MasterProcedureEnv env, long parentId) { 655 if (parentId == NO_PROC_ID) { 656 return; 657 } 658 Procedure parentProcedure = 659 env.getMasterServices().getMasterProcedureExecutor().getProcedure(parentId); 660 if (parentProcedure != null && parentProcedure instanceof ServerCrashProcedure) { 661 ((ServerCrashProcedure) parentProcedure).updateProgress(false); 662 } 663 } 664}