001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import static org.apache.hadoop.hbase.io.hfile.CacheConfig.DEFAULT_EVICT_ON_CLOSE;
021import static org.apache.hadoop.hbase.io.hfile.CacheConfig.EVICT_BLOCKS_ON_CLOSE_KEY;
022import static org.apache.hadoop.hbase.master.LoadBalancer.BOGUS_SERVER_NAME;
023import static org.apache.hadoop.hbase.master.assignment.AssignmentManager.FORCE_REGION_RETAINMENT;
024
025import edu.umd.cs.findbugs.annotations.Nullable;
026import java.io.IOException;
027import java.util.concurrent.TimeUnit;
028import org.apache.hadoop.hbase.HBaseIOException;
029import org.apache.hadoop.hbase.ServerName;
030import org.apache.hadoop.hbase.TableName;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.client.RegionReplicaUtil;
033import org.apache.hadoop.hbase.client.RetriesExhaustedException;
034import org.apache.hadoop.hbase.master.MetricsAssignmentManager;
035import org.apache.hadoop.hbase.master.RegionState.State;
036import org.apache.hadoop.hbase.master.ServerManager;
037import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
038import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
039import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
040import org.apache.hadoop.hbase.procedure2.Procedure;
041import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
042import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
043import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
044import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
045import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
046import org.apache.hadoop.hbase.util.RetryCounter;
047import org.apache.yetus.audience.InterfaceAudience;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
052import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionState;
053import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionStateTransitionStateData;
054import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionType;
055import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
056import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
057
058/**
059 * The procedure to deal with the state transition of a region. A region with a TRSP in place is
 * called RIT, i.e., RegionInTransition.
061 * <p/>
062 * It can be used to assign/unassign/reopen/move a region, and for
063 * {@link #unassign(MasterProcedureEnv, RegionInfo)} and
064 * {@link #reopen(MasterProcedureEnv, RegionInfo)}, you do not need to specify a target server, and
065 * for {@link #assign(MasterProcedureEnv, RegionInfo, ServerName)} and
066 * {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you want to you can provide a
067 * target server. And for {@link #move(MasterProcedureEnv, RegionInfo, ServerName)}, if you do not
068 * specify a targetServer, we will select one randomly.
069 * <p/>
070 * <p/>
071 * The typical state transition for assigning a region is:
072 *
073 * <pre>
074 * GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
075 * </pre>
076 *
077 * Notice that, if there are failures we may go back to the {@code GET_ASSIGN_CANDIDATE} state to
078 * try again.
079 * <p/>
080 * The typical state transition for unassigning a region is:
081 *
082 * <pre>
083 * CLOSE -----> CONFIRM_CLOSED
084 * </pre>
085 *
086 * Here things go a bit different, if there are failures, especially that if there is a server
087 * crash, we will go to the {@code GET_ASSIGN_CANDIDATE} state to bring the region online first, and
088 * then go through the normal way to unassign it.
089 * <p/>
090 * The typical state transition for reopening/moving a region is:
091 *
092 * <pre>
093 * CLOSE -----> CONFIRM_CLOSED -----> GET_ASSIGN_CANDIDATE ------> OPEN -----> CONFIRM_OPENED
094 * </pre>
095 *
096 * The retry logic is the same with the above assign/unassign.
097 * <p/>
098 * Notice that, although we allow specify a target server, it just acts as a candidate, we do not
099 * guarantee that the region will finally be on the target server. If this is important for you, you
100 * should check whether the region is on the target server after the procedure is finished.
 * <p/>
 * Alternatively, to try to retain assignments, the
 * <b>hbase.master.scp.retain.assignment.force</b> option can be used together with
 * <b>hbase.master.scp.retain.assignment</b>.
105 * <p/>
106 * When you want to schedule a TRSP, please check whether there is still one for this region, and
107 * the check should be under the RegionStateNode lock. We will remove the TRSP from a
108 * RegionStateNode when we are done, see the code in {@code reportTransition} method below. There
 * could be at most one TRSP for a given region.
110 */
111@InterfaceAudience.Private
112public class TransitRegionStateProcedure
113  extends AbstractStateMachineRegionProcedure<RegionStateTransitionState> {
114
115  private static final Logger LOG = LoggerFactory.getLogger(TransitRegionStateProcedure.class);
116
117  private TransitionType type;
118
119  private RegionStateTransitionState initialState;
120
121  private RegionStateTransitionState lastState;
122
123  // the candidate where we want to assign the region to.
124  private ServerName assignCandidate;
125
126  private boolean forceNewPlan;
127
128  private RetryCounter retryCounter;
129
130  private RegionRemoteProcedureBase remoteProc;
131
132  private boolean evictCache;
133
134  private boolean isSplit;
135
136  private RetryCounter forceRetainmentRetryCounter;
137
138  private long forceRetainmentTotalWait;
139
140  public TransitRegionStateProcedure() {
141  }
142
143  private void setInitialAndLastState() {
144    switch (type) {
145      case ASSIGN:
146        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE;
147        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED;
148        break;
149      case UNASSIGN:
150        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE;
151        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED;
152        break;
153      case MOVE:
154      case REOPEN:
155        initialState = RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE;
156        lastState = RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED;
157        break;
158      default:
159        throw new IllegalArgumentException("Unknown TransitionType: " + type);
160    }
161  }
162
163  protected TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
164    ServerName assignCandidate, boolean forceNewPlan, TransitionType type) {
165    super(env, hri);
166    this.assignCandidate = assignCandidate;
167    this.forceNewPlan = forceNewPlan;
168    this.type = type;
169    setInitialAndLastState();
170
171    // when do reopen TRSP, let the rs know the targetServer so it can keep some info on close
172    if (type == TransitionType.REOPEN) {
173      this.assignCandidate = getRegionStateNode(env).getRegionLocation();
174    }
175    evictCache =
176      env.getMasterConfiguration().getBoolean(EVICT_BLOCKS_ON_CLOSE_KEY, DEFAULT_EVICT_ON_CLOSE);
177    initForceRetainmentRetryCounter(env);
178  }
179
180  private void initForceRetainmentRetryCounter(MasterProcedureEnv env) {
181    if (env.getAssignmentManager().isForceRegionRetainment()) {
182      forceRetainmentRetryCounter =
183        new RetryCounter(env.getAssignmentManager().getForceRegionRetainmentRetries(),
184          env.getAssignmentManager().getForceRegionRetainmentWaitInterval(), TimeUnit.MILLISECONDS);
185      forceRetainmentTotalWait = 0;
186    }
187  }
188
189  protected TransitRegionStateProcedure(MasterProcedureEnv env, RegionInfo hri,
190    ServerName assignCandidate, boolean forceNewPlan, TransitionType type, boolean isSplit) {
191    this(env, hri, assignCandidate, forceNewPlan, type);
192    this.isSplit = isSplit;
193  }
194
195  @Override
196  public TableOperationType getTableOperationType() {
197    // TODO: maybe we should make another type here, REGION_TRANSITION?
198    return TableOperationType.REGION_EDIT;
199  }
200
201  @Override
202  protected boolean waitInitialized(MasterProcedureEnv env) {
203    if (TableName.isMetaTableName(getTableName())) {
204      return false;
205    }
206    // First we need meta to be loaded, and second, if meta is not online then we will likely to
207    // fail when updating meta so we wait until it is assigned.
208    AssignmentManager am = env.getAssignmentManager();
209    return am.waitMetaLoaded(this) || am.waitMetaAssigned(this, getRegion());
210  }
211
212  private void checkAndWaitForOriginalServer(MasterProcedureEnv env, ServerName lastHost)
213    throws ProcedureSuspendedException {
214    ServerManager serverManager = env.getMasterServices().getServerManager();
215    ServerName newNameForServer = serverManager.findServerWithSameHostnamePortWithLock(lastHost);
216    boolean isOnline = serverManager.createDestinationServersList().contains(newNameForServer);
217
218    if (!isOnline && forceRetainmentRetryCounter.shouldRetry()) {
219      int backoff =
220        Math.toIntExact(forceRetainmentRetryCounter.getBackoffTimeAndIncrementAttempts());
221      forceRetainmentTotalWait += backoff;
222      LOG.info(
223        "Suspending the TRSP PID={} for {}ms because {} is true and previous host {} "
224          + "for region is not yet online.",
225        this.getProcId(), backoff, FORCE_REGION_RETAINMENT, lastHost);
226      setTimeout(backoff);
227      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
228      throw new ProcedureSuspendedException();
229    }
230    LOG.info(
231      "{} is true. TRSP PID={} waited {}ms for host {} to come back online. "
232        + "Did host come back online? {}",
233      FORCE_REGION_RETAINMENT, this.getProcId(), forceRetainmentTotalWait, lastHost, isOnline);
234    initForceRetainmentRetryCounter(env);
235  }
236
237  private void queueAssign(MasterProcedureEnv env, RegionStateNode regionNode)
238    throws ProcedureSuspendedException {
239    boolean retain = false;
240    if (forceNewPlan) {
241      // set the region location to null if forceNewPlan is true
242      regionNode.setRegionLocation(null);
243    } else {
244      if (assignCandidate != null) {
245        retain = assignCandidate.equals(regionNode.getLastHost());
246        regionNode.setRegionLocation(assignCandidate);
247      } else if (regionNode.getLastHost() != null) {
248        retain = true;
249        LOG.info("Setting lastHost {} as the location for region {}", regionNode.getLastHost(),
250          regionNode.getRegionInfo().getEncodedName());
251        regionNode.setRegionLocation(regionNode.getLastHost());
252      }
253      if (
254        regionNode.getRegionLocation() != null
255          && env.getAssignmentManager().isForceRegionRetainment()
256      ) {
257        LOG.warn("{} is set to true. This may delay regions re-assignment "
258          + "upon RegionServers crashes or restarts.", FORCE_REGION_RETAINMENT);
259        checkAndWaitForOriginalServer(env, regionNode.getRegionLocation());
260      }
261    }
262    LOG.info("Starting {}; {}; forceNewPlan={}, retain={}", this, regionNode.toShortString(),
263      forceNewPlan, retain);
264    env.getAssignmentManager().queueAssign(regionNode);
265    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_OPEN);
266    if (regionNode.getProcedureEvent().suspendIfNotReady(this)) {
267      throw new ProcedureSuspendedException();
268    }
269  }
270
271  private void openRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
272    ServerName loc = regionNode.getRegionLocation();
273    if (loc == null || BOGUS_SERVER_NAME.equals(loc)) {
274      LOG.warn("No location specified for {}, jump back to state {} to get one", getRegion(),
275        RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
276      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
277      throw new HBaseIOException("Failed to open region, the location is null or bogus.");
278    }
279    env.getAssignmentManager().regionOpening(regionNode);
280    addChildProcedure(new OpenRegionProcedure(this, getRegion(), loc));
281    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED);
282  }
283
284  private Flow confirmOpened(MasterProcedureEnv env, RegionStateNode regionNode)
285    throws IOException {
286    if (regionNode.isInState(State.OPEN)) {
287      retryCounter = null;
288      if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_OPENED) {
289        // we are the last state, finish
290        regionNode.unsetProcedure(this);
291        ServerCrashProcedure.updateProgress(env, getParentProcId());
292        return Flow.NO_MORE_STATE;
293      }
294      // It is possible that we arrive here but confirm opened is not the last state, for example,
295      // when merging or splitting a region, we unassign the region from a RS and the RS is crashed,
296      // then there will be recovered edits for this region, we'd better make the region online
297      // again and then unassign it, otherwise we have to fail the merge/split procedure as we may
298      // loss data.
299      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
300      return Flow.HAS_MORE_STATE;
301    }
302
303    int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
304      .incrementAndGetRetries();
305    int maxAttempts = env.getAssignmentManager().getAssignMaxAttempts();
306    LOG.info("Retry={} of max={}; {}; {}", retries, maxAttempts, this, regionNode.toShortString());
307
308    if (retries >= maxAttempts) {
309      env.getAssignmentManager().regionFailedOpen(regionNode, true);
310      setFailure(getClass().getSimpleName(), new RetriesExhaustedException(
311        "Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
312      regionNode.unsetProcedure(this);
313      return Flow.NO_MORE_STATE;
314    }
315
316    env.getAssignmentManager().regionFailedOpen(regionNode, false);
317    // we failed to assign the region, force a new plan
318    forceNewPlan = true;
319    regionNode.setRegionLocation(null);
320    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
321
322    if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
323      // Throw exception to backoff and retry when failed open too many times
324      throw new HBaseIOException(
325        "Failed confirm OPEN of " + regionNode + " (remote log may yield more detail on why).");
326    } else {
327      // Here we do not throw exception because we want to the region to be online ASAP
328      return Flow.HAS_MORE_STATE;
329    }
330  }
331
332  private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
333    if (regionNode.isInState(State.OPEN, State.CLOSING, State.MERGING, State.SPLITTING)) {
334      // this is the normal case
335      env.getAssignmentManager().regionClosing(regionNode);
336      CloseRegionProcedure closeProc = isSplit
337        ? new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(),
338          assignCandidate, true)
339        : new CloseRegionProcedure(this, getRegion(), regionNode.getRegionLocation(),
340          assignCandidate, evictCache);
341      addChildProcedure(closeProc);
342      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED);
343    } else {
344      forceNewPlan = true;
345      regionNode.setRegionLocation(null);
346      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
347    }
348  }
349
350  private Flow confirmClosed(MasterProcedureEnv env, RegionStateNode regionNode)
351    throws IOException {
352    if (regionNode.isInState(State.CLOSED)) {
353      retryCounter = null;
354      if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
355        // we are the last state, finish
356        regionNode.unsetProcedure(this);
357        return Flow.NO_MORE_STATE;
358      }
359      // This means we need to open the region again, should be a move or reopen
360      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
361      return Flow.HAS_MORE_STATE;
362    }
363    if (regionNode.isInState(State.CLOSING)) {
364      // This is possible, think the target RS crashes and restarts immediately, the close region
365      // operation will return a NotServingRegionException soon, we can only recover after SCP takes
366      // care of this RS. So here we throw an IOException to let upper layer to retry with backoff.
367      setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_CLOSE);
368      throw new HBaseIOException("Failed to close region");
369    }
370    // abnormally closed, need to reopen it, no matter what is the last state, see the comment in
371    // confirmOpened for more details that why we need to reopen the region first even if we just
372    // want to close it.
373    // The only exception is for non-default replica, where we do not need to deal with recovered
374    // edits. Notice that the region will remain in ABNORMALLY_CLOSED state, the upper layer need to
375    // deal with this state. For non-default replica, this is usually the same with CLOSED.
376    assert regionNode.isInState(State.ABNORMALLY_CLOSED);
377    if (
378      !RegionReplicaUtil.isDefaultReplica(getRegion())
379        && lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED
380    ) {
381      regionNode.unsetProcedure(this);
382      return Flow.NO_MORE_STATE;
383    }
384    retryCounter = null;
385    setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
386    return Flow.HAS_MORE_STATE;
387  }
388
389  // Override to lock RegionStateNode
390  @SuppressWarnings("rawtypes")
391  @Override
392  protected Procedure[] execute(MasterProcedureEnv env)
393    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
394    RegionStateNode regionNode =
395      env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
396    regionNode.lock();
397    try {
398      return super.execute(env);
399    } finally {
400      regionNode.unlock();
401    }
402  }
403
404  private RegionStateNode getRegionStateNode(MasterProcedureEnv env) {
405    return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(getRegion());
406  }
407
408  @Override
409  protected Flow executeFromState(MasterProcedureEnv env, RegionStateTransitionState state)
410    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
411    RegionStateNode regionNode = getRegionStateNode(env);
412    try {
413      switch (state) {
414        case REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE:
415          // Need to do some sanity check for replica region, if the region does not exist at
416          // master, do not try to assign the replica region, log error and return.
417          if (!RegionReplicaUtil.isDefaultReplica(regionNode.getRegionInfo())) {
418            RegionInfo defaultRI =
419              RegionReplicaUtil.getRegionInfoForDefaultReplica(regionNode.getRegionInfo());
420            if (
421              env.getMasterServices().getAssignmentManager().getRegionStates()
422                .getRegionStateNode(defaultRI) == null
423            ) {
424              LOG.error(
425                "Cannot assign replica region {} because its primary region {} does not exist.",
426                regionNode.getRegionInfo(), defaultRI);
427              regionNode.unsetProcedure(this);
428              return Flow.NO_MORE_STATE;
429            }
430          }
431          queueAssign(env, regionNode);
432          return Flow.HAS_MORE_STATE;
433        case REGION_STATE_TRANSITION_OPEN:
434          openRegion(env, regionNode);
435          return Flow.HAS_MORE_STATE;
436        case REGION_STATE_TRANSITION_CONFIRM_OPENED:
437          return confirmOpened(env, regionNode);
438        case REGION_STATE_TRANSITION_CLOSE:
439          closeRegion(env, regionNode);
440          return Flow.HAS_MORE_STATE;
441        case REGION_STATE_TRANSITION_CONFIRM_CLOSED:
442          return confirmClosed(env, regionNode);
443        default:
444          throw new UnsupportedOperationException("unhandled state=" + state);
445      }
446    } catch (IOException e) {
447      if (retryCounter == null) {
448        retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
449      }
450      long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
451      LOG.warn(
452        "Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed "
453          + "by other Procedure or operator intervention",
454        backoff / 1000, this, regionNode.toShortString(), e);
455      setTimeout(Math.toIntExact(backoff));
456      setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
457      skipPersistence();
458      throw new ProcedureSuspendedException();
459    }
460  }
461
462  /**
463   * At end of timeout, wake ourselves up so we run again.
464   */
465  @Override
466  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
467    setState(ProcedureProtos.ProcedureState.RUNNABLE);
468    env.getProcedureScheduler().addFront(this);
469    return false; // 'false' means that this procedure handled the timeout
470  }
471
472  // Should be called with RegionStateNode locked
473  public void reportTransition(MasterProcedureEnv env, RegionStateNode regionNode,
474    ServerName serverName, TransitionCode code, long seqId, long procId) throws IOException {
475    if (remoteProc == null) {
476      LOG.warn(
477        "There is no outstanding remote region procedure for {}, serverName={}, code={},"
478          + " seqId={}, proc={}, should be a retry, ignore",
479        regionNode, serverName, code, seqId, this);
480      return;
481    }
482    // The procId could be -1 if it is from an old region server, we need to deal with it so that we
483    // can do rolling upgraing.
484    if (procId >= 0 && remoteProc.getProcId() != procId) {
485      LOG.warn(
486        "The pid of remote region procedure for {} is {}, the reported pid={}, serverName={},"
487          + " code={}, seqId={}, proc={}, should be a retry, ignore",
488        regionNode, remoteProc.getProcId(), procId, serverName, code, seqId, this);
489      return;
490    }
491    remoteProc.reportTransition(env, regionNode, serverName, code, seqId);
492  }
493
494  // Should be called with RegionStateNode locked
495  public void serverCrashed(MasterProcedureEnv env, RegionStateNode regionNode,
496    ServerName serverName, boolean forceNewPlan) throws IOException {
497    this.forceNewPlan = forceNewPlan;
498    if (remoteProc != null) {
499      // this means we are waiting for the sub procedure, so wake it up
500      remoteProc.serverCrashed(env, regionNode, serverName);
501    } else {
502      // we are in RUNNING state, just update the region state, and we will process it later.
503      env.getAssignmentManager().regionClosedAbnormally(regionNode);
504    }
505  }
506
507  void attachRemoteProc(RegionRemoteProcedureBase proc) {
508    this.remoteProc = proc;
509  }
510
511  void unattachRemoteProc(RegionRemoteProcedureBase proc) {
512    assert this.remoteProc == proc;
513    this.remoteProc = null;
514  }
515
516  // will be called after we finish loading the meta entry for this region.
517  // used to change the state of the region node if we have a sub procedure, as we may not persist
518  // the state to meta yet. See the code in RegionRemoteProcedureBase.execute for more details.
519  void stateLoaded(AssignmentManager am, RegionStateNode regionNode) {
520    if (remoteProc != null) {
521      remoteProc.stateLoaded(am, regionNode);
522    }
523  }
524
525  @Override
526  protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
527    throws IOException, InterruptedException {
528    // no rollback
529    throw new UnsupportedOperationException();
530  }
531
532  @Override
533  protected RegionStateTransitionState getState(int stateId) {
534    return RegionStateTransitionState.forNumber(stateId);
535  }
536
537  @Override
538  protected int getStateId(RegionStateTransitionState state) {
539    return state.getNumber();
540  }
541
542  @Override
543  protected RegionStateTransitionState getInitialState() {
544    return initialState;
545  }
546
547  private static TransitionType convert(RegionTransitionType type) {
548    switch (type) {
549      case ASSIGN:
550        return TransitionType.ASSIGN;
551      case UNASSIGN:
552        return TransitionType.UNASSIGN;
553      case MOVE:
554        return TransitionType.MOVE;
555      case REOPEN:
556        return TransitionType.REOPEN;
557      default:
558        throw new IllegalArgumentException("Unknown RegionTransitionType: " + type);
559    }
560  }
561
562  private static RegionTransitionType convert(TransitionType type) {
563    switch (type) {
564      case ASSIGN:
565        return RegionTransitionType.ASSIGN;
566      case UNASSIGN:
567        return RegionTransitionType.UNASSIGN;
568      case MOVE:
569        return RegionTransitionType.MOVE;
570      case REOPEN:
571        return RegionTransitionType.REOPEN;
572      default:
573        throw new IllegalArgumentException("Unknown TransitionType: " + type);
574    }
575  }
576
577  @Override
578  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
579    super.serializeStateData(serializer);
580    RegionStateTransitionStateData.Builder builder =
581      RegionStateTransitionStateData.newBuilder().setType(convert(type))
582        .setForceNewPlan(forceNewPlan).setEvictCache(evictCache).setIsSplit(isSplit);
583    if (assignCandidate != null) {
584      builder.setAssignCandidate(ProtobufUtil.toServerName(assignCandidate));
585    }
586    serializer.serialize(builder.build());
587  }
588
589  @Override
590  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
591    super.deserializeStateData(serializer);
592    RegionStateTransitionStateData data =
593      serializer.deserialize(RegionStateTransitionStateData.class);
594    type = convert(data.getType());
595    setInitialAndLastState();
596    forceNewPlan = data.getForceNewPlan();
597    if (data.hasAssignCandidate()) {
598      assignCandidate = ProtobufUtil.toServerName(data.getAssignCandidate());
599    }
600    evictCache = data.getEvictCache();
601    isSplit = data.getIsSplit();
602  }
603
604  @Override
605  protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
606    MetricsAssignmentManager metrics = env.getAssignmentManager().getAssignmentManagerMetrics();
607    switch (type) {
608      case ASSIGN:
609        return metrics.getAssignProcMetrics();
610      case UNASSIGN:
611        return metrics.getUnassignProcMetrics();
612      case MOVE:
613        return metrics.getMoveProcMetrics();
614      case REOPEN:
615        return metrics.getReopenProcMetrics();
616      default:
617        throw new IllegalArgumentException("Unknown transition type: " + type);
618    }
619  }
620
621  @Override
622  public void toStringClassDetails(StringBuilder sb) {
623    super.toStringClassDetails(sb);
624    if (initialState == RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE) {
625      sb.append(", ASSIGN");
626    } else if (lastState == RegionStateTransitionState.REGION_STATE_TRANSITION_CONFIRM_CLOSED) {
627      sb.append(", UNASSIGN");
628    } else {
629      sb.append(", REOPEN/MOVE");
630    }
631  }
632
633  private static TransitRegionStateProcedure setOwner(MasterProcedureEnv env,
634    TransitRegionStateProcedure proc) {
635    proc.setOwner(env.getRequestUser().getShortName());
636    return proc;
637  }
638
639  public enum TransitionType {
640    ASSIGN,
641    UNASSIGN,
642    MOVE,
643    REOPEN
644  }
645
  // Be careful that, when you call the factory methods below, you need to manually attach the
  // returned procedure with the RegionStateNode, otherwise the procedure will quit immediately
  // without doing anything. See the comment in executeFromState to find out why we need this
  // assumption.
649  public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region,
650    @Nullable ServerName targetServer) {
651    return assign(env, region, false, targetServer);
652  }
653
654  public static TransitRegionStateProcedure assign(MasterProcedureEnv env, RegionInfo region,
655    boolean forceNewPlan, @Nullable ServerName targetServer) {
656    return setOwner(env, new TransitRegionStateProcedure(env, region, targetServer, forceNewPlan,
657      TransitionType.ASSIGN));
658  }
659
660  public static TransitRegionStateProcedure unassign(MasterProcedureEnv env, RegionInfo region) {
661    return setOwner(env,
662      new TransitRegionStateProcedure(env, region, null, false, TransitionType.UNASSIGN));
663  }
664
665  public static TransitRegionStateProcedure unassignSplitMerge(MasterProcedureEnv env,
666    RegionInfo region) {
667    return setOwner(env,
668      new TransitRegionStateProcedure(env, region, null, false, TransitionType.UNASSIGN, true));
669  }
670
671  public static TransitRegionStateProcedure reopen(MasterProcedureEnv env, RegionInfo region) {
672    return setOwner(env,
673      new TransitRegionStateProcedure(env, region, null, false, TransitionType.REOPEN));
674  }
675
676  public static TransitRegionStateProcedure move(MasterProcedureEnv env, RegionInfo region,
677    @Nullable ServerName targetServer) {
678    return setOwner(env, new TransitRegionStateProcedure(env, region, targetServer,
679      targetServer == null, TransitionType.MOVE));
680  }
681}