/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.handler;

import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;

/**
 * Handles opening of a region on a region server.
 * <p/>
 * This does the same work as the old {@link OpenRegionHandler}, with some modifications to fencing
 * and retrying. We need to keep {@link OpenRegionHandler} as is to stay compatible with the
 * zk-less assignment used in 1.x; otherwise rolling upgrade would not be possible.
 */
@InterfaceAudience.Private
public class AssignRegionHandler extends EventHandler {

  private static final Logger LOG = LoggerFactory.getLogger(AssignRegionHandler.class);

  private final RegionInfo regionInfo;

  private final long openProcId;

  private final TableDescriptor tableDesc;

  private final long masterSystemTime;

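  // Backoff counter used to delay and resubmit this handler while the region is still closing.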
  private final RetryCounter retryCounter;

  public AssignRegionHandler(HRegionServer server, RegionInfo regionInfo, long openProcId,
    @Nullable TableDescriptor tableDesc, long masterSystemTime, EventType eventType) {
    super(server, eventType);
    this.regionInfo = regionInfo;
    this.openProcId = openProcId;
    this.tableDesc = tableDesc;
    this.masterSystemTime = masterSystemTime;
    this.retryCounter = HandlerUtil.getRetryCounter();
  }

  private HRegionServer getServer() {
    return (HRegionServer) server;
  }

  private void cleanUpAndReportFailure(IOException error) throws IOException {
    LOG.warn("Failed to open region {}, will report to master", regionInfo.getRegionNameAsString(),
      error);
    HRegionServer rs = getServer();
    rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
    if (
      !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN,
        HConstants.NO_SEQNUM, openProcId, masterSystemTime, regionInfo))
    ) {
      throw new IOException(
        "Failed to report failed open to master: " + regionInfo.getRegionNameAsString());
    }
  }

  @Override
  public void process() throws IOException {
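    // Record the open procedure id in the SLF4J mapped diagnostic context so it can show up in
    // log output produced while this handler runs.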
    MDC.put("pid", Long.toString(openProcId));
    HRegionServer rs = getServer();
    String encodedName = regionInfo.getEncodedName();
    byte[] encodedNameBytes = regionInfo.getEncodedNameAsBytes();
    String regionName = regionInfo.getRegionNameAsString();
    Region onlineRegion = rs.getRegion(encodedName);
    if (onlineRegion != null) {
      LOG.warn("Received OPEN for {} which is already online", regionName);
      // Just follow the old behavior here; do we even need to call reportRegionStateTransition?
      // Maybe not. In the normal case the rpc call that schedules this handler can succeed, but
      // the connection may break before the response reaches the master. When the master retries,
      // we have already finished the opening, so there is no need to call
      // reportRegionStateTransition again.
      return;
    }
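    // Mark the region as in transition on this server: a value of TRUE in the map means an open
    // is in progress, FALSE means a close is in progress.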
    Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.TRUE);
    if (previous != null) {
      if (previous) {
        // The region is already opening and this may be a retry of the rpc call, so it is safe to
        // ignore it.
        LOG.info("Receiving OPEN for {} which we are already trying to OPEN"
          + " - ignoring this new request for this region.", regionName);
      } else {
        // The region is closing. This is possible because we update the region state to CLOSED
        // when calling reportRegionStateTransition, so the HMaster may think the region is offline
        // before we have actually closed it, as reportRegionStateTransition is part of the
        // closing process.
        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
        LOG.info("Receiving OPEN for {} which we are trying to close, try again after {}ms",
          regionName, backoff);
        rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS);
      }
      return;
    }
    LOG.info("Open {}", regionName);
    HRegion region;
    try {
      TableDescriptor htd =
        tableDesc != null ? tableDesc : rs.getTableDescriptors().get(regionInfo.getTable());
      if (htd == null) {
        throw new IOException("Missing table descriptor for " + regionName);
      }
      // Pass null for the last parameter, which used to be a CancelableProgressable, as the
      // opening can no longer be interrupted by a close request.
      Configuration conf = rs.getConfiguration();
      TableName tn = htd.getTableName();
      if (ServerRegionReplicaUtil.isMetaRegionReplicaReplicationEnabled(conf, tn)) {
        if (RegionReplicaUtil.isDefaultReplica(this.regionInfo.getReplicaId())) {
          // Add the hbase:meta replication source on replica zero/default.
          rs.getReplicationSourceService().getReplicationManager()
            .addCatalogReplicationSource(this.regionInfo);
        }
      }
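      // Open the region: this initializes the stores and replays any recovered edits; an
      // IOException here is reported back to the master as FAILED_OPEN.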
      region = HRegion.openHRegion(regionInfo, htd, rs.getWAL(regionInfo), conf, rs, null);
    } catch (IOException e) {
      cleanUpAndReportFailure(e);
      return;
    }
    // From here on out we are past the point of no return (PONR); we cannot revert. The only way
    // to handle an exception from here on is to abort the region server.
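    // postOpenDeployTasks reports the OPENED transition to the master; if that report fails it
    // throws, and handleException will then abort the region server.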
    rs.postOpenDeployTasks(new PostOpenDeployContext(region, openProcId, masterSystemTime));
    rs.addRegion(region);
    LOG.info("Opened {}", regionName);
    // Cache the open region procedure id after the region transition report succeeds.
    rs.finishRegionProcedure(openProcId);
    Boolean current = rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes());
    if (current == null) {
      // Should NEVER happen, but let's be paranoid.
      LOG.error("Bad state: we've just opened {} which was NOT in transition", regionName);
    } else if (!current) {
      // Should NEVER happen, but let's be paranoid.
      LOG.error("Bad state: we've just opened {} which was closing", regionName);
    }
  }

  @Override
  protected void handleException(Throwable t) {
    LOG.warn("Fatal error occurred while opening region {}, aborting...",
      regionInfo.getRegionNameAsString(), t);
    // Clear any reference in getServer().getRegionsInTransitionInRS(), otherwise it can hold up
    // the regionserver abort on cluster shutdown. HBASE-23984.
    getServer().getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes());
    getServer().abort(
      "Failed to open region " + regionInfo.getRegionNameAsString() + " and can not recover", t);
  }

  public static AssignRegionHandler create(HRegionServer server, RegionInfo regionInfo,
    long openProcId, TableDescriptor tableDesc, long masterSystemTime) {
    EventType eventType;
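    // Choose the event type, which determines the executor pool: meta regions and system or
    // high-priority tables are dispatched to dedicated open-region executors.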
    if (regionInfo.isMetaRegion()) {
      eventType = EventType.M_RS_OPEN_META;
    } else if (
      regionInfo.getTable().isSystemTable()
        || (tableDesc != null && tableDesc.getPriority() >= HConstants.ADMIN_QOS)
    ) {
      eventType = EventType.M_RS_OPEN_PRIORITY_REGION;
    } else {
      eventType = EventType.M_RS_OPEN_REGION;
    }
    return new AssignRegionHandler(server, regionInfo, openProcId, tableDesc, masterSystemTime,
      eventType);
  }
}