001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.handler; 019 020import edu.umd.cs.findbugs.annotations.Nullable; 021import java.io.IOException; 022import java.util.concurrent.TimeUnit; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.TableName; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.client.RegionReplicaUtil; 028import org.apache.hadoop.hbase.client.TableDescriptor; 029import org.apache.hadoop.hbase.executor.EventHandler; 030import org.apache.hadoop.hbase.executor.EventType; 031import org.apache.hadoop.hbase.regionserver.HRegion; 032import org.apache.hadoop.hbase.regionserver.HRegionServer; 033import org.apache.hadoop.hbase.regionserver.Region; 034import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext; 035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext; 036import org.apache.hadoop.hbase.util.RetryCounter; 037import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041import org.slf4j.MDC; 042 043import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 044 045/** 046 * Handles opening of a region on a region server. 047 * <p/> 048 * Just done the same thing with the old {@link OpenRegionHandler}, with some modifications on 049 * fencing and retrying. But we need to keep the {@link OpenRegionHandler} as is to keep compatible 050 * with the zk less assignment for 1.x, otherwise it is not possible to do rolling upgrade. 051 */ 052@InterfaceAudience.Private 053public class AssignRegionHandler extends EventHandler { 054 055 private static final Logger LOG = LoggerFactory.getLogger(AssignRegionHandler.class); 056 057 private final RegionInfo regionInfo; 058 059 private final long openProcId; 060 061 private final TableDescriptor tableDesc; 062 063 private final long masterSystemTime; 064 065 private final RetryCounter retryCounter; 066 067 public AssignRegionHandler(HRegionServer server, RegionInfo regionInfo, long openProcId, 068 @Nullable TableDescriptor tableDesc, long masterSystemTime, EventType eventType) { 069 super(server, eventType); 070 this.regionInfo = regionInfo; 071 this.openProcId = openProcId; 072 this.tableDesc = tableDesc; 073 this.masterSystemTime = masterSystemTime; 074 this.retryCounter = HandlerUtil.getRetryCounter(); 075 } 076 077 private HRegionServer getServer() { 078 return (HRegionServer) server; 079 } 080 081 private void cleanUpAndReportFailure(IOException error) throws IOException { 082 LOG.warn("Failed to open region {}, will report to master", regionInfo.getRegionNameAsString(), 083 error); 084 HRegionServer rs = getServer(); 085 rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes(), Boolean.TRUE); 086 if ( 087 !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN, 088 HConstants.NO_SEQNUM, openProcId, masterSystemTime, regionInfo)) 089 ) { 090 throw new IOException( 091 "Failed to report failed open to master: " + regionInfo.getRegionNameAsString()); 092 } 093 } 094 095 @Override 096 public void process() throws IOException { 097 MDC.put("pid", Long.toString(openProcId)); 098 HRegionServer rs = getServer(); 099 String encodedName = regionInfo.getEncodedName(); 100 byte[] encodedNameBytes = regionInfo.getEncodedNameAsBytes(); 101 String regionName = regionInfo.getRegionNameAsString(); 102 Region onlineRegion = rs.getRegion(encodedName); 103 if (onlineRegion != null) { 104 LOG.warn("Received OPEN for {} which is already online", regionName); 105 // Just follow the old behavior, do we need to call reportRegionStateTransition? Maybe not? 106 // For normal case, it could happen that the rpc call to schedule this handler is succeeded, 107 // but before returning to master the connection is broken. And when master tries again, we 108 // have already finished the opening. For this case we do not need to call 109 // reportRegionStateTransition any more. 110 return; 111 } 112 Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.TRUE); 113 if (previous != null) { 114 if (previous) { 115 // The region is opening and this maybe a retry on the rpc call, it is safe to ignore it. 116 LOG.info("Receiving OPEN for {} which we are already trying to OPEN" 117 + " - ignoring this new request for this region.", regionName); 118 } else { 119 // The region is closing. This is possible as we will update the region state to CLOSED when 120 // calling reportRegionStateTransition, so the HMaster will think the region is offline, 121 // before we actually close the region, as reportRegionStateTransition is part of the 122 // closing process. 123 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 124 LOG.info("Receiving OPEN for {} which we are trying to close, try again after {}ms", 125 regionName, backoff); 126 rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS); 127 } 128 return; 129 } 130 LOG.info("Open {}", regionName); 131 HRegion region; 132 try { 133 TableDescriptor htd = 134 tableDesc != null ? tableDesc : rs.getTableDescriptors().get(regionInfo.getTable()); 135 if (htd == null) { 136 throw new IOException("Missing table descriptor for " + regionName); 137 } 138 // pass null for the last parameter, which used to be a CancelableProgressable, as now the 139 // opening can not be interrupted by a close request any more. 140 Configuration conf = rs.getConfiguration(); 141 TableName tn = htd.getTableName(); 142 if (ServerRegionReplicaUtil.isMetaRegionReplicaReplicationEnabled(conf, tn)) { 143 if (RegionReplicaUtil.isDefaultReplica(this.regionInfo.getReplicaId())) { 144 // Add the hbase:meta replication source on replica zero/default. 145 rs.getReplicationSourceService().getReplicationManager() 146 .addCatalogReplicationSource(this.regionInfo); 147 } 148 } 149 region = HRegion.openHRegion(regionInfo, htd, rs.getWAL(regionInfo), conf, rs, null); 150 } catch (IOException e) { 151 cleanUpAndReportFailure(e); 152 return; 153 } 154 // From here on out, this is PONR. We can not revert back. The only way to address an 155 // exception from here on out is to abort the region server. 156 rs.postOpenDeployTasks(new PostOpenDeployContext(region, openProcId, masterSystemTime)); 157 rs.addRegion(region); 158 LOG.info("Opened {}", regionName); 159 // Cache the open region procedure id after report region transition succeed. 160 rs.finishRegionProcedure(openProcId); 161 Boolean current = rs.getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes()); 162 if (current == null) { 163 // Should NEVER happen, but let's be paranoid. 164 LOG.error("Bad state: we've just opened {} which was NOT in transition", regionName); 165 } else if (!current) { 166 // Should NEVER happen, but let's be paranoid. 167 LOG.error("Bad state: we've just opened {} which was closing", regionName); 168 } 169 } 170 171 @Override 172 protected void handleException(Throwable t) { 173 LOG.warn("Fatal error occurred while opening region {}, aborting...", 174 regionInfo.getRegionNameAsString(), t); 175 // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up 176 // regionserver abort on cluster shutdown. HBASE-23984. 177 getServer().getRegionsInTransitionInRS().remove(regionInfo.getEncodedNameAsBytes()); 178 getServer().abort( 179 "Failed to open region " + regionInfo.getRegionNameAsString() + " and can not recover", t); 180 } 181 182 public static AssignRegionHandler create(HRegionServer server, RegionInfo regionInfo, 183 long openProcId, TableDescriptor tableDesc, long masterSystemTime) { 184 EventType eventType; 185 if (regionInfo.isMetaRegion()) { 186 eventType = EventType.M_RS_OPEN_META; 187 } else if ( 188 regionInfo.getTable().isSystemTable() 189 || (tableDesc != null && tableDesc.getPriority() >= HConstants.ADMIN_QOS) 190 ) { 191 eventType = EventType.M_RS_OPEN_PRIORITY_REGION; 192 } else { 193 eventType = EventType.M_RS_OPEN_REGION; 194 } 195 return new AssignRegionHandler(server, regionInfo, openProcId, tableDesc, masterSystemTime, 196 eventType); 197 } 198}