001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.handler; 019 020import edu.umd.cs.findbugs.annotations.Nullable; 021import java.io.IOException; 022import java.util.concurrent.TimeUnit; 023import org.apache.hadoop.hbase.HConstants; 024import org.apache.hadoop.hbase.ServerName; 025import org.apache.hadoop.hbase.client.RegionReplicaUtil; 026import org.apache.hadoop.hbase.executor.EventHandler; 027import org.apache.hadoop.hbase.executor.EventType; 028import org.apache.hadoop.hbase.regionserver.HRegion; 029import org.apache.hadoop.hbase.regionserver.HRegionServer; 030import org.apache.hadoop.hbase.regionserver.Region; 031import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext; 032import org.apache.hadoop.hbase.util.Bytes; 033import org.apache.hadoop.hbase.util.RetryCounter; 034import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038import org.slf4j.MDC; 039 040import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 041 042/** 043 * Handles closing of a region on a region server. 044 * <p/> 045 * Just done the same thing with the old {@link CloseRegionHandler}, with some modifications on 046 * fencing and retrying. But we need to keep the {@link CloseRegionHandler} as is to keep compatible 047 * with the zk less assignment for 1.x, otherwise it is not possible to do rolling upgrade. 048 */ 049@InterfaceAudience.Private 050public class UnassignRegionHandler extends EventHandler { 051 052 private static final Logger LOG = LoggerFactory.getLogger(UnassignRegionHandler.class); 053 054 private final String encodedName; 055 056 private final long closeProcId; 057 // If true, the hosting server is aborting. Region close process is different 058 // when we are aborting. 059 // TODO: not used yet, we still use the old CloseRegionHandler when aborting 060 private final boolean abort; 061 062 private final ServerName destination; 063 064 private final RetryCounter retryCounter; 065 066 private boolean evictCache; 067 068 public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId, 069 boolean abort, @Nullable ServerName destination, EventType eventType) { 070 this(server, encodedName, closeProcId, abort, destination, eventType, false); 071 } 072 073 public UnassignRegionHandler(HRegionServer server, String encodedName, long closeProcId, 074 boolean abort, @Nullable ServerName destination, EventType eventType, boolean evictCache) { 075 super(server, eventType); 076 this.encodedName = encodedName; 077 this.closeProcId = closeProcId; 078 this.abort = abort; 079 this.destination = destination; 080 this.retryCounter = HandlerUtil.getRetryCounter(); 081 this.evictCache = evictCache; 082 } 083 084 private HRegionServer getServer() { 085 return (HRegionServer) server; 086 } 087 088 @Override 089 public void process() throws IOException { 090 MDC.put("pid", Long.toString(closeProcId)); 091 HRegionServer rs = getServer(); 092 byte[] encodedNameBytes = Bytes.toBytes(encodedName); 093 Boolean previous = rs.getRegionsInTransitionInRS().putIfAbsent(encodedNameBytes, Boolean.FALSE); 094 if (previous != null) { 095 if (previous) { 096 // This could happen as we will update the region state to OPEN when calling 097 // reportRegionStateTransition, so the HMaster will think the region is online, before we 098 // actually open the region, as reportRegionStateTransition is part of the opening process. 099 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 100 LOG.warn( 101 "Received CLOSE for {} which we are already " + "trying to OPEN; try again after {}ms", 102 encodedName, backoff); 103 rs.getExecutorService().delayedSubmit(this, backoff, TimeUnit.MILLISECONDS); 104 } else { 105 LOG.info( 106 "Received CLOSE for {} which we are already trying to CLOSE," + " but not completed yet", 107 encodedName); 108 } 109 return; 110 } 111 HRegion region = rs.getRegion(encodedName); 112 if (region == null) { 113 LOG.debug("Received CLOSE for {} which is not ONLINE and we're not opening/closing.", 114 encodedName); 115 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 116 return; 117 } 118 String regionName = region.getRegionInfo().getEncodedName(); 119 LOG.info("Close {}", regionName); 120 if (region.getCoprocessorHost() != null) { 121 // XXX: The behavior is a bit broken. At master side there is no FAILED_CLOSE state, so if 122 // there are exception thrown from the CP, we can not report the error to master, and if 123 // here we just return without calling reportRegionStateTransition, the TRSP at master side 124 // will hang there for ever. So here if the CP throws an exception out, the only way is to 125 // abort the RS... 126 region.getCoprocessorHost().preClose(abort); 127 } 128 // This should be true only in the case of splits/merges closing the parent regions, as 129 // there's no point on keep blocks for those region files. As hbase.rs.evictblocksonclose is 130 // false by default we don't bother overriding it if evictCache is false. 131 if (evictCache) { 132 region.getStores().forEach(s -> s.getCacheConfig().setEvictOnClose(true)); 133 } 134 if (region.close(abort) == null) { 135 // XXX: Is this still possible? The old comment says about split, but now split is done at 136 // master side, so... 137 LOG.warn("Can't close {}, already closed during close()", regionName); 138 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 139 return; 140 } 141 142 rs.removeRegion(region, destination); 143 if ( 144 ServerRegionReplicaUtil.isMetaRegionReplicaReplicationEnabled(rs.getConfiguration(), 145 region.getTableDescriptor().getTableName()) 146 ) { 147 if (RegionReplicaUtil.isDefaultReplica(region.getRegionInfo().getReplicaId())) { 148 // If hbase:meta read replicas enabled, remove replication source for hbase:meta Regions. 149 // See assign region handler where we add the replication source on open. 150 rs.getReplicationSourceService().getReplicationManager() 151 .removeCatalogReplicationSource(region.getRegionInfo()); 152 } 153 } 154 if ( 155 !rs.reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.CLOSED, 156 HConstants.NO_SEQNUM, closeProcId, -1, region.getRegionInfo())) 157 ) { 158 throw new IOException("Failed to report close to master: " + regionName); 159 } 160 // Cache the close region procedure id after report region transition succeed. 161 rs.finishRegionProcedure(closeProcId); 162 rs.getRegionsInTransitionInRS().remove(encodedNameBytes, Boolean.FALSE); 163 LOG.info("Closed {}", regionName); 164 } 165 166 @Override 167 protected void handleException(Throwable t) { 168 LOG.warn("Fatal error occurred while closing region {}, aborting...", encodedName, t); 169 // Clear any reference in getServer().getRegionsInTransitionInRS() otherwise can hold up 170 // regionserver abort on cluster shutdown. HBASE-23984. 171 getServer().getRegionsInTransitionInRS().remove(Bytes.toBytes(this.encodedName)); 172 getServer().abort("Failed to close region " + encodedName + " and can not recover", t); 173 } 174 175 public static UnassignRegionHandler create(HRegionServer server, String encodedName, 176 long closeProcId, boolean abort, @Nullable ServerName destination, boolean evictCache) { 177 // Just try our best to determine whether it is for closing meta. It is not the end of the world 178 // if we put the handler into a wrong executor. 179 Region region = server.getRegion(encodedName); 180 EventType eventType = region != null && region.getRegionInfo().isMetaRegion() 181 ? EventType.M_RS_CLOSE_META 182 : EventType.M_RS_CLOSE_REGION; 183 return new UnassignRegionHandler(server, encodedName, closeProcId, abort, destination, 184 eventType, evictCache); 185 } 186}