001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.handler; 019 020import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION; 021 022import java.io.IOException; 023import java.util.concurrent.atomic.AtomicBoolean; 024import org.apache.hadoop.hbase.HConstants; 025import org.apache.hadoop.hbase.Server; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.client.TableDescriptor; 028import org.apache.hadoop.hbase.executor.EventHandler; 029import org.apache.hadoop.hbase.executor.EventType; 030import org.apache.hadoop.hbase.procedure2.Procedure; 031import org.apache.hadoop.hbase.regionserver.HRegion; 032import org.apache.hadoop.hbase.regionserver.HRegionServer; 033import org.apache.hadoop.hbase.regionserver.RegionServerServices; 034import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext; 035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext; 036import org.apache.hadoop.hbase.util.CancelableProgressable; 037import org.apache.yetus.audience.InterfaceAudience; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; 042 043/** 044 * Handles opening of a region on a region server. 045 * <p> 046 * This is executed after receiving an OPEN RPC from the master or client. 047 * @deprecated Keep it here only for compatible 048 * @see AssignRegionHandler 049 */ 050@Deprecated 051@InterfaceAudience.Private 052public class OpenRegionHandler extends EventHandler { 053 private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class); 054 055 protected final RegionServerServices rsServices; 056 057 private final RegionInfo regionInfo; 058 private final TableDescriptor htd; 059 private final long masterSystemTime; 060 061 public OpenRegionHandler(final Server server, final RegionServerServices rsServices, 062 RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) { 063 this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION); 064 } 065 066 protected OpenRegionHandler(final Server server, final RegionServerServices rsServices, 067 final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime, 068 EventType eventType) { 069 super(server, eventType); 070 this.rsServices = rsServices; 071 this.regionInfo = regionInfo; 072 this.htd = htd; 073 this.masterSystemTime = masterSystemTime; 074 } 075 076 public RegionInfo getRegionInfo() { 077 return regionInfo; 078 } 079 080 @Override 081 public void process() throws IOException { 082 boolean openSuccessful = false; 083 final String regionName = regionInfo.getRegionNameAsString(); 084 HRegion region = null; 085 086 try { 087 if (this.server.isStopped() || this.rsServices.isStopping()) { 088 return; 089 } 090 final String encodedName = regionInfo.getEncodedName(); 091 092 // 2 different difficult situations can occur 093 // 1) The opening was cancelled. This is an expected situation 094 // 2) The region is now marked as online while we're suppose to open. This would be a bug. 095 096 // Check that this region is not already online 097 if (this.rsServices.getRegion(encodedName) != null) { 098 LOG.error( 099 "Region " + encodedName + " was already online when we started processing the opening. " 100 + "Marking this new attempt as failed"); 101 return; 102 } 103 104 // Check that we're still supposed to open the region. 105 // If fails, just return. Someone stole the region from under us. 106 if (!isRegionStillOpening()) { 107 LOG.error("Region " + encodedName + " opening cancelled"); 108 return; 109 } 110 111 // Open region. After a successful open, failures in subsequent 112 // processing needs to do a close as part of cleanup. 113 region = openRegion(); 114 if (region == null) { 115 return; 116 } 117 118 if ( 119 !updateMeta(region, masterSystemTime) || this.server.isStopped() 120 || this.rsServices.isStopping() 121 ) { 122 return; 123 } 124 125 if (!isRegionStillOpening()) { 126 return; 127 } 128 129 // Successful region open, and add it to MutableOnlineRegions 130 this.rsServices.addRegion(region); 131 openSuccessful = true; 132 133 // Done! Successful region open 134 LOG.debug("Opened " + regionName + " on " + this.server.getServerName()); 135 } finally { 136 // Do all clean up here 137 if (!openSuccessful) { 138 doCleanUpOnFailedOpen(region); 139 } 140 final Boolean current = this.rsServices.getRegionsInTransitionInRS() 141 .remove(this.regionInfo.getEncodedNameAsBytes()); 142 143 // Let's check if we have met a race condition on open cancellation.... 144 // A better solution would be to not have any race condition. 145 // this.rsServices.getRegionsInTransitionInRS().remove( 146 // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE); 147 // would help. 148 if (openSuccessful) { 149 if (current == null) { // Should NEVER happen, but let's be paranoid. 150 LOG.error("Bad state: we've just opened a region that was NOT in transition. Region=" 151 + regionName); 152 } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're 153 // really unlucky. 154 LOG.error("Race condition: we've finished to open a region, while a close was requested " 155 + " on region=" + regionName + ". It can be a critical error, as a region that" 156 + " should be closed is now opened. Closing it now"); 157 cleanupFailedOpen(region); 158 } 159 } 160 } 161 } 162 163 private void doCleanUpOnFailedOpen(HRegion region) throws IOException { 164 try { 165 if (region != null) { 166 cleanupFailedOpen(region); 167 } 168 } finally { 169 rsServices.reportRegionStateTransition(new RegionStateTransitionContext( 170 TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo)); 171 } 172 } 173 174 /** 175 * Update ZK or META. This can take a while if for example the hbase:meta is not available -- if 176 * server hosting hbase:meta crashed and we are waiting on it to come back -- so run in a thread 177 * and keep updating znode state meantime so master doesn't timeout our region-in-transition. 178 * Caller must cleanup region if this fails. 179 */ 180 private boolean updateMeta(final HRegion r, long masterSystemTime) { 181 if (this.server.isStopped() || this.rsServices.isStopping()) { 182 return false; 183 } 184 // Object we do wait/notify on. Make it boolean. If set, we're done. 185 // Else, wait. 186 final AtomicBoolean signaller = new AtomicBoolean(false); 187 PostOpenDeployTasksThread t = 188 new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime); 189 t.start(); 190 // Post open deploy task: 191 // meta => update meta location in ZK 192 // other region => update meta 193 while ( 194 !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping() 195 && isRegionStillOpening() 196 ) { 197 synchronized (signaller) { 198 try { 199 // Wait for 10 seconds, so that server shutdown 200 // won't take too long if this thread happens to run. 201 if (!signaller.get()) signaller.wait(10000); 202 } catch (InterruptedException e) { 203 // Go to the loop check. 204 } 205 } 206 } 207 // Is thread still alive? We may have left above loop because server is 208 // stopping or we timed out the edit. Is so, interrupt it. 209 if (t.isAlive()) { 210 if (!signaller.get()) { 211 // Thread still running; interrupt 212 LOG.debug("Interrupting thread " + t); 213 t.interrupt(); 214 } 215 try { 216 t.join(); 217 } catch (InterruptedException ie) { 218 LOG.warn("Interrupted joining " + r.getRegionInfo().getRegionNameAsString(), ie); 219 Thread.currentThread().interrupt(); 220 } 221 } 222 223 // Was there an exception opening the region? This should trigger on 224 // InterruptedException too. If so, we failed. 225 return (!Thread.interrupted() && t.getException() == null); 226 } 227 228 /** 229 * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to 230 * check for exceptions running 231 * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)} 232 */ 233 static class PostOpenDeployTasksThread extends Thread { 234 private Throwable exception = null; 235 private final Server server; 236 private final RegionServerServices services; 237 private final HRegion region; 238 private final AtomicBoolean signaller; 239 private final long masterSystemTime; 240 241 PostOpenDeployTasksThread(final HRegion region, final Server server, 242 final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) { 243 super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName()); 244 this.setDaemon(true); 245 this.server = server; 246 this.services = services; 247 this.region = region; 248 this.signaller = signaller; 249 this.masterSystemTime = masterSystemTime; 250 } 251 252 @Override 253 public void run() { 254 try { 255 this.services.postOpenDeployTasks( 256 new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime)); 257 } catch (Throwable e) { 258 String msg = "Exception running postOpenDeployTasks; region=" 259 + this.region.getRegionInfo().getEncodedName(); 260 this.exception = e; 261 if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) { 262 server.abort(msg, e); 263 } else { 264 LOG.warn(msg, e); 265 } 266 } 267 // We're done. Set flag then wake up anyone waiting on thread to complete. 268 this.signaller.set(true); 269 synchronized (this.signaller) { 270 this.signaller.notify(); 271 } 272 } 273 274 /** Returns Null or the run exception; call this method after thread is done. */ 275 Throwable getException() { 276 return this.exception; 277 } 278 } 279 280 /** Returns Instance of HRegion if successful open else null. */ 281 private HRegion openRegion() { 282 HRegion region = null; 283 boolean compactionEnabled = 284 ((HRegionServer) server).getCompactSplitThread().isCompactionsEnabled(); 285 this.server.getConfiguration().setBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION, 286 compactionEnabled); 287 try { 288 // Instantiate the region. This also periodically tickles OPENING 289 // state so master doesn't timeout this region in transition. 290 region = 291 HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo), 292 this.server.getConfiguration(), this.rsServices, new CancelableProgressable() { 293 @Override 294 public boolean progress() { 295 if (!isRegionStillOpening()) { 296 LOG.warn("Open region aborted since it isn't opening any more"); 297 return false; 298 } 299 return true; 300 } 301 }); 302 } catch (Throwable t) { 303 // We failed open. Our caller will see the 'null' return value 304 // and transition the node back to FAILED_OPEN. If that fails, 305 // we rely on the Timeout Monitor in the master to reassign. 306 LOG.error("Failed open of region=" + this.regionInfo.getRegionNameAsString(), t); 307 } 308 return region; 309 } 310 311 private void cleanupFailedOpen(final HRegion region) throws IOException { 312 if (region != null) { 313 this.rsServices.removeRegion(region, null); 314 region.close(); 315 } 316 } 317 318 private static boolean isRegionStillOpening(RegionInfo regionInfo, 319 RegionServerServices rsServices) { 320 byte[] encodedName = regionInfo.getEncodedNameAsBytes(); 321 Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName); 322 return Boolean.TRUE.equals(action); // true means opening for RIT 323 } 324 325 private boolean isRegionStillOpening() { 326 return isRegionStillOpening(regionInfo, rsServices); 327 } 328}