001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.io.InterruptedIOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collection; 025import java.util.Collections; 026import java.util.HashMap; 027import java.util.List; 028import java.util.Map; 029import java.util.concurrent.Callable; 030import java.util.concurrent.ExecutionException; 031import java.util.concurrent.ExecutorService; 032import java.util.concurrent.Executors; 033import java.util.concurrent.Future; 034import java.util.concurrent.TimeUnit; 035import java.util.stream.Stream; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.fs.FileSystem; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.hbase.DoNotRetryIOException; 040import org.apache.hadoop.hbase.HConstants; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.UnknownRegionException; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.MasterSwitchType; 046import org.apache.hadoop.hbase.client.Mutation; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.TableDescriptor; 050import org.apache.hadoop.hbase.io.hfile.CacheConfig; 051import org.apache.hadoop.hbase.master.MasterCoprocessorHost; 052import org.apache.hadoop.hbase.master.MasterFileSystem; 053import org.apache.hadoop.hbase.master.RegionState.State; 054import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; 055import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; 058import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; 059import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 060import org.apache.hadoop.hbase.quotas.QuotaExceededException; 061import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 062import org.apache.hadoop.hbase.regionserver.HStore; 063import org.apache.hadoop.hbase.regionserver.HStoreFile; 064import org.apache.hadoop.hbase.regionserver.RegionSplitPolicy; 065import org.apache.hadoop.hbase.regionserver.RegionSplitRestriction; 066import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 067import org.apache.hadoop.hbase.regionserver.StoreUtils; 068import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 069import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 070import org.apache.hadoop.hbase.util.Bytes; 071import org.apache.hadoop.hbase.util.CommonFSUtils; 072import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 073import org.apache.hadoop.hbase.util.FSUtils; 074import org.apache.hadoop.hbase.util.Pair; 075import org.apache.hadoop.hbase.util.Threads; 076import org.apache.hadoop.hbase.wal.WALSplitUtil; 077import org.apache.hadoop.util.ReflectionUtils; 078import org.apache.yetus.audience.InterfaceAudience; 079import org.slf4j.Logger; 080import org.slf4j.LoggerFactory; 081 082import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; 083 084import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 085import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.GetRegionInfoResponse; 086import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 087import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.SplitTableRegionState; 088 089/** 090 * The procedure to split a region in a table. Takes lock on the parent region. It holds the lock 091 * for the life of the procedure. 092 * <p> 093 * Throws exception on construction if determines context hostile to spllt (cluster going down or 094 * master is shutting down or table is disabled). 095 * </p> 096 */ 097@InterfaceAudience.Private 098public class SplitTableRegionProcedure 099 extends AbstractStateMachineRegionProcedure<SplitTableRegionState> { 100 private static final Logger LOG = LoggerFactory.getLogger(SplitTableRegionProcedure.class); 101 private RegionInfo daughterOneRI; 102 private RegionInfo daughterTwoRI; 103 private byte[] bestSplitRow; 104 private RegionSplitPolicy splitPolicy; 105 106 public SplitTableRegionProcedure() { 107 // Required by the Procedure framework to create the procedure on replay 108 } 109 110 public SplitTableRegionProcedure(final MasterProcedureEnv env, final RegionInfo regionToSplit, 111 final byte[] splitRow) throws IOException { 112 super(env, regionToSplit); 113 preflightChecks(env, true); 114 // When procedure goes to run in its prepare step, it also does these checkOnline checks. Here 115 // we fail-fast on construction. There it skips the split with just a warning. 116 checkOnline(env, regionToSplit); 117 this.bestSplitRow = splitRow; 118 TableDescriptor tableDescriptor = 119 env.getMasterServices().getTableDescriptors().get(getTableName()); 120 Configuration conf = env.getMasterConfiguration(); 121 if (hasBestSplitRow()) { 122 // Apply the split restriction for the table to the user-specified split point 123 RegionSplitRestriction splitRestriction = 124 RegionSplitRestriction.create(tableDescriptor, conf); 125 byte[] restrictedSplitRow = splitRestriction.getRestrictedSplitPoint(bestSplitRow); 126 if (!Bytes.equals(bestSplitRow, restrictedSplitRow)) { 127 LOG.warn( 128 "The specified split point {} violates the split restriction of the table. " 129 + "Using {} as a split point.", 130 Bytes.toStringBinary(bestSplitRow), Bytes.toStringBinary(restrictedSplitRow)); 131 bestSplitRow = restrictedSplitRow; 132 } 133 } 134 checkSplittable(env, regionToSplit); 135 final TableName table = regionToSplit.getTable(); 136 final long rid = getDaughterRegionIdTimestamp(regionToSplit); 137 this.daughterOneRI = 138 RegionInfoBuilder.newBuilder(table).setStartKey(regionToSplit.getStartKey()) 139 .setEndKey(bestSplitRow).setSplit(false).setRegionId(rid).build(); 140 this.daughterTwoRI = RegionInfoBuilder.newBuilder(table).setStartKey(bestSplitRow) 141 .setEndKey(regionToSplit.getEndKey()).setSplit(false).setRegionId(rid).build(); 142 143 if (tableDescriptor.getRegionSplitPolicyClassName() != null) { 144 // Since we don't have region reference here, creating the split policy instance without it. 145 // This can be used to invoke methods which don't require Region reference. This instantiation 146 // of a class on Master-side though it only makes sense on the RegionServer-side is 147 // for Phoenix Local Indexing. Refer HBASE-12583 for more information. 148 Class<? extends RegionSplitPolicy> clazz = 149 RegionSplitPolicy.getSplitPolicyClass(tableDescriptor, conf); 150 this.splitPolicy = ReflectionUtils.newInstance(clazz, conf); 151 } 152 } 153 154 @Override 155 protected LockState acquireLock(final MasterProcedureEnv env) { 156 if ( 157 env.getProcedureScheduler().waitRegions(this, getTableName(), getParentRegion(), 158 daughterOneRI, daughterTwoRI) 159 ) { 160 try { 161 LOG.debug(LockState.LOCK_EVENT_WAIT + " " + env.getProcedureScheduler().dumpLocks()); 162 } catch (IOException e) { 163 // Ignore, just for logging 164 } 165 return LockState.LOCK_EVENT_WAIT; 166 } 167 return LockState.LOCK_ACQUIRED; 168 } 169 170 @Override 171 protected void releaseLock(final MasterProcedureEnv env) { 172 env.getProcedureScheduler().wakeRegions(this, getTableName(), getParentRegion(), daughterOneRI, 173 daughterTwoRI); 174 } 175 176 public RegionInfo getDaughterOneRI() { 177 return daughterOneRI; 178 } 179 180 public RegionInfo getDaughterTwoRI() { 181 return daughterTwoRI; 182 } 183 184 private boolean hasBestSplitRow() { 185 return bestSplitRow != null && bestSplitRow.length > 0; 186 } 187 188 /** 189 * Check whether the region is splittable 190 * @param env MasterProcedureEnv 191 * @param regionToSplit parent Region to be split 192 */ 193 private void checkSplittable(final MasterProcedureEnv env, final RegionInfo regionToSplit) 194 throws IOException { 195 // Ask the remote RS if this region is splittable. 196 // If we get an IOE, report it along w/ the failure so can see why we are not splittable at 197 // this time. 198 if (regionToSplit.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 199 throw new IllegalArgumentException("Can't invoke split on non-default regions directly"); 200 } 201 RegionStateNode node = 202 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 203 IOException splittableCheckIOE = null; 204 boolean splittable = false; 205 if (node != null) { 206 try { 207 GetRegionInfoResponse response; 208 if (!hasBestSplitRow()) { 209 LOG.info( 210 "{} splitKey isn't explicitly specified, will try to find a best split key from RS {}", 211 node.getRegionInfo().getRegionNameAsString(), node.getRegionLocation()); 212 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 213 node.getRegionInfo(), true); 214 bestSplitRow = 215 response.hasBestSplitRow() ? response.getBestSplitRow().toByteArray() : null; 216 } else { 217 response = AssignmentManagerUtil.getRegionInfoResponse(env, node.getRegionLocation(), 218 node.getRegionInfo(), false); 219 } 220 splittable = response.hasSplittable() && response.getSplittable(); 221 if (LOG.isDebugEnabled()) { 222 LOG.debug("Splittable=" + splittable + " " + node.toShortString()); 223 } 224 } catch (IOException e) { 225 splittableCheckIOE = e; 226 } 227 } 228 229 if (!splittable) { 230 IOException e = 231 new DoNotRetryIOException(regionToSplit.getShortNameToLog() + " NOT splittable"); 232 if (splittableCheckIOE != null) { 233 e.initCause(splittableCheckIOE); 234 } 235 throw e; 236 } 237 238 if (!hasBestSplitRow()) { 239 throw new DoNotRetryIOException("Region not splittable because bestSplitPoint = null, " 240 + "maybe table is too small for auto split. For force split, try specifying split row"); 241 } 242 243 if (Bytes.equals(regionToSplit.getStartKey(), bestSplitRow)) { 244 throw new DoNotRetryIOException( 245 "Split row is equal to startkey: " + Bytes.toStringBinary(bestSplitRow)); 246 } 247 248 if (!regionToSplit.containsRow(bestSplitRow)) { 249 throw new DoNotRetryIOException("Split row is not inside region key range splitKey:" 250 + Bytes.toStringBinary(bestSplitRow) + " region: " + regionToSplit); 251 } 252 } 253 254 /** 255 * Calculate daughter regionid to use. 256 * @param hri Parent {@link RegionInfo} 257 * @return Daughter region id (timestamp) to use. 258 */ 259 private static long getDaughterRegionIdTimestamp(final RegionInfo hri) { 260 long rid = EnvironmentEdgeManager.currentTime(); 261 // Regionid is timestamp. Can't be less than that of parent else will insert 262 // at wrong location in hbase:meta (See HBASE-710). 263 if (rid < hri.getRegionId()) { 264 LOG.warn("Clock skew; parent regions id is " + hri.getRegionId() 265 + " but current time here is " + rid); 266 rid = hri.getRegionId() + 1; 267 } 268 return rid; 269 } 270 271 private void removeNonDefaultReplicas(MasterProcedureEnv env) throws IOException { 272 AssignmentManagerUtil.removeNonDefaultReplicas(env, Stream.of(getParentRegion()), 273 getRegionReplication(env)); 274 } 275 276 private void checkClosedRegions(MasterProcedureEnv env) throws IOException { 277 // theoretically this should not happen any more after we use TRSP, but anyway let's add a check 278 // here 279 AssignmentManagerUtil.checkClosedRegion(env, getParentRegion()); 280 } 281 282 @Override 283 protected Flow executeFromState(MasterProcedureEnv env, SplitTableRegionState state) 284 throws InterruptedException { 285 LOG.trace("{} execute state={}", this, state); 286 287 try { 288 switch (state) { 289 case SPLIT_TABLE_REGION_PREPARE: 290 if (prepareSplitRegion(env)) { 291 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION); 292 break; 293 } else { 294 return Flow.NO_MORE_STATE; 295 } 296 case SPLIT_TABLE_REGION_PRE_OPERATION: 297 preSplitRegion(env); 298 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CLOSE_PARENT_REGION); 299 break; 300 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 301 addChildProcedure(createUnassignProcedures(env)); 302 // createUnassignProcedures() can throw out IOException. If this happens, 303 // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions 304 // is closed as all created UnassignProcedures are rolled back. If it rolls back with 305 // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(), 306 // otherwise, it will result in OpenRegionProcedure for an already open region. 307 setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); 308 break; 309 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 310 checkClosedRegions(env); 311 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS); 312 break; 313 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 314 removeNonDefaultReplicas(env); 315 createDaughterRegions(env); 316 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE); 317 break; 318 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 319 writeMaxSequenceIdFile(env); 320 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META); 321 break; 322 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 323 preSplitRegionBeforeMETA(env); 324 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_UPDATE_META); 325 break; 326 case SPLIT_TABLE_REGION_UPDATE_META: 327 updateMeta(env); 328 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META); 329 break; 330 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 331 preSplitRegionAfterMETA(env); 332 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS); 333 break; 334 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 335 addChildProcedure(createAssignProcedures(env)); 336 setNextState(SplitTableRegionState.SPLIT_TABLE_REGION_POST_OPERATION); 337 break; 338 case SPLIT_TABLE_REGION_POST_OPERATION: 339 postSplitRegion(env); 340 return Flow.NO_MORE_STATE; 341 default: 342 throw new UnsupportedOperationException(this + " unhandled state=" + state); 343 } 344 } catch (IOException e) { 345 String msg = "Splitting " + getParentRegion().getEncodedName() + ", " + this; 346 if (!isRollbackSupported(state)) { 347 // We reach a state that cannot be rolled back. We just need to keep retrying. 348 LOG.warn(msg, e); 349 } else { 350 LOG.error(msg, e); 351 setFailure("master-split-regions", e); 352 } 353 } 354 // if split fails, need to call ((HRegion)parent).clearSplit() when it is a force split 355 return Flow.HAS_MORE_STATE; 356 } 357 358 /** 359 * To rollback {@link SplitTableRegionProcedure}, an AssignProcedure is asynchronously submitted 360 * for parent region to be split (rollback doesn't wait on the completion of the AssignProcedure) 361 * . This can be improved by changing rollback() to support sub-procedures. See HBASE-19851 for 362 * details. 363 */ 364 @Override 365 protected void rollbackState(final MasterProcedureEnv env, final SplitTableRegionState state) 366 throws IOException, InterruptedException { 367 LOG.trace("{} rollback state={}", this, state); 368 369 try { 370 switch (state) { 371 case SPLIT_TABLE_REGION_POST_OPERATION: 372 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 373 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 374 case SPLIT_TABLE_REGION_UPDATE_META: 375 // PONR 376 throw new UnsupportedOperationException(this + " unhandled state=" + state); 377 case SPLIT_TABLE_REGION_PRE_OPERATION_BEFORE_META: 378 break; 379 case SPLIT_TABLE_REGION_CREATE_DAUGHTER_REGIONS: 380 case SPLIT_TABLE_REGION_WRITE_MAX_SEQUENCE_ID_FILE: 381 deleteDaughterRegions(env); 382 break; 383 case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: 384 openParentRegion(env); 385 break; 386 case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: 387 // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call 388 // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an 389 // already open region. 390 break; 391 case SPLIT_TABLE_REGION_PRE_OPERATION: 392 postRollBackSplitRegion(env); 393 break; 394 case SPLIT_TABLE_REGION_PREPARE: 395 rollbackPrepareSplit(env); 396 break; 397 default: 398 throw new UnsupportedOperationException(this + " unhandled state=" + state); 399 } 400 } catch (IOException e) { 401 // This will be retried. Unless there is a bug in the code, 402 // this should be just a "temporary error" (e.g. network down) 403 LOG.warn("pid=" + getProcId() + " failed rollback attempt step " + state 404 + " for splitting the region " + getParentRegion().getEncodedName() + " in table " 405 + getTableName(), e); 406 throw e; 407 } 408 } 409 410 /* 411 * Check whether we are in the state that can be rollback 412 */ 413 @Override 414 protected boolean isRollbackSupported(final SplitTableRegionState state) { 415 switch (state) { 416 case SPLIT_TABLE_REGION_POST_OPERATION: 417 case SPLIT_TABLE_REGION_OPEN_CHILD_REGIONS: 418 case SPLIT_TABLE_REGION_PRE_OPERATION_AFTER_META: 419 case SPLIT_TABLE_REGION_UPDATE_META: 420 // It is not safe to rollback if we reach to these states. 421 return false; 422 default: 423 break; 424 } 425 return true; 426 } 427 428 @Override 429 protected SplitTableRegionState getState(final int stateId) { 430 return SplitTableRegionState.forNumber(stateId); 431 } 432 433 @Override 434 protected int getStateId(final SplitTableRegionState state) { 435 return state.getNumber(); 436 } 437 438 @Override 439 protected SplitTableRegionState getInitialState() { 440 return SplitTableRegionState.SPLIT_TABLE_REGION_PREPARE; 441 } 442 443 @Override 444 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 445 super.serializeStateData(serializer); 446 447 final MasterProcedureProtos.SplitTableRegionStateData.Builder splitTableRegionMsg = 448 MasterProcedureProtos.SplitTableRegionStateData.newBuilder() 449 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())) 450 .setParentRegionInfo(ProtobufUtil.toRegionInfo(getRegion())) 451 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterOneRI)) 452 .addChildRegionInfo(ProtobufUtil.toRegionInfo(daughterTwoRI)); 453 serializer.serialize(splitTableRegionMsg.build()); 454 } 455 456 @Override 457 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 458 super.deserializeStateData(serializer); 459 460 final MasterProcedureProtos.SplitTableRegionStateData splitTableRegionsMsg = 461 serializer.deserialize(MasterProcedureProtos.SplitTableRegionStateData.class); 462 setUser(MasterProcedureUtil.toUserInfo(splitTableRegionsMsg.getUserInfo())); 463 setRegion(ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getParentRegionInfo())); 464 assert (splitTableRegionsMsg.getChildRegionInfoCount() == 2); 465 daughterOneRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(0)); 466 daughterTwoRI = ProtobufUtil.toRegionInfo(splitTableRegionsMsg.getChildRegionInfo(1)); 467 } 468 469 @Override 470 public void toStringClassDetails(StringBuilder sb) { 471 sb.append(getClass().getSimpleName()); 472 sb.append(" table="); 473 sb.append(getTableName()); 474 sb.append(", parent="); 475 sb.append(getParentRegion().getShortNameToLog()); 476 sb.append(", daughterA="); 477 sb.append(daughterOneRI.getShortNameToLog()); 478 sb.append(", daughterB="); 479 sb.append(daughterTwoRI.getShortNameToLog()); 480 } 481 482 private RegionInfo getParentRegion() { 483 return getRegion(); 484 } 485 486 @Override 487 public TableOperationType getTableOperationType() { 488 return TableOperationType.REGION_SPLIT; 489 } 490 491 @Override 492 protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { 493 return env.getAssignmentManager().getAssignmentManagerMetrics().getSplitProcMetrics(); 494 } 495 496 private byte[] getSplitRow() { 497 return daughterTwoRI.getStartKey(); 498 } 499 500 private static final State[] EXPECTED_SPLIT_STATES = new State[] { State.OPEN, State.CLOSED }; 501 502 /** 503 * Prepare to Split region. 504 * @param env MasterProcedureEnv 505 */ 506 public boolean prepareSplitRegion(final MasterProcedureEnv env) throws IOException { 507 // Fail if we are taking snapshot for the given table 508 if ( 509 env.getMasterServices().getSnapshotManager() 510 .isTableTakingAnySnapshot(getParentRegion().getTable()) 511 ) { 512 setFailure(new IOException("Skip splitting region " + getParentRegion().getShortNameToLog() 513 + ", because we are taking snapshot for the table " + getParentRegion().getTable())); 514 return false; 515 } 516 // Check whether the region is splittable 517 RegionStateNode node = 518 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 519 520 if (node == null) { 521 throw new UnknownRegionException(getParentRegion().getRegionNameAsString()); 522 } 523 524 RegionInfo parentHRI = node.getRegionInfo(); 525 if (parentHRI == null) { 526 LOG.info("Unsplittable; parent region is null; node={}", node); 527 return false; 528 } 529 // Lookup the parent HRI state from the AM, which has the latest updated info. 530 // Protect against the case where concurrent SPLIT requests came in and succeeded 531 // just before us. 532 if (node.isInState(State.SPLIT)) { 533 LOG.info("Split of " + parentHRI + " skipped; state is already SPLIT"); 534 return false; 535 } 536 if (parentHRI.isSplit() || parentHRI.isOffline()) { 537 LOG.info("Split of " + parentHRI + " skipped because offline/split."); 538 return false; 539 } 540 541 // expected parent to be online or closed 542 if (!node.isInState(EXPECTED_SPLIT_STATES)) { 543 // We may have SPLIT already? 544 setFailure( 545 new IOException("Split " + parentHRI.getRegionNameAsString() + " FAILED because state=" 546 + node.getState() + "; expected " + Arrays.toString(EXPECTED_SPLIT_STATES))); 547 return false; 548 } 549 550 // Mostly the below two checks are not used because we already check the switches before 551 // submitting the split procedure. Just for safety, we are checking the switch again here. 552 // Also, in case the switch was set to false after submission, this procedure can be rollbacked, 553 // thanks to this double check! 554 // case 1: check for cluster level switch 555 if (!env.getMasterServices().isSplitOrMergeEnabled(MasterSwitchType.SPLIT)) { 556 LOG.warn("pid=" + getProcId() + " split switch is off! skip split of " + parentHRI); 557 setFailure(new IOException( 558 "Split region " + parentHRI.getRegionNameAsString() + " failed due to split switch off")); 559 return false; 560 } 561 // case 2: check for table level switch 562 if (!env.getMasterServices().getTableDescriptors().get(getTableName()).isSplitEnabled()) { 563 LOG.warn("pid={}, split is disabled for the table! Skipping split of {}", getProcId(), 564 parentHRI); 565 setFailure(new IOException("Split region " + parentHRI.getRegionNameAsString() 566 + " failed as region split is disabled for the table")); 567 return false; 568 } 569 570 // set node state as SPLITTING 571 node.setState(State.SPLITTING); 572 573 // Since we have the lock and the master is coordinating the operation 574 // we are always able to split the region 575 return true; 576 } 577 578 /** 579 * Rollback prepare split region 580 * @param env MasterProcedureEnv 581 */ 582 private void rollbackPrepareSplit(final MasterProcedureEnv env) { 583 RegionStateNode parentRegionStateNode = 584 env.getAssignmentManager().getRegionStates().getRegionStateNode(getParentRegion()); 585 if (parentRegionStateNode.getState() == State.SPLITTING) { 586 parentRegionStateNode.setState(State.OPEN); 587 } 588 } 589 590 /** 591 * Action before splitting region in a table. 592 * @param env MasterProcedureEnv 593 */ 594 private void preSplitRegion(final MasterProcedureEnv env) 595 throws IOException, InterruptedException { 596 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 597 if (cpHost != null) { 598 cpHost.preSplitRegionAction(getTableName(), getSplitRow(), getUser()); 599 } 600 601 // TODO: Clean up split and merge. Currently all over the place. 602 // Notify QuotaManager and RegionNormalizer 603 try { 604 env.getMasterServices().getMasterQuotaManager().onRegionSplit(this.getParentRegion()); 605 } catch (QuotaExceededException e) { 606 // TODO: why is this here? split requests can be submitted by actors other than the normalizer 607 env.getMasterServices().getRegionNormalizerManager() 608 .planSkipped(NormalizationPlan.PlanType.SPLIT); 609 throw e; 610 } 611 } 612 613 /** 614 * Action after rollback a split table region action. 615 * @param env MasterProcedureEnv 616 */ 617 private void postRollBackSplitRegion(final MasterProcedureEnv env) throws IOException { 618 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 619 if (cpHost != null) { 620 cpHost.postRollBackSplitRegionAction(getUser()); 621 } 622 } 623 624 /** 625 * Rollback close parent region 626 */ 627 private void openParentRegion(MasterProcedureEnv env) throws IOException { 628 AssignmentManagerUtil.reopenRegionsForRollback(env, 629 Collections.singletonList((getParentRegion())), getRegionReplication(env), 630 getParentRegionServerName(env)); 631 } 632 633 /** 634 * Create daughter regions 635 */ 636 public void createDaughterRegions(final MasterProcedureEnv env) throws IOException { 637 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 638 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 639 final FileSystem fs = mfs.getFileSystem(); 640 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 641 env.getMasterConfiguration(), fs, tabledir, getParentRegion(), false); 642 regionFs.createSplitsDir(daughterOneRI, daughterTwoRI); 643 644 Pair<List<Path>, List<Path>> expectedReferences = splitStoreFiles(env, regionFs); 645 646 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 647 regionFs.getSplitsDir(daughterOneRI)); 648 regionFs.commitDaughterRegion(daughterOneRI, expectedReferences.getFirst(), env); 649 assertSplitResultFilesCount(fs, expectedReferences.getFirst().size(), 650 new Path(tabledir, daughterOneRI.getEncodedName())); 651 652 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 653 regionFs.getSplitsDir(daughterTwoRI)); 654 regionFs.commitDaughterRegion(daughterTwoRI, expectedReferences.getSecond(), env); 655 assertSplitResultFilesCount(fs, expectedReferences.getSecond().size(), 656 new Path(tabledir, daughterTwoRI.getEncodedName())); 657 } 658 659 private void deleteDaughterRegions(final MasterProcedureEnv env) throws IOException { 660 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 661 final Path tabledir = CommonFSUtils.getTableDir(mfs.getRootDir(), getTableName()); 662 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 663 tabledir, daughterOneRI); 664 HRegionFileSystem.deleteRegionFromFileSystem(env.getMasterConfiguration(), mfs.getFileSystem(), 665 tabledir, daughterTwoRI); 666 } 667 668 /** 669 * Create Split directory 670 * @param env MasterProcedureEnv 671 */ 672 private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env, 673 final HRegionFileSystem regionFs) throws IOException { 674 final Configuration conf = env.getMasterConfiguration(); 675 TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 676 // The following code sets up a thread pool executor with as many slots as 677 // there's files to split. It then fires up everything, waits for 678 // completion and finally checks for any exception 679 // 680 // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under the 681 // table dir. In case of failure, the proc would go through this again, already existing 682 // region dirs and split files would just be ignored, new split files should get created. 683 int nbFiles = 0; 684 final Map<String, Collection<StoreFileInfo>> files = 685 new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount()); 686 for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) { 687 String family = cfd.getNameAsString(); 688 StoreFileTracker tracker = 689 StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs); 690 Collection<StoreFileInfo> sfis = tracker.load(); 691 if (sfis == null) { 692 continue; 693 } 694 Collection<StoreFileInfo> filteredSfis = null; 695 for (StoreFileInfo sfi : sfis) { 696 // Filter. There is a lag cleaning up compacted reference files. They get cleared 697 // after a delay in case outstanding Scanners still have references. Because of this, 698 // the listing of the Store content may have straggler reference files. Skip these. 699 // It should be safe to skip references at this point because we checked above with 700 // the region if it thinks it is splittable and if we are here, it thinks it is 701 // splitable. 702 if (sfi.isReference()) { 703 LOG.info("Skipping split of " + sfi + "; presuming ready for archiving."); 704 continue; 705 } 706 if (filteredSfis == null) { 707 filteredSfis = new ArrayList<StoreFileInfo>(sfis.size()); 708 files.put(family, filteredSfis); 709 } 710 filteredSfis.add(sfi); 711 nbFiles++; 712 } 713 } 714 if (nbFiles == 0) { 715 // no file needs to be splitted. 716 return new Pair<>(Collections.emptyList(), Collections.emptyList()); 717 } 718 // Max #threads is the smaller of the number of storefiles or the default max determined above. 719 int maxThreads = Math.min( 720 conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, 721 conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), 722 nbFiles); 723 LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" 724 + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads); 725 final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, 726 new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true) 727 .setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build()); 728 final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles); 729 730 // Split each store file. 731 for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) { 732 byte[] familyName = Bytes.toBytes(e.getKey()); 733 final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName); 734 final Collection<StoreFileInfo> storeFiles = e.getValue(); 735 if (storeFiles != null && storeFiles.size() > 0) { 736 final Configuration storeConfiguration = 737 StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd); 738 for (StoreFileInfo storeFileInfo : storeFiles) { 739 // As this procedure is running on master, use CacheConfig.DISABLED means 740 // don't cache any block. 741 // We also need to pass through a suitable CompoundConfiguration as if this 742 // is running in a regionserver's Store context, or we might not be able 743 // to read the hfiles. 744 storeFileInfo.setConf(storeConfiguration); 745 StoreFileSplitter sfs = new StoreFileSplitter(regionFs, familyName, 746 new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED)); 747 futures.add(threadPool.submit(sfs)); 748 } 749 } 750 } 751 // Shutdown the pool 752 threadPool.shutdown(); 753 754 // Wait for all the tasks to finish. 755 // When splits ran on the RegionServer, how-long-to-wait-configuration was named 756 // hbase.regionserver.fileSplitTimeout. If set, use its value. 757 long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 758 conf.getLong("hbase.regionserver.fileSplitTimeout", 600000)); 759 try { 760 boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS); 761 if (stillRunning) { 762 threadPool.shutdownNow(); 763 // wait for the thread to shutdown completely. 764 while (!threadPool.isTerminated()) { 765 Thread.sleep(50); 766 } 767 throw new IOException( 768 "Took too long to split the" + " files and create the references, aborting split"); 769 } 770 } catch (InterruptedException e) { 771 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 772 } 773 774 List<Path> daughterA = new ArrayList<>(); 775 List<Path> daughterB = new ArrayList<>(); 776 // Look for any exception 777 for (Future<Pair<Path, Path>> future : futures) { 778 try { 779 Pair<Path, Path> p = future.get(); 780 if (p.getFirst() != null) { 781 daughterA.add(p.getFirst()); 782 } 783 if (p.getSecond() != null) { 784 daughterB.add(p.getSecond()); 785 } 786 } catch (InterruptedException e) { 787 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 788 } catch (ExecutionException e) { 789 throw new IOException(e); 790 } 791 } 792 793 if (LOG.isDebugEnabled()) { 794 LOG.debug("pid=" + getProcId() + " split storefiles for region " 795 + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA 796 + " storefiles, Daughter B: " + daughterB + " storefiles."); 797 } 798 return new Pair<>(daughterA, daughterB); 799 } 800 801 private void assertSplitResultFilesCount(final FileSystem fs, 802 final int expectedSplitResultFileCount, Path dir) throws IOException { 803 if (expectedSplitResultFileCount != 0) { 804 int resultFileCount = FSUtils.getRegionReferenceAndLinkFileCount(fs, dir); 805 if (expectedSplitResultFileCount != resultFileCount) { 806 throw new IOException("Failing split. Didn't have expected reference and HFileLink files" 807 + ", expected=" + expectedSplitResultFileCount + ", actual=" + resultFileCount); 808 } 809 } 810 } 811 812 private Pair<Path, Path> splitStoreFile(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) 813 throws IOException { 814 if (LOG.isDebugEnabled()) { 815 LOG.debug("pid=" + getProcId() + " splitting started for store file: " + sf.getPath() 816 + " for region: " + getParentRegion().getShortNameToLog()); 817 } 818 819 final byte[] splitRow = getSplitRow(); 820 final String familyName = Bytes.toString(family); 821 final Path path_first = 822 regionFs.splitStoreFile(this.daughterOneRI, familyName, sf, splitRow, false, splitPolicy); 823 final Path path_second = 824 regionFs.splitStoreFile(this.daughterTwoRI, familyName, sf, splitRow, true, splitPolicy); 825 if (LOG.isDebugEnabled()) { 826 LOG.debug("pid=" + getProcId() + " splitting complete for store file: " + sf.getPath() 827 + " for region: " + getParentRegion().getShortNameToLog()); 828 } 829 return new Pair<Path, Path>(path_first, path_second); 830 } 831 832 /** 833 * Utility class used to do the file splitting / reference writing in parallel instead of 834 * sequentially. 835 */ 836 private class StoreFileSplitter implements Callable<Pair<Path, Path>> { 837 private final HRegionFileSystem regionFs; 838 private final byte[] family; 839 private final HStoreFile sf; 840 841 /** 842 * Constructor that takes what it needs to split 843 * @param regionFs the file system 844 * @param family Family that contains the store file 845 * @param sf which file 846 */ 847 public StoreFileSplitter(HRegionFileSystem regionFs, byte[] family, HStoreFile sf) { 848 this.regionFs = regionFs; 849 this.sf = sf; 850 this.family = family; 851 } 852 853 @Override 854 public Pair<Path, Path> call() throws IOException { 855 return splitStoreFile(regionFs, family, sf); 856 } 857 } 858 859 /** 860 * Post split region actions before the Point-of-No-Return step 861 * @param env MasterProcedureEnv 862 **/ 863 private void preSplitRegionBeforeMETA(final MasterProcedureEnv env) 864 throws IOException, InterruptedException { 865 final List<Mutation> metaEntries = new ArrayList<Mutation>(); 866 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 867 if (cpHost != null) { 868 cpHost.preSplitBeforeMETAAction(getSplitRow(), metaEntries, getUser()); 869 try { 870 for (Mutation p : metaEntries) { 871 RegionInfo.parseRegionName(p.getRow()); 872 } 873 } catch (IOException e) { 874 LOG.error("pid=" + getProcId() + " row key of mutation from coprocessor not parsable as " 875 + "region name." + "Mutations from coprocessor should only for hbase:meta table."); 876 throw e; 877 } 878 } 879 } 880 881 /** 882 * Add daughter regions to META 883 * @param env MasterProcedureEnv 884 */ 885 private void updateMeta(final MasterProcedureEnv env) throws IOException { 886 env.getAssignmentManager().markRegionAsSplit(getParentRegion(), getParentRegionServerName(env), 887 daughterOneRI, daughterTwoRI); 888 } 889 890 /** 891 * Pre split region actions after the Point-of-No-Return step 892 * @param env MasterProcedureEnv 893 **/ 894 private void preSplitRegionAfterMETA(final MasterProcedureEnv env) 895 throws IOException, InterruptedException { 896 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 897 if (cpHost != null) { 898 cpHost.preSplitAfterMETAAction(getUser()); 899 } 900 } 901 902 /** 903 * Post split region actions 904 * @param env MasterProcedureEnv 905 **/ 906 private void postSplitRegion(final MasterProcedureEnv env) throws IOException { 907 final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost(); 908 if (cpHost != null) { 909 cpHost.postCompletedSplitRegionAction(daughterOneRI, daughterTwoRI, getUser()); 910 } 911 } 912 913 private ServerName getParentRegionServerName(final MasterProcedureEnv env) { 914 return env.getMasterServices().getAssignmentManager().getRegionStates() 915 .getRegionServerOfRegion(getParentRegion()); 916 } 917 918 private TransitRegionStateProcedure[] createUnassignProcedures(MasterProcedureEnv env) 919 throws IOException { 920 return AssignmentManagerUtil.createUnassignProceduresForSplitOrMerge(env, 921 Stream.of(getParentRegion()), getRegionReplication(env)); 922 } 923 924 private TransitRegionStateProcedure[] createAssignProcedures(MasterProcedureEnv env) 925 throws IOException { 926 List<RegionInfo> hris = new ArrayList<RegionInfo>(2); 927 hris.add(daughterOneRI); 928 hris.add(daughterTwoRI); 929 return AssignmentManagerUtil.createAssignProceduresForSplitDaughters(env, hris, 930 getRegionReplication(env), getParentRegionServerName(env)); 931 } 932 933 private int getRegionReplication(final MasterProcedureEnv env) throws IOException { 934 final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName()); 935 return htd.getRegionReplication(); 936 } 937 938 private void writeMaxSequenceIdFile(MasterProcedureEnv env) throws IOException { 939 MasterFileSystem fs = env.getMasterFileSystem(); 940 long maxSequenceId = WALSplitUtil.getMaxRegionSequenceId(env.getMasterConfiguration(), 941 getParentRegion(), fs::getFileSystem, fs::getWALFileSystem); 942 if (maxSequenceId > 0) { 943 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 944 getWALRegionDir(env, daughterOneRI), maxSequenceId); 945 WALSplitUtil.writeRegionSequenceIdFile(fs.getWALFileSystem(), 946 getWALRegionDir(env, daughterTwoRI), maxSequenceId); 947 } 948 } 949 950 @Override 951 protected boolean abort(MasterProcedureEnv env) { 952 // Abort means rollback. We can't rollback all steps. HBASE-18018 added abort to all 953 // Procedures. Here is a Procedure that has a PONR and cannot be aborted wants it enters this 954 // range of steps; what do we do for these should an operator want to cancel them? HBASE-20022. 955 return isRollbackSupported(getCurrentState()) ? super.abort(env) : false; 956 } 957}