001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import java.io.IOException; 021import org.apache.hadoop.fs.Path; 022import org.apache.hadoop.hbase.ServerName; 023import org.apache.hadoop.hbase.master.SplitWALManager; 024import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 025import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; 026import org.apache.hadoop.hbase.procedure2.ProcedureUtil; 027import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; 028import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; 029import org.apache.hadoop.hbase.util.RetryCounter; 030import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; 031import org.apache.yetus.audience.InterfaceAudience; 032import org.slf4j.Logger; 033import org.slf4j.LoggerFactory; 034 035import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 036import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; 037import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos; 038 039/** 040 * The procedure is to split a WAL. It will get an available region server and schedule a 041 * {@link SplitWALRemoteProcedure} to actually send the request to region server to split this WAL. 042 * It also check if the split wal task really succeed. If the WAL still exists, it will schedule 043 * another region server to split this WAL. 044 */ 045@InterfaceAudience.Private 046public class SplitWALProcedure 047 extends StateMachineProcedure<MasterProcedureEnv, MasterProcedureProtos.SplitWALState> 048 implements ServerProcedureInterface { 049 private static final Logger LOG = LoggerFactory.getLogger(SplitWALProcedure.class); 050 private String walPath; 051 private ServerName worker; 052 private ServerName crashedServer; 053 private RetryCounter retryCounter; 054 055 public SplitWALProcedure() { 056 } 057 058 public SplitWALProcedure(String walPath, ServerName crashedServer) { 059 this.walPath = walPath; 060 this.crashedServer = crashedServer; 061 } 062 063 @Override 064 protected Flow executeFromState(MasterProcedureEnv env, MasterProcedureProtos.SplitWALState state) 065 throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { 066 SplitWALManager splitWALManager = env.getMasterServices().getSplitWALManager(); 067 switch (state) { 068 case ACQUIRE_SPLIT_WAL_WORKER: 069 worker = splitWALManager.acquireSplitWALWorker(this); 070 setNextState(MasterProcedureProtos.SplitWALState.DISPATCH_WAL_TO_WORKER); 071 return Flow.HAS_MORE_STATE; 072 case DISPATCH_WAL_TO_WORKER: 073 assert worker != null; 074 addChildProcedure(new SplitWALRemoteProcedure(worker, crashedServer, walPath)); 075 setNextState(MasterProcedureProtos.SplitWALState.RELEASE_SPLIT_WORKER); 076 return Flow.HAS_MORE_STATE; 077 case RELEASE_SPLIT_WORKER: 078 boolean finished; 079 try { 080 finished = splitWALManager.isSplitWALFinished(walPath); 081 } catch (IOException ioe) { 082 if (retryCounter == null) { 083 retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration()); 084 } 085 long backoff = retryCounter.getBackoffTimeAndIncrementAttempts(); 086 LOG.warn("Failed to check whether splitting wal {} success, wait {} seconds to retry", 087 walPath, backoff / 1000, ioe); 088 setTimeout(Math.toIntExact(backoff)); 089 setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); 090 skipPersistence(); 091 throw new ProcedureSuspendedException(); 092 } 093 splitWALManager.releaseSplitWALWorker(worker, env.getProcedureScheduler()); 094 if (!finished) { 095 LOG.warn("Failed to split wal {} by server {}, retry...", walPath, worker); 096 setNextState(MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER); 097 return Flow.HAS_MORE_STATE; 098 } 099 ServerCrashProcedure.updateProgress(env, getParentProcId()); 100 return Flow.NO_MORE_STATE; 101 default: 102 throw new UnsupportedOperationException("unhandled state=" + state); 103 } 104 } 105 106 @Override 107 protected void rollbackState(MasterProcedureEnv env, 108 MasterProcedureProtos.SplitWALState splitOneWalState) throws IOException, InterruptedException { 109 if (splitOneWalState == getInitialState()) { 110 return; 111 } 112 throw new UnsupportedOperationException(); 113 } 114 115 @Override 116 protected MasterProcedureProtos.SplitWALState getState(int stateId) { 117 return MasterProcedureProtos.SplitWALState.forNumber(stateId); 118 } 119 120 @Override 121 protected int getStateId(MasterProcedureProtos.SplitWALState state) { 122 return state.getNumber(); 123 } 124 125 @Override 126 protected MasterProcedureProtos.SplitWALState getInitialState() { 127 return MasterProcedureProtos.SplitWALState.ACQUIRE_SPLIT_WAL_WORKER; 128 } 129 130 @Override 131 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 132 super.serializeStateData(serializer); 133 MasterProcedureProtos.SplitWALData.Builder builder = 134 MasterProcedureProtos.SplitWALData.newBuilder(); 135 builder.setWalPath(walPath).setCrashedServer(ProtobufUtil.toServerName(crashedServer)); 136 if (worker != null) { 137 builder.setWorker(ProtobufUtil.toServerName(worker)); 138 } 139 serializer.serialize(builder.build()); 140 } 141 142 @Override 143 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 144 super.deserializeStateData(serializer); 145 MasterProcedureProtos.SplitWALData data = 146 serializer.deserialize(MasterProcedureProtos.SplitWALData.class); 147 walPath = data.getWalPath(); 148 crashedServer = ProtobufUtil.toServerName(data.getCrashedServer()); 149 if (data.hasWorker()) { 150 worker = ProtobufUtil.toServerName(data.getWorker()); 151 } 152 } 153 154 @Override 155 protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { 156 setState(ProcedureProtos.ProcedureState.RUNNABLE); 157 env.getProcedureScheduler().addFront(this); 158 return false; 159 } 160 161 public String getWAL() { 162 return walPath; 163 } 164 165 public ServerName getWorker() { 166 return worker; 167 } 168 169 @Override 170 public ServerName getServerName() { 171 return this.crashedServer; 172 } 173 174 @Override 175 public boolean hasMetaTableRegion() { 176 return AbstractFSWALProvider.isMetaFile(new Path(walPath)); 177 } 178 179 @Override 180 public ServerOperationType getServerOperationType() { 181 return ServerOperationType.SPLIT_WAL; 182 } 183 184 @Override 185 protected void afterReplay(MasterProcedureEnv env) { 186 if (worker != null) { 187 if ( 188 env != null && env.getMasterServices() != null 189 && env.getMasterServices().getSplitWALManager() != null 190 ) { 191 env.getMasterServices().getSplitWALManager().addUsedSplitWALWorker(worker); 192 } 193 } 194 } 195 196 @Override 197 protected void toStringClassDetails(StringBuilder builder) { 198 builder.append(getProcName()); 199 if (this.worker != null) { 200 builder.append(", worker="); 201 builder.append(this.worker); 202 } 203 if (this.retryCounter != null) { 204 builder.append(", retry="); 205 builder.append(this.retryCounter); 206 } 207 } 208 209 @Override 210 public String getProcName() { 211 return getClass().getSimpleName() + " " + getWALNameFromStrPath(getWAL()); 212 } 213 214 /** 215 * @return Return the WAL filename when given a Path-as-a-string; i.e. return the last path 216 * component only. 217 */ 218 static String getWALNameFromStrPath(String path) { 219 int slashIndex = path.lastIndexOf('/'); 220 return slashIndex != -1 ? path.substring(slashIndex + 1) : path; 221 } 222}