/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.chaos.actions;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.RegionMover;
import org.apache.hadoop.util.Shell;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Gracefully restarts every selected region server in a rolling fashion. For each server in turn
 * it unloads the server's regions, restarts the server, loads the regions back, and then sleeps
 * for a random time (0-sleepTime) before moving on to the next server.
 */
public class GracefulRollingRestartRsAction extends RestartActionBaseAction {
  private static final Logger LOG = LoggerFactory.getLogger(GracefulRollingRestartRsAction.class);

  /**
   * @param sleepTime forwarded to the superclass; this action also passes it to
   *                  {@code gracefulRestartRs} and uses it as the exclusive upper bound of the
   *                  random pause between servers
   */
  public GracefulRollingRestartRsAction(long sleepTime) {
    super(sleepTime);
  }

  @Override
  protected Logger getLogger() {
    return LOG;
  }

  /**
   * Disables the balancer, runs the unload/restart/load cycle on every server returned by
   * {@link #selectServers()} with a random pause after each one, and finally re-enables the
   * balancer.
   * <p>
   * The balancer is re-enabled even when the loop terminates abnormally, so a failure part-way
   * through does not leave it switched off.
   * @throws Exception if selecting the servers, toggling the balancer or sleeping fails; failures
   *                   of an individual server's cycle are logged and do not abort the run
   */
  @Override
  public void perform() throws Exception {
    getLogger().info("Performing action: Rolling restarting non-master region servers");
    List<ServerName> selectedServers = selectServers();
    getLogger().info("Disabling balancer to make unloading possible");
    setBalancer(false, true);
    try {
      Random rand = ThreadLocalRandom.current();
      // Clamp instead of a bare (int) cast: a large long would otherwise wrap to a non-positive
      // bound, and Random.nextInt(int) rejects any bound that is not strictly positive.
      int maxPause = (int) Math.min(sleepTime, Integer.MAX_VALUE);
      for (ServerName server : selectedServers) {
        unloadRestartAndLoad(server);
        if (maxPause > 0) {
          sleep(rand.nextInt(maxPause));
        }
      }
    } finally {
      getLogger().info("Enabling balancer");
      setBalancer(true, true);
    }
  }

  /**
   * Runs one unload / restart / load cycle against a single region server. Any exception raised
   * inside the cycle is logged and swallowed so that the rolling restart carries on with the next
   * server. Note that when the unload or the restart throws, the subsequent {@code load()} call is
   * skipped for that server.
   * @param server the region server to cycle
   */
  private void unloadRestartAndLoad(ServerName server) {
    String rsName = server.getAddress().toString();
    try (
      RegionMover rm = new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) {
      getLogger().info("Unloading {}", server);
      rm.unload();
      getLogger().info("Restarting {}", server);
      gracefulRestartRs(server, sleepTime);
      getLogger().info("Loading {}", server);
      rm.load();
    } catch (Shell.ExitCodeException e) {
      getLogger().info("Problem restarting but presume successful; code={}", e.getExitCode(), e);
    } catch (Exception e) {
      getLogger().info("Exception but continuing...", e);
    }
  }

  /**
   * Chooses the servers to restart. This implementation returns every server reported by
   * {@code getCurrentServers()}, as a fixed-size list view over that array; it is
   * {@code protected} so a subclass can narrow the selection.
   * @return the servers to restart, in the order they will be processed
   * @throws IOException if the current servers cannot be obtained
   */
  protected List<ServerName> selectServers() throws IOException {
    return Arrays.asList(getCurrentServers());
  }
}