001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.chaos.actions; 019 020import java.io.IOException; 021import org.apache.hadoop.hbase.ServerName; 022import org.apache.hadoop.hbase.util.Threads; 023 024/** 025 * Base class for restarting HBaseServer's 026 */ 027public abstract class RestartActionBaseAction extends Action { 028 long sleepTime; // how long should we sleep 029 030 public RestartActionBaseAction(long sleepTime) { 031 this.sleepTime = sleepTime; 032 } 033 034 void sleep(long sleepTime) { 035 getLogger().info("Sleeping for:" + sleepTime); 036 Threads.sleep(sleepTime); 037 } 038 039 void restartMaster(ServerName server, long sleepTime) throws IOException { 040 sleepTime = Math.max(sleepTime, 1000); 041 // Don't try the kill if we're stopping 042 if (context.isStopping()) { 043 return; 044 } 045 046 getLogger().info("Killing master: {}", server); 047 killMaster(server); 048 sleep(sleepTime); 049 getLogger().info("Starting master: {}", server); 050 startMaster(server); 051 } 052 053 /** 054 * Stop and then restart the region server instead of killing it. 055 * @param server hostname to restart the regionserver on 056 * @param sleepTime number of milliseconds between stop and restart 057 * @throws IOException if something goes wrong 058 */ 059 void gracefulRestartRs(ServerName server, long sleepTime) throws IOException { 060 sleepTime = Math.max(sleepTime, 1000); 061 // Don't try the stop if we're stopping already 062 if (context.isStopping()) { 063 return; 064 } 065 getLogger().info("Stopping region server: {}", server); 066 stopRs(server); 067 sleep(sleepTime); 068 getLogger().info("Starting region server: {}", server); 069 startRs(server); 070 } 071 072 void restartRs(ServerName server, long sleepTime) throws IOException { 073 sleepTime = Math.max(sleepTime, 1000); 074 // Don't try the kill if we're stopping 075 if (context.isStopping()) { 076 return; 077 } 078 getLogger().info("Killing region server: {}", server); 079 killRs(server); 080 sleep(sleepTime); 081 getLogger().info("Starting region server: {}", server); 082 startRs(server); 083 // Sleep some time to make sure RS is online. 084 sleep(sleepTime); 085 } 086 087 void restartZKNode(ServerName server, long sleepTime) throws IOException { 088 sleepTime = Math.max(sleepTime, 1000); 089 // Don't try the kill if we're stopping 090 if (context.isStopping()) { 091 return; 092 } 093 getLogger().info("Killing zookeeper node: {}", server); 094 killZKNode(server); 095 sleep(sleepTime); 096 getLogger().info("Starting zookeeper node: {}", server); 097 startZKNode(server); 098 } 099 100 void restartDataNode(ServerName server, long sleepTime) throws IOException { 101 sleepTime = Math.max(sleepTime, 1000); 102 // Don't try the kill if we're stopping 103 if (context.isStopping()) { 104 return; 105 } 106 getLogger().info("Killing data node: {}", server); 107 killDataNode(server); 108 sleep(sleepTime); 109 getLogger().info("Starting data node: {}", server); 110 startDataNode(server); 111 } 112 113 void restartNameNode(ServerName server, long sleepTime) throws IOException { 114 sleepTime = Math.max(sleepTime, 1000); 115 // Don't try the kill if we're stopping 116 if (context.isStopping()) { 117 return; 118 } 119 getLogger().info("Killing name node: {}", server); 120 killNameNode(server); 121 sleep(sleepTime); 122 getLogger().info("Starting name node: {}", server); 123 startNameNode(server); 124 } 125 126 void restartJournalNode(ServerName server, long sleepTime) throws IOException { 127 sleepTime = Math.max(sleepTime, 1000); 128 // Don't try the kill if we're stopping 129 if (context.isStopping()) { 130 return; 131 } 132 getLogger().info("Killing journal node: {}", server); 133 killJournalNode(server); 134 sleep(sleepTime); 135 getLogger().info("Starting journal node: {}", server); 136 startJournalNode(server); 137 } 138}