001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.actions;
019
020import java.io.IOException;
021import org.apache.hadoop.hbase.ServerName;
022import org.apache.hadoop.hbase.util.Threads;
023
024/**
025 * Base class for restarting HBaseServer's
026 */
027public abstract class RestartActionBaseAction extends Action {
028  long sleepTime; // how long should we sleep
029
030  public RestartActionBaseAction(long sleepTime) {
031    this.sleepTime = sleepTime;
032  }
033
034  void sleep(long sleepTime) {
035    getLogger().info("Sleeping for:" + sleepTime);
036    Threads.sleep(sleepTime);
037  }
038
039  void restartMaster(ServerName server, long sleepTime) throws IOException {
040    sleepTime = Math.max(sleepTime, 1000);
041    // Don't try the kill if we're stopping
042    if (context.isStopping()) {
043      return;
044    }
045
046    getLogger().info("Killing master: {}", server);
047    killMaster(server);
048    sleep(sleepTime);
049    getLogger().info("Starting master: {}", server);
050    startMaster(server);
051  }
052
053  /**
054   * Stop and then restart the region server instead of killing it.
055   * @param server    hostname to restart the regionserver on
056   * @param sleepTime number of milliseconds between stop and restart
057   * @throws IOException if something goes wrong
058   */
059  void gracefulRestartRs(ServerName server, long sleepTime) throws IOException {
060    sleepTime = Math.max(sleepTime, 1000);
061    // Don't try the stop if we're stopping already
062    if (context.isStopping()) {
063      return;
064    }
065    getLogger().info("Stopping region server: {}", server);
066    stopRs(server);
067    sleep(sleepTime);
068    getLogger().info("Starting region server: {}", server);
069    startRs(server);
070  }
071
072  void restartRs(ServerName server, long sleepTime) throws IOException {
073    sleepTime = Math.max(sleepTime, 1000);
074    // Don't try the kill if we're stopping
075    if (context.isStopping()) {
076      return;
077    }
078    getLogger().info("Killing region server: {}", server);
079    killRs(server);
080    sleep(sleepTime);
081    getLogger().info("Starting region server: {}", server);
082    startRs(server);
083    // Sleep some time to make sure RS is online.
084    sleep(sleepTime);
085  }
086
087  void restartZKNode(ServerName server, long sleepTime) throws IOException {
088    sleepTime = Math.max(sleepTime, 1000);
089    // Don't try the kill if we're stopping
090    if (context.isStopping()) {
091      return;
092    }
093    getLogger().info("Killing zookeeper node: {}", server);
094    killZKNode(server);
095    sleep(sleepTime);
096    getLogger().info("Starting zookeeper node: {}", server);
097    startZKNode(server);
098  }
099
100  void restartDataNode(ServerName server, long sleepTime) throws IOException {
101    sleepTime = Math.max(sleepTime, 1000);
102    // Don't try the kill if we're stopping
103    if (context.isStopping()) {
104      return;
105    }
106    getLogger().info("Killing data node: {}", server);
107    killDataNode(server);
108    sleep(sleepTime);
109    getLogger().info("Starting data node: {}", server);
110    startDataNode(server);
111  }
112
113  void restartNameNode(ServerName server, long sleepTime) throws IOException {
114    sleepTime = Math.max(sleepTime, 1000);
115    // Don't try the kill if we're stopping
116    if (context.isStopping()) {
117      return;
118    }
119    getLogger().info("Killing name node: {}", server);
120    killNameNode(server);
121    sleep(sleepTime);
122    getLogger().info("Starting name node: {}", server);
123    startNameNode(server);
124  }
125
126  void restartJournalNode(ServerName server, long sleepTime) throws IOException {
127    sleepTime = Math.max(sleepTime, 1000);
128    // Don't try the kill if we're stopping
129    if (context.isStopping()) {
130      return;
131    }
132    getLogger().info("Killing journal node: {}", server);
133    killJournalNode(server);
134    sleep(sleepTime);
135    getLogger().info("Starting journal node: {}", server);
136    startJournalNode(server);
137  }
138}