001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.factories;
019
020import org.apache.hadoop.hbase.chaos.actions.Action;
021import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
022import org.apache.hadoop.hbase.chaos.actions.DumpHdfsClusterStatusAction;
023import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction;
024import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction;
025import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
026import org.apache.hadoop.hbase.chaos.actions.RestartActiveNameNodeAction;
027import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction;
028import org.apache.hadoop.hbase.chaos.actions.RestartRandomJournalNodeAction;
029import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
030import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction;
031import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction;
032import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction;
033import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
034import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
035import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
036import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
037import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
038
039/**
040 * Creates ChaosMonkeys for doing server restart actions, but not flush / compact / snapshot kind of
041 * actions.
042 */
043public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
044
045  private long restartRandomRsExceptMetaSleepTime;
046  private long restartActiveMasterSleepTime;
047  private long rollingBatchRestartRSSleepTime;
048  private long restartActiveNameNodeSleepTime;
049  private long restartRandomDataNodeSleepTime;
050  private long restartRandomJournalNodeSleepTime;
051  private long restartRandomZKNodeSleepTime;
052  private long gracefulRollingRestartTSSLeepTime;
053  private long rollingBatchSuspendRSSleepTime;
054  private float rollingBatchSuspendtRSRatio;
055  private long action1Period;
056
057  @Override
058  public ChaosMonkey build() {
059    loadProperties();
060
061    // Destructive actions to mess things around. Cannot run batch restart.
062    // @formatter:off
063    Action[] actions1 = new Action[] {
064      new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
065      new RestartActiveMasterAction(restartActiveMasterSleepTime),
066      // only allow 2 servers to be dead.
067      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
068      new ForceBalancerAction(),
069      new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
070      new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
071      new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
072      new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
073      new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
074      new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
075          rollingBatchSuspendtRSRatio)
076    };
077    // @formatter:on
078
079    // Action to log more info for debugging
080    Action[] actions2 =
081      new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() };
082
083    return new PolicyBasedChaosMonkey(properties, util,
084      new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
085        new PeriodicRandomActionPolicy(action1Period, actions1)),
086      new PeriodicRandomActionPolicy(action1Period, actions2));
087  }
088
089  private void loadProperties() {
090    restartRandomRsExceptMetaSleepTime = Long
091      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
092        MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
093    restartActiveMasterSleepTime =
094      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
095        MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
096    rollingBatchRestartRSSleepTime = Long
097      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
098        MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
099    restartActiveNameNodeSleepTime =
100      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
101        MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
102    restartRandomDataNodeSleepTime =
103      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
104        MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
105    restartRandomJournalNodeSleepTime = Long
106      .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
107        MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
108    restartRandomZKNodeSleepTime =
109      Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
110        MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
111    gracefulRollingRestartTSSLeepTime =
112      Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
113        MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
114    rollingBatchSuspendRSSleepTime = Long
115      .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME,
116        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME + ""));
117    rollingBatchSuspendtRSRatio =
118      Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
119        MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
120    action1Period =
121      Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
122        MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
123  }
124}