001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.chaos.factories; 019 020import org.apache.hadoop.hbase.chaos.actions.Action; 021import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction; 022import org.apache.hadoop.hbase.chaos.actions.DumpHdfsClusterStatusAction; 023import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction; 024import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction; 025import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction; 026import org.apache.hadoop.hbase.chaos.actions.RestartActiveNameNodeAction; 027import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction; 028import org.apache.hadoop.hbase.chaos.actions.RestartRandomJournalNodeAction; 029import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction; 030import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction; 031import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction; 032import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction; 033import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey; 034import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; 035import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy; 036import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy; 037import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy; 038 039/** 040 * Creates ChaosMonkeys for doing server restart actions, but not flush / compact / snapshot kind of 041 * actions. 042 */ 043public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory { 044 045 private long restartRandomRsExceptMetaSleepTime; 046 private long restartActiveMasterSleepTime; 047 private long rollingBatchRestartRSSleepTime; 048 private long restartActiveNameNodeSleepTime; 049 private long restartRandomDataNodeSleepTime; 050 private long restartRandomJournalNodeSleepTime; 051 private long restartRandomZKNodeSleepTime; 052 private long gracefulRollingRestartTSSLeepTime; 053 private long rollingBatchSuspendRSSleepTime; 054 private float rollingBatchSuspendtRSRatio; 055 private long action1Period; 056 057 @Override 058 public ChaosMonkey build() { 059 loadProperties(); 060 061 // Destructive actions to mess things around. Cannot run batch restart. 062 // @formatter:off 063 Action[] actions1 = new Action[] { 064 new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime), 065 new RestartActiveMasterAction(restartActiveMasterSleepTime), 066 // only allow 2 servers to be dead. 067 new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true), 068 new ForceBalancerAction(), 069 new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime), 070 new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime), 071 new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime), 072 new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime), 073 new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), 074 new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, 075 rollingBatchSuspendtRSRatio) 076 }; 077 // @formatter:on 078 079 // Action to log more info for debugging 080 Action[] actions2 = 081 new Action[] { new DumpClusterStatusAction(), new DumpHdfsClusterStatusAction() }; 082 083 return new PolicyBasedChaosMonkey(properties, util, 084 new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1), 085 new PeriodicRandomActionPolicy(action1Period, actions1)), 086 new PeriodicRandomActionPolicy(action1Period, actions2)); 087 } 088 089 private void loadProperties() { 090 restartRandomRsExceptMetaSleepTime = Long 091 .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME, 092 MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + "")); 093 restartActiveMasterSleepTime = 094 Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME, 095 MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + "")); 096 rollingBatchRestartRSSleepTime = Long 097 .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME, 098 MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + "")); 099 restartActiveNameNodeSleepTime = 100 Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME, 101 MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + "")); 102 restartRandomDataNodeSleepTime = 103 Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME, 104 MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + "")); 105 restartRandomJournalNodeSleepTime = Long 106 .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME, 107 MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + "")); 108 restartRandomZKNodeSleepTime = 109 Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME, 110 MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + "")); 111 gracefulRollingRestartTSSLeepTime = 112 Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME, 113 MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + "")); 114 rollingBatchSuspendRSSleepTime = Long 115 .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME, 116 MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME + "")); 117 rollingBatchSuspendtRSRatio = 118 Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO, 119 MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + "")); 120 action1Period = 121 Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD, 122 MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + "")); 123 } 124}