001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.chaos.actions; 019 020import java.util.Collections; 021import java.util.List; 022import java.util.Optional; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.ServerName; 025import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper; 026import org.apache.hadoop.hbase.zookeeper.ZKUtil; 027import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 028import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 029import org.apache.hadoop.hdfs.DFSUtil; 030import org.apache.hadoop.hdfs.DistributedFileSystem; 031import org.apache.hadoop.hdfs.HAUtil; 032import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo; 033import org.slf4j.Logger; 034import org.slf4j.LoggerFactory; 035 036/** 037 * Action that tries to restart the active namenode. 038 */ 039public class RestartActiveNameNodeAction extends RestartActionBaseAction { 040 private static final Logger LOG = LoggerFactory.getLogger(RestartActiveNameNodeAction.class); 041 042 // Value taken from org.apache.hadoop.ha.ActiveStandbyElector.java, variable :- LOCK_FILENAME 043 private static final String ACTIVE_NN_LOCK_NAME = "ActiveStandbyElectorLock"; 044 045 // Value taken from org.apache.hadoop.ha.ZKFailoverController.java 046 // variable :- ZK_PARENT_ZNODE_DEFAULT and ZK_PARENT_ZNODE_KEY 047 private static final String ZK_PARENT_ZNODE_DEFAULT = "/hadoop-ha"; 048 private static final String ZK_PARENT_ZNODE_KEY = "ha.zookeeper.parent-znode"; 049 050 public RestartActiveNameNodeAction(long sleepTime) { 051 super(sleepTime); 052 } 053 054 @Override 055 protected Logger getLogger() { 056 return LOG; 057 } 058 059 @Override 060 public void perform() throws Exception { 061 getLogger().info("Performing action: Restart active namenode"); 062 063 final String hadoopHAZkNode; 064 String activeNamenode = null; 065 int activeNamenodePort = -1; 066 try (final DistributedFileSystem dfs = HdfsActionUtils.createDfs(getConf())) { 067 final Configuration conf = dfs.getConf(); 068 hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT); 069 final String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf); 070 071 if (!HAUtil.isHAEnabled(conf, nameServiceID)) { 072 getLogger().info("HA for HDFS is not enabled; skipping"); 073 return; 074 } 075 try (final ZKWatcher zkw = new ZKWatcher(conf, "get-active-namenode", null)) { 076 final RecoverableZooKeeper rzk = zkw.getRecoverableZooKeeper(); 077 // If hadoopHAZkNode == '/', pass '' instead because then joinZNode will return '//' as a 078 // prefix 079 // which zk doesn't like as a prefix on the path. 080 final String hadoopHAZkNodePath = ZNodePaths.joinZNode( 081 (hadoopHAZkNode != null && hadoopHAZkNode.equals("/")) ? "" : hadoopHAZkNode, 082 nameServiceID); 083 final List<String> subChildren = 084 Optional.ofNullable(ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath)) 085 .orElse(Collections.emptyList()); 086 for (final String eachEntry : subChildren) { 087 if (!eachEntry.contains(ACTIVE_NN_LOCK_NAME)) { 088 continue; 089 } 090 byte[] data = 091 rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false, null); 092 ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data); 093 activeNamenode = proto.getHostname(); 094 activeNamenodePort = proto.getPort(); 095 } 096 } 097 } 098 099 if (activeNamenode == null) { 100 getLogger().info("No active Name node found in zookeeper under '{}'", hadoopHAZkNode); 101 return; 102 } 103 104 getLogger().info("Found Active NameNode host: {}", activeNamenode); 105 final ServerName activeNNHost = ServerName.valueOf(activeNamenode, activeNamenodePort, -1L); 106 getLogger().info("Restarting Active NameNode: {}", activeNamenode); 107 restartNameNode(activeNNHost, this.sleepTime); 108 } 109}