/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class defines methods that can help with managing HBase clusters from unit tests and system
 * tests. There are 3 types of cluster deployments:
 * <ul>
 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads, used by unit
 * tests</li>
 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
 * interact with the cluster.</li>
 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate JVMs.
 * </li>
 * </ul>
 * <p>
 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can be run
 * against a mini-cluster during unit test execution, or a distributed cluster having tens/hundreds
 * of nodes during execution of integration tests.
 * <p>
 * HBaseCluster exposes client-side public interfaces to tests, so that tests do not assume running
 * in a particular mode. Not all the tests are suitable to be run on an actual cluster, and some
 * tests will still need to mock stuff and introspect internal state. For those use cases from unit
 * tests, or if more control is needed, you can use the subclasses directly. In that sense, this
 * class does not abstract away <strong>every</strong> interface that MiniHBaseCluster or
 * DistributedHBaseCluster provide.
 * @deprecated since 3.0.0, will be removed in 4.0.0. Use
 *             {@link org.apache.hadoop.hbase.testing.TestingHBaseCluster} instead.
 */
@InterfaceAudience.Public
@Deprecated
public abstract class HBaseCluster implements Closeable, Configurable {
  // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
  static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class.getName());

  /** The configuration handed to the constructor or to {@link #setConf(Configuration)}. */
  protected Configuration conf;

  /** the status of the cluster before we begin */
  protected ClusterMetrics initialClusterStatus;

  /**
   * Construct an HBaseCluster
   * @param conf Configuration to be used for cluster
   */
  public HBaseCluster(Configuration conf) {
    setConf(conf);
  }

  /**
   * Stores the given configuration in {@link #conf}.
   * @param conf Configuration to be used for cluster
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Returns the configuration currently stored in {@link #conf}.
   */
  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Returns a ClusterMetrics for this HBase cluster.
   * @see #getInitialClusterMetrics()
   */
  public abstract ClusterMetrics getClusterMetrics() throws IOException;

  /**
   * Returns the ClusterMetrics held in {@link #initialClusterStatus}, i.e. the metrics for this
   * HBase cluster as observed at the starting of the HBaseCluster.
   */
  public ClusterMetrics getInitialClusterMetrics() throws IOException {
    return initialClusterStatus;
  }

  /**
   * Starts a new region server on the given hostname or if this is a mini/local cluster, starts a
   * region server locally.
   * @param hostname the hostname to start the regionserver on
   * @param port     the port to start the regionserver on
   * @throws IOException if something goes wrong
   */
  public abstract void startRegionServer(String hostname, int port) throws IOException;

  /**
   * Kills the region server process if this is a distributed cluster, otherwise this causes the
   * region server to exit doing basic clean up only.
   * @param serverName the region server to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killRegionServer(ServerName serverName) throws IOException;

  /**
   * Keeping track of killed servers and being able to check if a particular server was killed makes
   * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
   * example of such case is - killing servers and waiting for all regions of a particular table to
   * be assigned. We can check for server column in META table and that its value is not one of the
   * killed servers.
   * @param serverName the region server to check
   */
  public abstract boolean isKilledRS(ServerName serverName);

  /**
   * Stops the given region server, by attempting a gradual stop.
   * @param serverName the region server to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopRegionServer(ServerName serverName) throws IOException;

  /**
   * Wait for the specified region server to join the cluster. The live servers reported by
   * {@link #getClusterMetrics()} are polled, with a 100 ms pause between polls, until one of them
   * matches both the given hostname and port.
   * @param hostname the hostname of the region server to wait for
   * @param port     the port of the region server to wait for
   * @param timeout  how long to keep polling, in milliseconds
   * @throws IOException if something goes wrong or timeout occurs
   */
  public void waitForRegionServerToStart(String hostname, int port, long timeout)
    throws IOException {
    long start = EnvironmentEdgeManager.currentTime();
    while ((EnvironmentEdgeManager.currentTime() - start) < timeout) {
      for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) {
        if (server.getHostname().equals(hostname) && server.getPort() == port) {
          return;
        }
      }
      Threads.sleep(100);
    }
    // Report the port as well: the match above is on hostname AND port, and several region
    // servers may share one hostname.
    throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
      + hostname + ":" + port);
  }

  /**
   * Wait for the specified region server to stop the thread / process.
   * @param serverName the region server to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Suspend the region server
   * @param serverName the region server to suspend
   * @throws IOException if something goes wrong
   */
  public abstract void suspendRegionServer(ServerName serverName) throws IOException;

  /**
   * Resume the region server
   * @param serverName the region server to resume
   * @throws IOException if something goes wrong
   */
  public abstract void resumeRegionServer(ServerName serverName) throws IOException;

  /**
   * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently
   * logs warning message.
   * @param hostname the hostname to start the zookeeper node on
   * @param port     the port to start the zookeeper node on
   * @throws IOException if something goes wrong
   */
  public abstract void startZkNode(String hostname, int port) throws IOException;

  /**
   * Kills the zookeeper node process if this is a distributed cluster. What happens on a
   * mini/local cluster is up to the implementing subclass.
   * @param serverName the zookeeper node to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killZkNode(ServerName serverName) throws IOException;

  /**
   * Stops the zookeeper node if this is a distributed cluster, otherwise silently logs warning
   * message.
   * @param serverName the zookeeper node to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopZkNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified zookeeper node to join the cluster
   * @param serverName the zookeeper node to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException;

  /**
   * Wait for the specified zookeeper node to stop the thread / process.
   * @param serverName the zookeeper node to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException;

  /**
   * Starts a new datanode on the given hostname or if this is a mini/local cluster, silently logs
   * warning message.
   * @param serverName the datanode to start
   * @throws IOException if something goes wrong
   */
  public abstract void startDataNode(ServerName serverName) throws IOException;

  /**
   * Kills the datanode process if this is a distributed cluster. What happens on a mini/local
   * cluster is up to the implementing subclass.
   * @param serverName the datanode to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killDataNode(ServerName serverName) throws IOException;

  /**
   * Stops the datanode if this is a distributed cluster, otherwise silently logs warning message.
   * @param serverName the datanode to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopDataNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified datanode to join the cluster
   * @param serverName the datanode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Wait for the specified datanode to stop the thread / process.
   * @param serverName the datanode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
   * warning message.
   * @param serverName the namenode to start
   * @throws IOException if something goes wrong
   */
  public abstract void startNameNode(ServerName serverName) throws IOException;

  /**
   * Kills the namenode process if this is a distributed cluster. What happens on a mini/local
   * cluster is up to the implementing subclass.
   * @param serverName the namenode to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killNameNode(ServerName serverName) throws IOException;

  /**
   * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
   * @param serverName the namenode to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopNameNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified namenode to join the cluster
   * @param serverName the namenode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Wait for the specified namenode to stop
   * @param serverName the namenode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
   * locally.
   * @param hostname the hostname to start the master on
   * @param port     the port to start the master on
   * @throws IOException if something goes wrong
   */
  public abstract void startMaster(String hostname, int port) throws IOException;

  /**
   * Kills the master process if this is a distributed cluster, otherwise, this causes master to
   * exit doing basic clean up only.
   * @param serverName the master to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killMaster(ServerName serverName) throws IOException;

  /**
   * Stops the given master, by attempting a gradual stop.
   * @param serverName the master to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopMaster(ServerName serverName) throws IOException;

  /**
   * Wait for the specified master to stop the thread / process.
   * @param serverName the master to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForMasterToStop(ServerName serverName, long timeout) throws IOException;

  /**
   * Blocks until there is an active master and that master has completed initialization. Delegates
   * to {@link #waitForActiveAndReadyMaster(long)} with a timeout of {@link Long#MAX_VALUE}.
   * @return true if an active master becomes available. false if there are no masters left.
   * @throws IOException if something goes wrong or timeout occurs
   */
  public boolean waitForActiveAndReadyMaster() throws IOException {
    return waitForActiveAndReadyMaster(Long.MAX_VALUE);
  }

  /**
   * Blocks until there is an active master and that master has completed initialization.
   * @param timeout the timeout limit in ms
   * @return true if an active master becomes available. false if there are no masters left.
   */
  public abstract boolean waitForActiveAndReadyMaster(long timeout) throws IOException;

  /**
   * Wait for HBase Cluster to shut down.
   */
  public abstract void waitUntilShutDown() throws IOException;

  /**
   * Shut down the HBase cluster
   */
  public abstract void shutdown() throws IOException;

  /**
   * Restores the cluster to its initial state if this is a real cluster, otherwise does nothing.
   * This is a best effort restore. If the servers are not reachable, or insufficient permissions,
   * etc. restoration might be partial. Delegates to {@link #restoreClusterMetrics(ClusterMetrics)}
   * with the result of {@link #getInitialClusterMetrics()}.
   * @return whether restoration is complete
   */
  public boolean restoreInitialStatus() throws IOException {
    return restoreClusterMetrics(getInitialClusterMetrics());
  }

  /**
   * Restores the cluster to given state if this is a real cluster, otherwise does nothing. This is
   * a best effort restore. If the servers are not reachable, or insufficient permissions, etc.
   * restoration might be partial. This base implementation does nothing and returns true.
   * @param desiredStatus the cluster state to restore to
   * @return whether restoration is complete
   */
  public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException {
    return true;
  }

  /**
   * Get the ServerName of region server serving the first hbase:meta region
   */
  public ServerName getServerHoldingMeta() throws IOException {
    return getServerHoldingRegion(TableName.META_TABLE_NAME,
      RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
  }

  /**
   * Get the ServerName of region server serving the specified region
   * @param regionName Name of the region in bytes
   * @param tn         Table name that has the region.
   * @return ServerName that hosts the region or null
   */
  public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
    throws IOException;

  /**
   * Tells whether this is a distributed cluster. This base implementation returns false.
   * @return whether we are interacting with a distributed cluster as opposed to an in-process
   *         mini/local cluster.
   */
  public boolean isDistributedCluster() {
    return false;
  }

  /**
   * Closes all the resources held open for this cluster. Note that this call does not shutdown the
   * cluster.
   * @see #shutdown()
   */
  @Override
  public abstract void close() throws IOException;

  /**
   * Wait for the namenode. This base implementation does nothing.
   */
  public void waitForNamenodeAvailable() throws InterruptedException {
  }

  /**
   * Wait for datanodes to be registered. This base implementation does nothing.
   * @param nbDN presumably the number of datanodes to wait for; unused here, confirm against the
   *             overriding subclasses
   */
  public void waitForDatanodesRegistered(int nbDN) throws Exception {
  }
}