/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;

/**
 * Exercises {@link FavoredStochasticBalancer.FavoredNodeLoadPicker} against a mini cluster that
 * runs {@link LoadOnlyFavoredStochasticBalancer}. The test piles extra regions of a user table
 * onto one region server and then checks that the picker proposes moving a region off that
 * server onto one of the region's favored nodes.
 */
@Category(LargeTests.class)
public class TestFavoredStochasticBalancerPickers extends BalancerTestBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFavoredStochasticBalancerPickers.class);

  private static final Logger LOG =
    LoggerFactory.getLogger(TestFavoredStochasticBalancerPickers.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
  /** Number of region servers started for each test. */
  private static final int SLAVES = 6;
  /** Number of regions the test table is created with. */
  private static final int REGIONS = SLAVES * 3;
  private static Configuration conf;

  private Admin admin;
  private SingleProcessHBaseCluster cluster;

  @Rule
  public TestName name = new TestName();

  /**
   * Prepares the shared configuration once per class: installs the favored-node balancer and
   * sets the stochastic balancer knobs this test relies on.
   */
  @BeforeClass
  public static void setupBeforeClass() throws Exception {
    conf = TEST_UTIL.getConfiguration();
    // Enable favored nodes based load balancer
    conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
      LoadOnlyFavoredStochasticBalancer.class, LoadBalancer.class);
    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 30000);
    conf.setInt("hbase.master.balancer.stochastic.moveCost", 0);
    conf.setBoolean("hbase.master.balancer.stochastic.execute.maxSteps", true);
  }

  /**
   * Starts a fresh mini cluster with {@link #SLAVES} region servers, waits for an active master
   * and switches the balancer off so that the test controls region placement itself.
   */
  @Before
  public void startCluster() throws Exception {
    TEST_UTIL.startMiniCluster(SLAVES);
    TEST_UTIL.getDFSCluster().waitClusterUp();
    TEST_UTIL.getHBaseCluster().waitForActiveAndReadyMaster(120 * 1000);
    cluster = TEST_UTIL.getMiniHBaseCluster();
    admin = TEST_UTIL.getAdmin();
    admin.balancerSwitch(false, true);
  }

  /** Cleans the test directory and shuts the mini cluster down after each test. */
  @After
  public void stopCluster() throws Exception {
    TEST_UTIL.cleanupTestDir();
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Skews the table's regions towards the most loaded region server, builds a
   * {@link BalancerClusterState} from the live assignments and verifies that the load picker
   * generates a move of a user region from that server to one of the region's favored nodes
   * within 100 attempts.
   */
  @Test
  public void testPickers() throws Exception {
    TableName tableName = TableName.valueOf(name.getMethodName());
    ColumnFamilyDescriptor columnFamilyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY).build();
    TableDescriptor desc =
      TableDescriptorBuilder.newBuilder(tableName).setColumnFamily(columnFamilyDescriptor).build();
    admin.createTable(desc, Bytes.toBytes("aaa"), Bytes.toBytes("zzz"), REGIONS);
    TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
    // Close the table handle once loading is done instead of leaking it.
    try (Table table = admin.getConnection().getTable(tableName)) {
      TEST_UTIL.loadTable(table, HConstants.CATALOG_FAMILY);
    }
    admin.flush(tableName);

    HMaster master = cluster.getMaster();
    FavoredNodesManager fnm = master.getFavoredNodesManager();
    ServerName masterServerName = master.getServerName();
    List<ServerName> excludedServers = Lists.newArrayList(masterServerName);
    final ServerName mostLoadedServer = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(mostLoadedServer);
    int numRegions = getTableRegionsFromServer(tableName, mostLoadedServer).size();
    excludedServers.add(mostLoadedServer);
    // Lets find another server with more regions to calculate number of regions to move
    ServerName source = getRSWithMaxRegions(tableName, excludedServers);
    assertNotNull(source);
    int regionsToMove = getTableRegionsFromServer(tableName, source).size() / 2;

    // Since move only works if the target is part of favored nodes of the region, lets get all
    // regions that are movable to mostLoadedServer
    List<RegionInfo> hris = getRegionsThatCanBeMoved(tableName, mostLoadedServer);
    // Fail with a readable message rather than an IndexOutOfBoundsException from hris.get(i).
    assertTrue("Only " + hris.size() + " regions can be moved to " + mostLoadedServer
      + " but " + regionsToMove + " are required", hris.size() >= regionsToMove);
    RegionStates rst = master.getAssignmentManager().getRegionStates();
    for (int i = 0; i < regionsToMove; i++) {
      final RegionInfo regionInfo = hris.get(i);
      admin.move(regionInfo.getEncodedNameAsBytes(), mostLoadedServer);
      LOG.info("Moving region: " + regionInfo.getRegionNameAsString() + " to " + mostLoadedServer);
      // Block until the master reports the region on the target server.
      Waiter.Predicate<Exception> regionOnTarget =
        () -> ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), mostLoadedServer);
      TEST_UTIL.waitFor(60000, regionOnTarget);
    }
    final int finalRegions = numRegions + regionsToMove;
    TEST_UTIL.waitUntilNoRegionsInTransition(60000);
    // The region server itself must also host the expected number of regions.
    Waiter.Predicate<Exception> targetHasAllRegions =
      () -> getTableRegionsFromServer(tableName, mostLoadedServer).size() == finalRegions;
    TEST_UTIL.waitFor(60000, targetHasAllRegions);
    TEST_UTIL.getHBaseCluster().startRegionServerAndWait(60000);

    Map<ServerName, List<RegionInfo>> serverAssignments = Maps.newHashMap();
    ClusterMetrics status = admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS));
    for (ServerName sn : status.getLiveServerMetrics().keySet()) {
      if (!ServerName.isSameAddress(sn, masterServerName)) {
        serverAssignments.put(sn, getTableRegionsFromServer(tableName, sn));
      }
    }
    RegionHDFSBlockLocationFinder regionFinder = new RegionHDFSBlockLocationFinder();
    regionFinder.setClusterMetrics(admin.getClusterMetrics(EnumSet.of(Option.LIVE_SERVERS)));
    regionFinder.setConf(conf);
    regionFinder.setClusterInfoProvider(
      new MasterClusterInfoProvider(TEST_UTIL.getMiniHBaseCluster().getMaster()));
    // Named clusterState so that it does not shadow the mini cluster field 'cluster'.
    BalancerClusterState clusterState =
      new BalancerClusterState(serverAssignments, null, regionFinder, new RackManager(conf));
    LoadOnlyFavoredStochasticBalancer balancer = (LoadOnlyFavoredStochasticBalancer) TEST_UTIL
      .getMiniHBaseCluster().getMaster().getLoadBalancer().getInternalBalancer();

    clusterState.sortServersByRegionCount();
    Integer[] servers = clusterState.serverIndicesSortedByRegionCount;
    LOG.info("Servers sorted by region count:" + Arrays.toString(servers));
    LOG.info("Cluster dump: " + clusterState);
    // The last index of the sorted array is expected to be the server we overloaded above.
    ServerName busiestInState = clusterState.servers[servers[servers.length - 1]];
    if (!mostLoadedServer.equals(busiestInState)) {
      LOG.error("Most loaded server: " + mostLoadedServer + " does not match: " + busiestInState);
    }
    assertEquals(mostLoadedServer, busiestInState);
    FavoredStochasticBalancer.FavoredNodeLoadPicker loadPicker =
      balancer.new FavoredNodeLoadPicker();
    boolean userRegionPicked = false;
    for (int i = 0; i < 100 && !userRegionPicked; i++) {
      BalanceAction action = loadPicker.generate(clusterState);
      if (action.getType() != BalanceAction.Type.MOVE_REGION) {
        continue;
      }
      MoveRegionAction moveRegionAction = (MoveRegionAction) action;
      RegionInfo region = clusterState.regions[moveRegionAction.getRegion()];
      assertNotEquals(-1, moveRegionAction.getToServer());
      ServerName destinationServer = clusterState.servers[moveRegionAction.getToServer()];
      // Expected value first, so that a failure message reads correctly.
      assertEquals(mostLoadedServer, clusterState.servers[moveRegionAction.getFromServer()]);
      if (!region.getTable().isSystemTable()) {
        List<ServerName> favNodes = fnm.getFavoredNodes(region);
        assertTrue(favNodes.contains(ServerName.valueOf(destinationServer.getAddress(), -1)));
        userRegionPicked = true;
      }
    }
    assertTrue("load picker did not pick expected regions in 100 iterations.", userRegionPicked);
  }

  /**
   * A region can only be moved to one of its favored nodes. This method returns the regions of
   * the given table that list {@code serverName} as a favored node and are not currently hosted
   * on it, which makes it easy to write non-flaky tests.
   * @param tableName  table whose regions are considered
   * @param serverName server the regions should be movable to
   * @return regions of {@code tableName} that can be moved to {@code serverName}
   */
  private List<RegionInfo> getRegionsThatCanBeMoved(TableName tableName, ServerName serverName) {
    List<RegionInfo> regions = Lists.newArrayList();
    RegionStates rst = cluster.getMaster().getAssignmentManager().getRegionStates();
    FavoredNodesManager fnm = cluster.getMaster().getFavoredNodesManager();
    for (RegionInfo regionInfo : fnm.getRegionsOfFavoredNode(serverName)) {
      if (
        regionInfo.getTable().equals(tableName)
          && !ServerName.isSameAddress(rst.getRegionServerOfRegion(regionInfo), serverName)
      ) {
        regions.add(regionInfo);
      }
    }
    return regions;
  }

  /**
   * Lists the regions of a table that the given region server currently reports.
   * @param tableName table whose regions are requested
   * @param source    region server to query
   * @return region infos of {@code tableName} returned by that region server
   */
  private List<RegionInfo> getTableRegionsFromServer(TableName tableName, ServerName source)
    throws IOException {
    List<RegionInfo> regionInfos = Lists.newArrayList();
    HRegionServer regionServer = cluster.getRegionServer(source);
    for (Region region : regionServer.getRegions(tableName)) {
      regionInfos.add(region.getRegionInfo());
    }
    return regionInfos;
  }

  /**
   * Finds the live region server that hosts the most regions of a table, skipping excluded
   * servers.
   * @param tableName    table whose regions are counted
   * @param excludeNodes servers to ignore (matched by address); may be {@code null}
   * @return the most loaded eligible server, or {@code null} if no eligible server hosts a
   *         region of the table
   */
  private ServerName getRSWithMaxRegions(TableName tableName, List<ServerName> excludeNodes)
    throws IOException {

    int maxRegions = 0;
    ServerName maxLoadedServer = null;

    for (JVMClusterUtil.RegionServerThread rst : cluster.getLiveRegionServerThreads()) {
      List<HRegion> regions = rst.getRegionServer().getRegions(tableName);
      LOG.debug("Server: " + rst.getRegionServer().getServerName() + " regions: " + regions.size());
      if (regions.size() > maxRegions) {
        if (
          excludeNodes == null
            || !doesMatchExcludeNodes(excludeNodes, rst.getRegionServer().getServerName())
        ) {
          maxRegions = regions.size();
          maxLoadedServer = rst.getRegionServer().getServerName();
        }
      }
    }
    return maxLoadedServer;
  }

  /**
   * Tells whether a server shares its address with any server in the exclusion list.
   * @param excludeNodes servers to compare against
   * @param sn           server to test
   * @return {@code true} if {@code sn} has the same address as one of {@code excludeNodes}
   */
  private boolean doesMatchExcludeNodes(List<ServerName> excludeNodes, ServerName sn) {
    for (ServerName excludeSN : excludeNodes) {
      if (ServerName.isSameAddress(sn, excludeSN)) {
        return true;
      }
    }
    return false;
  }
}