/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.balancer;

import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY;
import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Base64;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public final class CandidateGeneratorTestUtil {

  private static final Logger LOG = LoggerFactory.getLogger(CandidateGeneratorTestUtil.class);

  private CandidateGeneratorTestUtil() {
  }
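  /*
   * Typical usage, as a sketch (the cluster fixture and cost threshold below are
   * hypothetical; real tests build serverToRegions to suit the candidate generator
   * under test):
   *
   *   Configuration conf = new Configuration();
   *   Map<ServerName, List<RegionInfo>> serverToRegions = buildTestCluster();
   *   CandidateGeneratorTestUtil.runBalancerToExhaustion(conf, serverToRegions,
   *     Collections.singleton(CandidateGeneratorTestUtil::areAllReplicasDistributed), 0.05f);
   */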
  static void runBalancerToExhaustion(Configuration conf,
    Map<ServerName, List<RegionInfo>> serverToRegions,
    Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost) {
    runBalancerToExhaustion(conf, serverToRegions, expectations, targetMaxBalancerCost, 15000);
  }

  static void runBalancerToExhaustion(Configuration conf,
    Map<ServerName, List<RegionInfo>> serverToRegions,
    Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost,
    long maxRunningTime) {
    // Do the full plan; we're testing with a lot of regions. Let each run consume its full
    // step budget, capped at maxRunningTime, and treat costs below targetMaxBalancerCost
    // as balanced.
    conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
    conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
    conf.setFloat(MIN_COST_NEED_BALANCE_KEY, targetMaxBalancerCost);

    BalancerClusterState cluster = createMockBalancerClusterState(serverToRegions);
    StochasticLoadBalancer stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
    printClusterDistribution(cluster, 0);
    int balancerRuns = 0;
    int actionsTaken = 0;
    long balancingMillis = 0;
    boolean isBalanced = false;
    while (!isBalanced) {
      balancerRuns++;
      if (balancerRuns > 1000) {
        throw new RuntimeException("Balancer failed to find balance & meet expectations");
      }
      long start = System.currentTimeMillis();
      List<RegionPlan> regionPlans =
        stochasticLoadBalancer.balanceCluster(partitionRegionsByTable(serverToRegions));
      balancingMillis += System.currentTimeMillis() - start;
      if (regionPlans != null) {
        // Apply all plans to serverToRegions
        for (RegionPlan rp : regionPlans) {
          ServerName source = rp.getSource();
          ServerName dest = rp.getDestination();
          RegionInfo region = rp.getRegionInfo();

          // Update serverToRegions
          serverToRegions.get(source).remove(region);
          serverToRegions.get(dest).add(region);
          actionsTaken++;
        }

        // Now rebuild cluster and balancer from updated serverToRegions
        cluster = createMockBalancerClusterState(serverToRegions);
        stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
      }
      printClusterDistribution(cluster, actionsTaken);
      isBalanced = true;
      for (Function<BalancerClusterState, Boolean> condition : expectations) {
        // Check if we've met all expectations for the candidate generator
        if (!condition.apply(cluster)) {
          isBalanced = false;
          break;
        }
      }
      if (isBalanced) { // Check if the balancer thinks we're done too
        LOG.info("All balancer conditions passed. Checking if balancer thinks it's done.");
        if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
          LOG.info("Balancer would still like to run");
          isBalanced = false;
        } else {
          LOG.info("Balancer is done");
        }
      }
    }
    LOG.info("Balancing took {}sec", Duration.ofMillis(balancingMillis).getSeconds());
  }
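  /*
   * Expectations passed to runBalancerToExhaustion are plain predicates over the cluster
   * state, so tests can assert arbitrary invariants. A minimal example (hypothetical; not
   * used by existing tests) that requires every server to hold at least one region:
   *
   *   Function<BalancerClusterState, Boolean> noEmptyServers = cluster -> {
   *     for (int[] regions : cluster.regionsPerServer) {
   *       if (regions == null || regions.length == 0) {
   *         return false;
   *       }
   *     }
   *     return true;
   *   };
   */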
  /**
   * Prints the current cluster distribution of regions per table per server.
   */
  static void printClusterDistribution(BalancerClusterState cluster, long actionsTaken) {
    LOG.info("=== Cluster Distribution after {} balancer actions taken ===", actionsTaken);

    for (int i = 0; i < cluster.numServers; i++) {
      int[] regions = cluster.regionsPerServer[i];
      int regionCount = (regions == null) ? 0 : regions.length;

      LOG.info("Server {}: {} regions", cluster.servers[i].getServerName(), regionCount);

      if (regionCount > 0) {
        Map<TableName, Integer> tableRegionCounts = new HashMap<>();

        for (int regionIndex : regions) {
          RegionInfo regionInfo = cluster.regions[regionIndex];
          TableName tableName = regionInfo.getTable();
          tableRegionCounts.put(tableName, tableRegionCounts.getOrDefault(tableName, 0) + 1);
        }

        tableRegionCounts
          .forEach((table, count) -> LOG.info(" - Table {}: {} regions", table, count));
      }
    }

    LOG.info("===========================================");
  }

  /**
   * Partitions the given serverToRegions map by table. The tables are derived from the RegionInfo
   * objects found in serverToRegions.
   * @param serverToRegions The map of servers to their assigned regions.
   * @return A map of tables to their server-to-region assignments.
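   * <p>
   * Shape by example (hypothetical data): an input of
   * {@code {s1 -> [t1.r1, t2.r1], s2 -> [t1.r2]}} yields
   * {@code {t1 -> {s1 -> [t1.r1], s2 -> [t1.r2]}, t2 -> {s1 -> [t2.r1], s2 -> []}}};
   * every table's inner map includes every server, even servers holding no regions of that table.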
   */
  public static Map<TableName, Map<ServerName, List<RegionInfo>>>
    partitionRegionsByTable(Map<ServerName, List<RegionInfo>> serverToRegions) {

    // First, gather all tables from the regions
    Set<TableName> allTables = new HashSet<>();
    for (List<RegionInfo> regions : serverToRegions.values()) {
      for (RegionInfo region : regions) {
        allTables.add(region.getTable());
      }
    }

    Map<TableName, Map<ServerName, List<RegionInfo>>> tablesToServersToRegions = new HashMap<>();

    // Initialize each table with all servers mapped to empty lists
    for (TableName table : allTables) {
      Map<ServerName, List<RegionInfo>> serverMap = new HashMap<>();
      for (ServerName server : serverToRegions.keySet()) {
        serverMap.put(server, new ArrayList<>());
      }
      tablesToServersToRegions.put(table, serverMap);
    }

    // Distribute regions to their respective tables
    for (Map.Entry<ServerName, List<RegionInfo>> serverAndRegions : serverToRegions.entrySet()) {
      ServerName server = serverAndRegions.getKey();
      List<RegionInfo> regions = serverAndRegions.getValue();

      for (RegionInfo region : regions) {
        TableName regionTable = region.getTable();
        // Now we know for sure regionTable is in allTables
        Map<ServerName, List<RegionInfo>> tableServerMap =
          tablesToServersToRegions.get(regionTable);
        tableServerMap.get(server).add(region);
      }
    }

    return tablesToServersToRegions;
  }

  static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState cluster,
    Configuration conf) {
    StochasticLoadBalancer stochasticLoadBalancer =
      new StochasticLoadBalancer(new DummyMetricsStochasticBalancer());
    stochasticLoadBalancer.setClusterInfoProvider(new DummyClusterInfoProvider(conf));
    stochasticLoadBalancer.loadConf(conf);
    stochasticLoadBalancer.initCosts(cluster);
    return stochasticLoadBalancer;
  }

  static BalancerClusterState
    createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> serverToRegions) {
    return new BalancerClusterState(serverToRegions, null, null, null, null);
  }

  /**
   * Validates that replicas of the same region are isolated from one another. Ensures that no
   * server hosts more than one replica of the same region (i.e., regions with identical start and
   * end keys).
   * @param cluster The current state of the cluster.
   * @return true if all replicas are properly isolated, false otherwise.
   */
  static boolean areAllReplicasDistributed(BalancerClusterState cluster) {
    // Iterate over each server
    for (int[] regionsPerServer : cluster.regionsPerServer) {
      if (regionsPerServer == null || regionsPerServer.length == 0) {
        continue; // Skip empty servers
      }

      Set<ReplicaKey> foundKeys = new HashSet<>();
      for (int regionIndex : regionsPerServer) {
        RegionInfo regionInfo = cluster.regions[regionIndex];
        ReplicaKey replicaKey = new ReplicaKey(regionInfo);
        if (foundKeys.contains(replicaKey)) {
          // Violation: multiple replicas of the same region on the same server
          LOG.warn("Replica isolation violated: one server hosts multiple replicas of key [{}].",
            generateRegionKey(regionInfo));
          return false;
        }

        foundKeys.add(replicaKey);
      }
    }

    LOG.info(
      "Replica isolation validation passed: no server hosts multiple replicas of the same region.");
    return true;
  }

  /**
   * Generic method to validate table isolation: passes only if no server hosts regions of the
   * given table alongside regions of any other table.
   */
  static boolean isTableIsolated(BalancerClusterState cluster, TableName tableName,
    String tableType) {
    for (int i = 0; i < cluster.numServers; i++) {
      int[] regionsOnServer = cluster.regionsPerServer[i];
      if (regionsOnServer == null || regionsOnServer.length == 0) {
        continue; // Skip empty servers
      }

      boolean hasTargetTableRegion = false;
      boolean hasOtherTableRegion = false;

      for (int regionIndex : regionsOnServer) {
        RegionInfo regionInfo = cluster.regions[regionIndex];
        if (regionInfo.getTable().equals(tableName)) {
          hasTargetTableRegion = true;
        } else {
          hasOtherTableRegion = true;
        }

        // If the target table and any other table are on the same server, isolation is violated
        if (hasTargetTableRegion && hasOtherTableRegion) {
          LOG.debug(
            "Server {} has both {} table regions and other table regions, violating isolation.",
            cluster.servers[i].getServerName(), tableType);
          return false;
        }
      }
    }
    LOG.debug("{} table isolation validation passed.", tableType);
    return true;
  }

  /**
   * Generates a unique key for a region based on its start and end keys. This method ensures that
   * regions with identical start and end keys have the same key.
   * @param regionInfo The RegionInfo object.
   * @return A string representing the unique key of the region.
   */
  private static String generateRegionKey(RegionInfo regionInfo) {
    // Using Base64 encoding for byte arrays to ensure uniqueness and readability
    String startKey = Base64.getEncoder().encodeToString(regionInfo.getStartKey());
    String endKey = Base64.getEncoder().encodeToString(regionInfo.getEndKey());

    return regionInfo.getTable().getNameAsString() + ":" + startKey + ":" + endKey;
  }

}