001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MAX_RUNNING_TIME_KEY;
021import static org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer.MIN_COST_NEED_BALANCE_KEY;
022
023import java.time.Duration;
024import java.util.ArrayList;
025import java.util.Base64;
026import java.util.HashMap;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Set;
031import java.util.function.Function;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.hbase.HConstants;
034import org.apache.hadoop.hbase.ServerName;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.client.RegionInfo;
037import org.apache.hadoop.hbase.master.RegionPlan;
038import org.apache.hadoop.hbase.master.balancer.replicas.ReplicaKey;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042public final class CandidateGeneratorTestUtil {
043
044  private static final Logger LOG = LoggerFactory.getLogger(CandidateGeneratorTestUtil.class);
045
046  private CandidateGeneratorTestUtil() {
047  }
048
049  static void runBalancerToExhaustion(Configuration conf,
050    Map<ServerName, List<RegionInfo>> serverToRegions,
051    Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost) {
052    runBalancerToExhaustion(conf, serverToRegions, expectations, targetMaxBalancerCost, 15000);
053  }
054
055  static void runBalancerToExhaustion(Configuration conf,
056    Map<ServerName, List<RegionInfo>> serverToRegions,
057    Set<Function<BalancerClusterState, Boolean>> expectations, float targetMaxBalancerCost,
058    long maxRunningTime) {
059    // Do the full plan. We're testing with a lot of regions
060    conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
061    conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
062
063    conf.setFloat(MIN_COST_NEED_BALANCE_KEY, targetMaxBalancerCost);
064
065    BalancerClusterState cluster = createMockBalancerClusterState(serverToRegions);
066    StochasticLoadBalancer stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
067    printClusterDistribution(cluster, 0);
068    int balancerRuns = 0;
069    int actionsTaken = 0;
070    long balancingMillis = 0;
071    boolean isBalanced = false;
072    while (!isBalanced) {
073      balancerRuns++;
074      if (balancerRuns > 1000) {
075        throw new RuntimeException("Balancer failed to find balance & meet expectations");
076      }
077      long start = System.currentTimeMillis();
078      List<RegionPlan> regionPlans =
079        stochasticLoadBalancer.balanceCluster(partitionRegionsByTable(serverToRegions));
080      balancingMillis += System.currentTimeMillis() - start;
081      actionsTaken++;
082      if (regionPlans != null) {
083        // Apply all plans to serverToRegions
084        for (RegionPlan rp : regionPlans) {
085          ServerName source = rp.getSource();
086          ServerName dest = rp.getDestination();
087          RegionInfo region = rp.getRegionInfo();
088
089          // Update serverToRegions
090          serverToRegions.get(source).remove(region);
091          serverToRegions.get(dest).add(region);
092          actionsTaken++;
093        }
094
095        // Now rebuild cluster and balancer from updated serverToRegions
096        cluster = createMockBalancerClusterState(serverToRegions);
097        stochasticLoadBalancer = buildStochasticLoadBalancer(cluster, conf);
098      }
099      printClusterDistribution(cluster, actionsTaken);
100      isBalanced = true;
101      for (Function<BalancerClusterState, Boolean> condition : expectations) {
102        // Check if we've met all expectations for the candidate generator
103        if (!condition.apply(cluster)) {
104          isBalanced = false;
105          break;
106        }
107      }
108      if (isBalanced) { // Check if the balancer thinks we're done too
109        LOG.info("All balancer conditions passed. Checking if balancer thinks it's done.");
110        if (stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
111          LOG.info("Balancer would still like to run");
112          isBalanced = false;
113        } else {
114          LOG.info("Balancer is done");
115        }
116      }
117    }
118    LOG.info("Balancing took {}sec", Duration.ofMillis(balancingMillis).toMinutes());
119  }
120
121  /**
122   * Prints the current cluster distribution of regions per table per server
123   */
124  static void printClusterDistribution(BalancerClusterState cluster, long actionsTaken) {
125    LOG.info("=== Cluster Distribution after {} balancer actions taken ===", actionsTaken);
126
127    for (int i = 0; i < cluster.numServers; i++) {
128      int[] regions = cluster.regionsPerServer[i];
129      int regionCount = (regions == null) ? 0 : regions.length;
130
131      LOG.info("Server {}: {} regions", cluster.servers[i].getServerName(), regionCount);
132
133      if (regionCount > 0) {
134        Map<TableName, Integer> tableRegionCounts = new HashMap<>();
135
136        for (int regionIndex : regions) {
137          RegionInfo regionInfo = cluster.regions[regionIndex];
138          TableName tableName = regionInfo.getTable();
139          tableRegionCounts.put(tableName, tableRegionCounts.getOrDefault(tableName, 0) + 1);
140        }
141
142        tableRegionCounts
143          .forEach((table, count) -> LOG.info("  - Table {}: {} regions", table, count));
144      }
145    }
146
147    LOG.info("===========================================");
148  }
149
150  /**
151   * Partitions the given serverToRegions map by table The tables are derived from the RegionInfo
152   * objects found in serverToRegions.
153   * @param serverToRegions The map of servers to their assigned regions.
154   * @return A map of tables to their server-to-region assignments.
155   */
156  public static Map<TableName, Map<ServerName, List<RegionInfo>>>
157    partitionRegionsByTable(Map<ServerName, List<RegionInfo>> serverToRegions) {
158
159    // First, gather all tables from the regions
160    Set<TableName> allTables = new HashSet<>();
161    for (List<RegionInfo> regions : serverToRegions.values()) {
162      for (RegionInfo region : regions) {
163        allTables.add(region.getTable());
164      }
165    }
166
167    Map<TableName, Map<ServerName, List<RegionInfo>>> tablesToServersToRegions = new HashMap<>();
168
169    // Initialize each table with all servers mapped to empty lists
170    for (TableName table : allTables) {
171      Map<ServerName, List<RegionInfo>> serverMap = new HashMap<>();
172      for (ServerName server : serverToRegions.keySet()) {
173        serverMap.put(server, new ArrayList<>());
174      }
175      tablesToServersToRegions.put(table, serverMap);
176    }
177
178    // Distribute regions to their respective tables
179    for (Map.Entry<ServerName, List<RegionInfo>> serverAndRegions : serverToRegions.entrySet()) {
180      ServerName server = serverAndRegions.getKey();
181      List<RegionInfo> regions = serverAndRegions.getValue();
182
183      for (RegionInfo region : regions) {
184        TableName regionTable = region.getTable();
185        // Now we know for sure regionTable is in allTables
186        Map<ServerName, List<RegionInfo>> tableServerMap =
187          tablesToServersToRegions.get(regionTable);
188        tableServerMap.get(server).add(region);
189      }
190    }
191
192    return tablesToServersToRegions;
193  }
194
195  static StochasticLoadBalancer buildStochasticLoadBalancer(BalancerClusterState cluster,
196    Configuration conf) {
197    StochasticLoadBalancer stochasticLoadBalancer =
198      new StochasticLoadBalancer(new DummyMetricsStochasticBalancer());
199    stochasticLoadBalancer.setClusterInfoProvider(new DummyClusterInfoProvider(conf));
200    stochasticLoadBalancer.loadConf(conf);
201    stochasticLoadBalancer.initCosts(cluster);
202    return stochasticLoadBalancer;
203  }
204
205  static BalancerClusterState
206    createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> serverToRegions) {
207    return new BalancerClusterState(serverToRegions, null, null, null, null);
208  }
209
210  /**
211   * Validates that each replica is isolated from its others. Ensures that no server hosts more than
212   * one replica of the same region (i.e., regions with identical start and end keys).
213   * @param cluster The current state of the cluster.
214   * @return true if all replicas are properly isolated, false otherwise.
215   */
216  static boolean areAllReplicasDistributed(BalancerClusterState cluster) {
217    // Iterate over each server
218    for (int[] regionsPerServer : cluster.regionsPerServer) {
219      if (regionsPerServer == null || regionsPerServer.length == 0) {
220        continue; // Skip empty servers
221      }
222
223      Set<ReplicaKey> foundKeys = new HashSet<>();
224      for (int regionIndex : regionsPerServer) {
225        RegionInfo regionInfo = cluster.regions[regionIndex];
226        ReplicaKey replicaKey = new ReplicaKey(regionInfo);
227        if (foundKeys.contains(replicaKey)) {
228          // Violation: Multiple replicas of the same region on the same server
229          LOG.warn("Replica isolation violated: one server hosts multiple replicas of key [{}].",
230            generateRegionKey(regionInfo));
231          return false;
232        }
233
234        foundKeys.add(replicaKey);
235      }
236    }
237
238    LOG.info(
239      "Replica isolation validation passed: No server hosts multiple replicas of the same region.");
240    return true;
241  }
242
243  /**
244   * Generic method to validate table isolation.
245   */
246  static boolean isTableIsolated(BalancerClusterState cluster, TableName tableName,
247    String tableType) {
248    for (int i = 0; i < cluster.numServers; i++) {
249      int[] regionsOnServer = cluster.regionsPerServer[i];
250      if (regionsOnServer == null || regionsOnServer.length == 0) {
251        continue; // Skip empty servers
252      }
253
254      boolean hasTargetTableRegion = false;
255      boolean hasOtherTableRegion = false;
256
257      for (int regionIndex : regionsOnServer) {
258        RegionInfo regionInfo = cluster.regions[regionIndex];
259        if (regionInfo.getTable().equals(tableName)) {
260          hasTargetTableRegion = true;
261        } else {
262          hasOtherTableRegion = true;
263        }
264
265        // If the target table and any other table are on the same server, isolation is violated
266        if (hasTargetTableRegion && hasOtherTableRegion) {
267          LOG.debug(
268            "Server {} has both {} table regions and other table regions, violating isolation.",
269            cluster.servers[i].getServerName(), tableType);
270          return false;
271        }
272      }
273    }
274    LOG.debug("{} table isolation validation passed.", tableType);
275    return true;
276  }
277
278  /**
279   * Generates a unique key for a region based on its start and end keys. This method ensures that
280   * regions with identical start and end keys have the same key.
281   * @param regionInfo The RegionInfo object.
282   * @return A string representing the unique key of the region.
283   */
284  private static String generateRegionKey(RegionInfo regionInfo) {
285    // Using Base64 encoding for byte arrays to ensure uniqueness and readability
286    String startKey = Base64.getEncoder().encodeToString(regionInfo.getStartKey());
287    String endKey = Base64.getEncoder().encodeToString(regionInfo.getEndKey());
288
289    return regionInfo.getTable().getNameAsString() + ":" + startKey + ":" + endKey;
290  }
291
292}