001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.HashMap; 026import java.util.Iterator; 027import java.util.List; 028import java.util.Map; 029import java.util.NavigableMap; 030import java.util.Random; 031import java.util.Set; 032import java.util.TreeMap; 033import java.util.concurrent.ThreadLocalRandom; 034import java.util.function.Predicate; 035import java.util.stream.Collectors; 036import org.apache.hadoop.conf.Configuration; 037import org.apache.hadoop.hbase.ClusterMetrics; 038import org.apache.hadoop.hbase.HBaseIOException; 039import org.apache.hadoop.hbase.HConstants; 040import org.apache.hadoop.hbase.ServerMetrics; 041import org.apache.hadoop.hbase.ServerName; 042import org.apache.hadoop.hbase.TableName; 043import org.apache.hadoop.hbase.client.RegionInfo; 044import org.apache.hadoop.hbase.client.TableDescriptor; 045import org.apache.hadoop.hbase.master.LoadBalancer; 046import org.apache.hadoop.hbase.master.MasterServices; 047import org.apache.hadoop.hbase.master.RackManager; 048import org.apache.hadoop.hbase.master.RegionPlan; 049import org.apache.yetus.audience.InterfaceAudience; 050import org.slf4j.Logger; 051import org.slf4j.LoggerFactory; 052 053import org.apache.hbase.thirdparty.com.google.common.base.Joiner; 054import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap; 055import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 056import org.apache.hbase.thirdparty.com.google.common.collect.Sets; 057 058/** 059 * The base class for load balancers. It provides the functions used to by 060 * {@link org.apache.hadoop.hbase.master.assignment.AssignmentManager} to assign regions in the edge 061 * cases. It doesn't provide an implementation of the actual balancing algorithm. 062 */ 063@InterfaceAudience.Private 064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "IS2_INCONSISTENT_SYNC", 065 justification = "All the unsynchronized access is before initialization") 066public abstract class BaseLoadBalancer implements LoadBalancer { 067 068 private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class); 069 070 public static final String BALANCER_DECISION_BUFFER_ENABLED = 071 "hbase.master.balancer.decision.buffer.enabled"; 072 public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false; 073 074 public static final String BALANCER_REJECTION_BUFFER_ENABLED = 075 "hbase.master.balancer.rejection.buffer.enabled"; 076 public static final boolean DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED = false; 077 078 public static final boolean DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE = false; 079 080 protected static final int MIN_SERVER_BALANCE = 2; 081 private volatile boolean stopped = false; 082 083 private static final Predicate<ServerMetrics> IDLE_SERVER_PREDICATOR = 084 load -> load.getRegionMetrics().isEmpty(); 085 086 protected volatile RegionLocationFinder regionFinder; 087 protected boolean useRegionFinder; 088 protected boolean isByTable = DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE; 089 090 // slop for regions 091 protected float slop; 092 protected volatile RackManager rackManager; 093 protected MetricsBalancer metricsBalancer = null; 094 protected ClusterMetrics clusterStatus = null; 095 protected ServerName masterServerName; 096 protected MasterServices services; 097 098 /** 099 * @deprecated since 2.4.0, will be removed in 3.0.0. 100 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 101 */ 102 @Deprecated 103 protected boolean onlySystemTablesOnMaster; 104 105 /** 106 * The constructor that uses the basic MetricsBalancer 107 */ 108 protected BaseLoadBalancer() { 109 this(null); 110 } 111 112 /** 113 * This Constructor accepts an instance of MetricsBalancer, which will be used instead of creating 114 * a new one 115 */ 116 protected BaseLoadBalancer(MetricsBalancer metricsBalancer) { 117 this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer(); 118 } 119 120 protected final Configuration getConf() { 121 return services.getConfiguration(); 122 } 123 124 /** 125 * Check if a region belongs to some system table. If so, the primary replica may be expected to 126 * be put on the master regionserver. 127 * @deprecated since 2.4.0, will be removed in 3.0.0. 128 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 129 */ 130 @Deprecated 131 public boolean shouldBeOnMaster(RegionInfo region) { 132 return this.onlySystemTablesOnMaster && region.getTable().isSystemTable(); 133 } 134 135 /** 136 * Balance the regions that should be on master regionserver. 137 * @deprecated since 2.4.0, will be removed in 3.0.0. 138 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 139 */ 140 @Deprecated 141 protected List<RegionPlan> balanceMasterRegions(Map<ServerName, List<RegionInfo>> clusterMap) { 142 if (masterServerName == null || clusterMap == null || clusterMap.size() <= 1) return null; 143 List<RegionPlan> plans = null; 144 List<RegionInfo> regions = clusterMap.get(masterServerName); 145 if (regions != null) { 146 Iterator<ServerName> keyIt = null; 147 for (RegionInfo region : regions) { 148 if (shouldBeOnMaster(region)) continue; 149 150 // Find a non-master regionserver to host the region 151 if (keyIt == null || !keyIt.hasNext()) { 152 keyIt = clusterMap.keySet().iterator(); 153 } 154 ServerName dest = keyIt.next(); 155 if (masterServerName.equals(dest)) { 156 if (!keyIt.hasNext()) { 157 keyIt = clusterMap.keySet().iterator(); 158 } 159 dest = keyIt.next(); 160 } 161 162 // Move this region away from the master regionserver 163 RegionPlan plan = new RegionPlan(region, masterServerName, dest); 164 if (plans == null) { 165 plans = new ArrayList<>(); 166 } 167 plans.add(plan); 168 } 169 } 170 for (Map.Entry<ServerName, List<RegionInfo>> server : clusterMap.entrySet()) { 171 if (masterServerName.equals(server.getKey())) continue; 172 for (RegionInfo region : server.getValue()) { 173 if (!shouldBeOnMaster(region)) continue; 174 175 // Move this region to the master regionserver 176 RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName); 177 if (plans == null) { 178 plans = new ArrayList<>(); 179 } 180 plans.add(plan); 181 } 182 } 183 return plans; 184 } 185 186 /** 187 * If master is configured to carry system tables only, in here is where we figure what to assign 188 * it. 189 * @deprecated since 2.4.0, will be removed in 3.0.0. 190 * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a> 191 */ 192 @Deprecated 193 @NonNull 194 protected Map<ServerName, List<RegionInfo>> 195 assignMasterSystemRegions(Collection<RegionInfo> regions, List<ServerName> servers) { 196 Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>(); 197 if (this.onlySystemTablesOnMaster) { 198 if (masterServerName != null && servers.contains(masterServerName)) { 199 assignments.put(masterServerName, new ArrayList<>()); 200 for (RegionInfo region : regions) { 201 if (shouldBeOnMaster(region)) { 202 assignments.get(masterServerName).add(region); 203 } 204 } 205 } 206 } 207 return assignments; 208 } 209 210 @Override 211 public synchronized void updateClusterMetrics(ClusterMetrics st) { 212 this.clusterStatus = st; 213 if (useRegionFinder) { 214 regionFinder.setClusterMetrics(st); 215 } 216 } 217 218 @Override 219 public void setMasterServices(MasterServices masterServices) { 220 masterServerName = masterServices.getServerName(); 221 this.services = masterServices; 222 } 223 224 @Override 225 public synchronized void postMasterStartupInitialize() { 226 if (services != null && regionFinder != null) { 227 try { 228 Set<RegionInfo> regions = 229 services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet(); 230 regionFinder.refreshAndWait(regions); 231 } catch (Exception e) { 232 LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e); 233 } 234 } 235 } 236 237 protected final boolean idleRegionServerExist(BalancerClusterState c) { 238 boolean isServerExistsWithMoreRegions = false; 239 boolean isServerExistsWithZeroRegions = false; 240 for (int[] serverList : c.regionsPerServer) { 241 if (serverList.length > 1) { 242 isServerExistsWithMoreRegions = true; 243 } 244 if (serverList.length == 0) { 245 isServerExistsWithZeroRegions = true; 246 } 247 } 248 return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions; 249 } 250 251 protected final boolean sloppyRegionServerExist(ClusterLoadState cs) { 252 if (slop < 0) { 253 LOG.debug("Slop is less than zero, not checking for sloppiness."); 254 return false; 255 } 256 float average = cs.getLoadAverage(); // for logging 257 int floor = (int) Math.floor(average * (1 - slop)); 258 int ceiling = (int) Math.ceil(average * (1 + slop)); 259 if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) { 260 NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad(); 261 if (LOG.isTraceEnabled()) { 262 // If nothing to balance, then don't say anything unless trace-level logging. 263 LOG.trace("Skipping load balancing because balanced cluster; " + "servers=" 264 + cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average 265 + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" 266 + serversByLoad.firstKey().getLoad()); 267 } 268 return false; 269 } 270 return true; 271 } 272 273 /** 274 * Generates a bulk assignment plan to be used on cluster startup using a simple round-robin 275 * assignment. 276 * <p/> 277 * Takes a list of all the regions and all the servers in the cluster and returns a map of each 278 * server to the regions that it should be assigned. 279 * <p/> 280 * Currently implemented as a round-robin assignment. Same invariant as load balancing, all 281 * servers holding floor(avg) or ceiling(avg). TODO: Use block locations from HDFS to place 282 * regions with their blocks 283 * @param regions all regions 284 * @param servers all servers 285 * @return map of server to the regions it should take, or emptyMap if no assignment is possible 286 * (ie. no servers) 287 */ 288 @Override 289 @NonNull 290 public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions, 291 List<ServerName> servers) throws HBaseIOException { 292 metricsBalancer.incrMiscInvocations(); 293 Map<ServerName, List<RegionInfo>> assignments = assignMasterSystemRegions(regions, servers); 294 if (!assignments.isEmpty()) { 295 servers = new ArrayList<>(servers); 296 // Guarantee not to put other regions on master 297 servers.remove(masterServerName); 298 List<RegionInfo> masterRegions = assignments.get(masterServerName); 299 if (!masterRegions.isEmpty()) { 300 regions = new ArrayList<>(regions); 301 regions.removeAll(masterRegions); 302 } 303 } 304 /** 305 * only need assign system table 306 */ 307 if (regions.isEmpty()) { 308 return assignments; 309 } 310 311 int numServers = servers == null ? 0 : servers.size(); 312 if (numServers == 0) { 313 LOG.warn("Wanted to do round robin assignment but no servers to assign to"); 314 return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions)); 315 } 316 317 // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the 318 // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate 319 // generator for AssignRegionAction. The LB will ensure the regions are mostly local 320 // and balanced. This should also run fast with fewer number of iterations. 321 322 if (numServers == 1) { // Only one server, nothing fancy we can do here 323 ServerName server = servers.get(0); 324 assignments.put(server, new ArrayList<>(regions)); 325 return assignments; 326 } 327 328 BalancerClusterState cluster = createCluster(servers, regions); 329 roundRobinAssignment(cluster, regions, servers, assignments); 330 return assignments; 331 } 332 333 private BalancerClusterState createCluster(List<ServerName> servers, 334 Collection<RegionInfo> regions) throws HBaseIOException { 335 boolean hasRegionReplica = false; 336 try { 337 if (services != null && services.getTableDescriptors() != null) { 338 Map<String, TableDescriptor> tds = services.getTableDescriptors().getAll(); 339 for (RegionInfo regionInfo : regions) { 340 TableDescriptor td = tds.get(regionInfo.getTable().getNameWithNamespaceInclAsString()); 341 if (td != null && td.getRegionReplication() > 1) { 342 hasRegionReplica = true; 343 break; 344 } 345 } 346 } 347 } catch (IOException ioe) { 348 throw new HBaseIOException(ioe); 349 } 350 351 // Get the snapshot of the current assignments for the regions in question, and then create 352 // a cluster out of it. Note that we might have replicas already assigned to some servers 353 // earlier. So we want to get the snapshot to see those assignments, but this will only contain 354 // replicas of the regions that are passed (for performance). 355 Map<ServerName, List<RegionInfo>> clusterState = null; 356 if (!hasRegionReplica) { 357 clusterState = getRegionAssignmentsByServer(regions); 358 } else { 359 // for the case where we have region replica it is better we get the entire cluster's snapshot 360 clusterState = getRegionAssignmentsByServer(null); 361 } 362 363 for (ServerName server : servers) { 364 if (!clusterState.containsKey(server)) { 365 clusterState.put(server, Collections.emptyList()); 366 } 367 } 368 return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager, 369 null); 370 } 371 372 private List<ServerName> findIdleServers(List<ServerName> servers) { 373 return this.services.getServerManager().getOnlineServersListWithPredicator(servers, 374 IDLE_SERVER_PREDICATOR); 375 } 376 377 /** 378 * Used to assign a single region to a random server. 379 */ 380 @Override 381 public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) 382 throws HBaseIOException { 383 metricsBalancer.incrMiscInvocations(); 384 if (servers != null && servers.contains(masterServerName)) { 385 if (shouldBeOnMaster(regionInfo)) { 386 return masterServerName; 387 } 388 if (!LoadBalancer.isTablesOnMaster(getConf())) { 389 // Guarantee we do not put any regions on master 390 servers = new ArrayList<>(servers); 391 servers.remove(masterServerName); 392 } 393 } 394 395 int numServers = servers == null ? 0 : servers.size(); 396 if (numServers == 0) { 397 LOG.warn("Wanted to retain assignment but no servers to assign to"); 398 return null; 399 } 400 if (numServers == 1) { // Only one server, nothing fancy we can do here 401 return servers.get(0); 402 } 403 List<ServerName> idleServers = findIdleServers(servers); 404 if (idleServers.size() == 1) { 405 return idleServers.get(0); 406 } 407 final List<ServerName> finalServers = idleServers.isEmpty() ? servers : idleServers; 408 List<RegionInfo> regions = Lists.newArrayList(regionInfo); 409 BalancerClusterState cluster = createCluster(finalServers, regions); 410 return randomAssignment(cluster, regionInfo, finalServers); 411 } 412 413 /** 414 * Generates a bulk assignment startup plan, attempting to reuse the existing assignment 415 * information from META, but adjusting for the specified list of available/online servers 416 * available for assignment. 417 * <p> 418 * Takes a map of all regions to their existing assignment from META. Also takes a list of online 419 * servers for regions to be assigned to. Attempts to retain all assignment, so in some instances 420 * initial assignment will not be completely balanced. 421 * <p> 422 * Any leftover regions without an existing server to be assigned to will be assigned randomly to 423 * available servers. 424 * @param regions regions and existing assignment from meta 425 * @param servers available servers 426 * @return map of servers and regions to be assigned to them, or emptyMap if no assignment is 427 * possible (ie. no servers) 428 */ 429 @Override 430 @NonNull 431 public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions, 432 List<ServerName> servers) throws HBaseIOException { 433 // Update metrics 434 metricsBalancer.incrMiscInvocations(); 435 Map<ServerName, List<RegionInfo>> assignments = 436 assignMasterSystemRegions(regions.keySet(), servers); 437 if (!assignments.isEmpty()) { 438 servers = new ArrayList<>(servers); 439 // Guarantee not to put other regions on master 440 servers.remove(masterServerName); 441 List<RegionInfo> masterRegions = assignments.get(masterServerName); 442 regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey())) 443 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 444 } 445 if (regions.isEmpty()) { 446 return assignments; 447 } 448 449 int numServers = servers == null ? 0 : servers.size(); 450 if (numServers == 0) { 451 LOG.warn("Wanted to do retain assignment but no servers to assign to"); 452 return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet())); 453 } 454 if (numServers == 1) { // Only one server, nothing fancy we can do here 455 ServerName server = servers.get(0); 456 assignments.put(server, new ArrayList<>(regions.keySet())); 457 return assignments; 458 } 459 460 // Group all the old assignments by their hostname. 461 // We can't group directly by ServerName since the servers all have 462 // new start-codes. 463 464 // Group the servers by their hostname. It's possible we have multiple 465 // servers on the same host on different ports. 466 ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create(); 467 for (ServerName server : servers) { 468 assignments.put(server, new ArrayList<>()); 469 serversByHostname.put(server.getHostnameLowerCase(), server); 470 } 471 472 // Collection of the hostnames that used to have regions 473 // assigned, but for which we no longer have any RS running 474 // after the cluster restart. 475 Set<String> oldHostsNoLongerPresent = Sets.newTreeSet(); 476 477 // If the old servers aren't present, lets assign those regions later. 478 List<RegionInfo> randomAssignRegions = Lists.newArrayList(); 479 480 int numRandomAssignments = 0; 481 int numRetainedAssigments = 0; 482 for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) { 483 RegionInfo region = entry.getKey(); 484 ServerName oldServerName = entry.getValue(); 485 List<ServerName> localServers = new ArrayList<>(); 486 if (oldServerName != null) { 487 localServers = serversByHostname.get(oldServerName.getHostnameLowerCase()); 488 } 489 if (localServers.isEmpty()) { 490 // No servers on the new cluster match up with this hostname, assign randomly, later. 491 randomAssignRegions.add(region); 492 if (oldServerName != null) { 493 oldHostsNoLongerPresent.add(oldServerName.getHostnameLowerCase()); 494 } 495 } else if (localServers.size() == 1) { 496 // the usual case - one new server on same host 497 ServerName target = localServers.get(0); 498 assignments.get(target).add(region); 499 numRetainedAssigments++; 500 } else { 501 // multiple new servers in the cluster on this same host 502 if (localServers.contains(oldServerName)) { 503 assignments.get(oldServerName).add(region); 504 numRetainedAssigments++; 505 } else { 506 ServerName target = null; 507 for (ServerName tmp : localServers) { 508 if (tmp.getPort() == oldServerName.getPort()) { 509 target = tmp; 510 assignments.get(tmp).add(region); 511 numRetainedAssigments++; 512 break; 513 } 514 } 515 if (target == null) { 516 randomAssignRegions.add(region); 517 } 518 } 519 } 520 } 521 522 // If servers from prior assignment aren't present, then lets do randomAssignment on regions. 523 if (randomAssignRegions.size() > 0) { 524 BalancerClusterState cluster = createCluster(servers, regions.keySet()); 525 for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) { 526 ServerName sn = entry.getKey(); 527 for (RegionInfo region : entry.getValue()) { 528 cluster.doAssignRegion(region, sn); 529 } 530 } 531 for (RegionInfo region : randomAssignRegions) { 532 ServerName target = randomAssignment(cluster, region, servers); 533 assignments.get(target).add(region); 534 numRandomAssignments++; 535 } 536 } 537 538 String randomAssignMsg = ""; 539 if (numRandomAssignments > 0) { 540 randomAssignMsg = numRandomAssignments + " regions were assigned " 541 + "to random hosts, since the old hosts for these regions are no " 542 + "longer present in the cluster. These hosts were:\n " 543 + Joiner.on("\n ").join(oldHostsNoLongerPresent); 544 } 545 546 LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments 547 + " retained the pre-restart assignment. " + randomAssignMsg); 548 return assignments; 549 } 550 551 protected float getDefaultSlop() { 552 return 0.2f; 553 } 554 555 private RegionLocationFinder createRegionLocationFinder(Configuration conf) { 556 RegionLocationFinder finder = new RegionLocationFinder(); 557 finder.setConf(conf); 558 finder.setServices(services); 559 return finder; 560 } 561 562 protected void loadConf(Configuration conf) { 563 this.slop = conf.getFloat("hbase.regions.slop", getDefaultSlop()); 564 this.rackManager = new RackManager(getConf()); 565 this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(conf); 566 useRegionFinder = conf.getBoolean("hbase.master.balancer.uselocality", true); 567 if (useRegionFinder) { 568 regionFinder = createRegionLocationFinder(conf); 569 } else { 570 regionFinder = null; 571 } 572 this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, 573 DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE); 574 // Print out base configs. Don't print overallSlop since it for simple balancer exclusively. 575 LOG.info("slop={}", this.slop); 576 } 577 578 @Override 579 public void initialize() { 580 loadConf(getConf()); 581 } 582 583 @Override 584 public void regionOnline(RegionInfo regionInfo, ServerName sn) { 585 } 586 587 @Override 588 public void regionOffline(RegionInfo regionInfo) { 589 } 590 591 @Override 592 public boolean isStopped() { 593 return stopped; 594 } 595 596 @Override 597 public void stop(String why) { 598 LOG.info("Load Balancer stop requested: {}", why); 599 stopped = true; 600 } 601 602 /** 603 * Updates the balancer status tag reported to JMX 604 */ 605 public void updateBalancerStatus(boolean status) { 606 metricsBalancer.balancerStatus(status); 607 } 608 609 /** 610 * Used to assign a single region to a random server. 611 */ 612 private ServerName randomAssignment(BalancerClusterState cluster, RegionInfo regionInfo, 613 List<ServerName> servers) { 614 int numServers = servers.size(); // servers is not null, numServers > 1 615 ServerName sn = null; 616 final int maxIterations = numServers * 4; 617 int iterations = 0; 618 List<ServerName> usedSNs = new ArrayList<>(servers.size()); 619 Random rand = ThreadLocalRandom.current(); 620 do { 621 int i = rand.nextInt(numServers); 622 sn = servers.get(i); 623 if (!usedSNs.contains(sn)) { 624 usedSNs.add(sn); 625 } 626 } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations); 627 if (iterations >= maxIterations) { 628 // We have reached the max. Means the servers that we collected is still lowering the 629 // availability 630 for (ServerName unusedServer : servers) { 631 if (!usedSNs.contains(unusedServer)) { 632 // check if any other unused server is there for us to use. 633 // If so use it. Else we have not other go but to go with one of them 634 if (!cluster.wouldLowerAvailability(regionInfo, unusedServer)) { 635 sn = unusedServer; 636 break; 637 } 638 } 639 } 640 } 641 cluster.doAssignRegion(regionInfo, sn); 642 return sn; 643 } 644 645 /** 646 * Round-robin a list of regions to a list of servers 647 */ 648 private void roundRobinAssignment(BalancerClusterState cluster, List<RegionInfo> regions, 649 List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) { 650 Random rand = ThreadLocalRandom.current(); 651 List<RegionInfo> unassignedRegions = new ArrayList<>(); 652 int numServers = servers.size(); 653 int numRegions = regions.size(); 654 int max = (int) Math.ceil((float) numRegions / numServers); 655 int serverIdx = 0; 656 if (numServers > 1) { 657 serverIdx = rand.nextInt(numServers); 658 } 659 int regionIdx = 0; 660 for (int j = 0; j < numServers; j++) { 661 ServerName server = servers.get((j + serverIdx) % numServers); 662 List<RegionInfo> serverRegions = new ArrayList<>(max); 663 for (int i = regionIdx; i < numRegions; i += numServers) { 664 RegionInfo region = regions.get(i % numRegions); 665 if (cluster.wouldLowerAvailability(region, server)) { 666 unassignedRegions.add(region); 667 } else { 668 serverRegions.add(region); 669 cluster.doAssignRegion(region, server); 670 } 671 } 672 assignments.put(server, serverRegions); 673 regionIdx++; 674 } 675 676 List<RegionInfo> lastFewRegions = new ArrayList<>(); 677 // assign the remaining by going through the list and try to assign to servers one-by-one 678 serverIdx = rand.nextInt(numServers); 679 for (RegionInfo region : unassignedRegions) { 680 boolean assigned = false; 681 for (int j = 0; j < numServers; j++) { // try all servers one by one 682 ServerName server = servers.get((j + serverIdx) % numServers); 683 if (cluster.wouldLowerAvailability(region, server)) { 684 continue; 685 } else { 686 assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); 687 cluster.doAssignRegion(region, server); 688 serverIdx = (j + serverIdx + 1) % numServers; // remain from next server 689 assigned = true; 690 break; 691 } 692 } 693 if (!assigned) { 694 lastFewRegions.add(region); 695 } 696 } 697 // just sprinkle the rest of the regions on random regionservers. The balanceCluster will 698 // make it optimal later. we can end up with this if numReplicas > numServers. 699 for (RegionInfo region : lastFewRegions) { 700 int i = rand.nextInt(numServers); 701 ServerName server = servers.get(i); 702 assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); 703 cluster.doAssignRegion(region, server); 704 } 705 } 706 707 private Map<ServerName, List<RegionInfo>> 708 getRegionAssignmentsByServer(Collection<RegionInfo> regions) { 709 if (this.services != null && this.services.getAssignmentManager() != null) { 710 return this.services.getAssignmentManager().getSnapShotOfAssignment(regions); 711 } else { 712 return new HashMap<>(); 713 } 714 } 715 716 protected final Map<ServerName, List<RegionInfo>> 717 toEnsumbleTableLoad(Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) { 718 Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>(); 719 for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) { 720 serverNameListMap.forEach((serverName, regionInfoList) -> { 721 List<RegionInfo> regionInfos = 722 returnMap.computeIfAbsent(serverName, k -> new ArrayList<>()); 723 regionInfos.addAll(regionInfoList); 724 }); 725 } 726 return returnMap; 727 } 728 729 /** 730 * Perform the major balance operation for table, all sub classes should override this method. 731 * <p/> 732 * Will be invoked by {@link #balanceCluster(Map)}. If 733 * {@link HConstants#HBASE_MASTER_LOADBALANCE_BYTABLE} is enabled, we will call this method 734 * multiple times, one table a time, where we will only pass in the regions for a single table 735 * each time. If not, we will pass in all the regions at once, and the {@code tableName} will be 736 * {@link HConstants#ENSEMBLE_TABLE_NAME}. 737 * @param tableName the table to be balanced 738 * @param loadOfOneTable region load of servers for the specific one table 739 * @return List of plans 740 */ 741 protected abstract List<RegionPlan> balanceTable(TableName tableName, 742 Map<ServerName, List<RegionInfo>> loadOfOneTable); 743 744 /** 745 * Called before actually executing balanceCluster. The sub classes could override this method to 746 * do some initialization work. 747 */ 748 protected void 749 preBalanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) { 750 } 751 752 /** 753 * Perform the major balance operation for cluster, will invoke 754 * {@link #balanceTable(TableName, Map)} to do actual balance. 755 * <p/> 756 * THIs method is marked as final which means you should not override this method. See the javadoc 757 * for {@link #balanceTable(TableName, Map)} for more details. 758 * @param loadOfAllTable region load of servers for all table 759 * @return a list of regions to be moved, including source and destination, or null if cluster is 760 * already balanced 761 * @see #balanceTable(TableName, Map) 762 */ 763 @Override 764 public synchronized final List<RegionPlan> 765 balanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) { 766 preBalanceCluster(loadOfAllTable); 767 if (isByTable) { 768 List<RegionPlan> result = new ArrayList<>(); 769 loadOfAllTable.forEach((tableName, loadOfOneTable) -> { 770 LOG.info("Start Generate Balance plan for table: " + tableName); 771 List<RegionPlan> partialPlans = balanceTable(tableName, loadOfOneTable); 772 if (partialPlans != null) { 773 result.addAll(partialPlans); 774 } 775 }); 776 return result; 777 } else { 778 LOG.debug("Start Generate Balance plan for cluster."); 779 return balanceTable(HConstants.ENSEMBLE_TABLE_NAME, toEnsumbleTableLoad(loadOfAllTable)); 780 } 781 } 782 783 @Override 784 public synchronized void onConfigurationChange(Configuration conf) { 785 loadConf(conf); 786 } 787}