001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.balancer;
019
020import edu.umd.cs.findbugs.annotations.NonNull;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Collection;
024import java.util.Collections;
025import java.util.HashMap;
026import java.util.Iterator;
027import java.util.List;
028import java.util.Map;
029import java.util.NavigableMap;
030import java.util.Random;
031import java.util.Set;
032import java.util.TreeMap;
033import java.util.concurrent.ThreadLocalRandom;
034import java.util.function.Predicate;
035import java.util.stream.Collectors;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.hbase.ClusterMetrics;
038import org.apache.hadoop.hbase.HBaseIOException;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.ServerMetrics;
041import org.apache.hadoop.hbase.ServerName;
042import org.apache.hadoop.hbase.TableName;
043import org.apache.hadoop.hbase.client.RegionInfo;
044import org.apache.hadoop.hbase.client.TableDescriptor;
045import org.apache.hadoop.hbase.master.LoadBalancer;
046import org.apache.hadoop.hbase.master.MasterServices;
047import org.apache.hadoop.hbase.master.RackManager;
048import org.apache.hadoop.hbase.master.RegionPlan;
049import org.apache.yetus.audience.InterfaceAudience;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
054import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
055import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
056import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
057
058/**
059 * The base class for load balancers. It provides the functions used to by
060 * {@link org.apache.hadoop.hbase.master.assignment.AssignmentManager} to assign regions in the edge
061 * cases. It doesn't provide an implementation of the actual balancing algorithm.
062 */
063@InterfaceAudience.Private
064@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "IS2_INCONSISTENT_SYNC",
065    justification = "All the unsynchronized access is before initialization")
066public abstract class BaseLoadBalancer implements LoadBalancer {
067
068  private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
069
070  public static final String BALANCER_DECISION_BUFFER_ENABLED =
071    "hbase.master.balancer.decision.buffer.enabled";
072  public static final boolean DEFAULT_BALANCER_DECISION_BUFFER_ENABLED = false;
073
074  public static final String BALANCER_REJECTION_BUFFER_ENABLED =
075    "hbase.master.balancer.rejection.buffer.enabled";
076  public static final boolean DEFAULT_BALANCER_REJECTION_BUFFER_ENABLED = false;
077
078  public static final boolean DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE = false;
079
080  protected static final int MIN_SERVER_BALANCE = 2;
081  private volatile boolean stopped = false;
082
083  private static final Predicate<ServerMetrics> IDLE_SERVER_PREDICATOR =
084    load -> load.getRegionMetrics().isEmpty();
085
086  protected volatile RegionLocationFinder regionFinder;
087  protected boolean useRegionFinder;
088  protected boolean isByTable = DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE;
089
090  // slop for regions
091  protected float slop;
092  protected volatile RackManager rackManager;
093  protected MetricsBalancer metricsBalancer = null;
094  protected ClusterMetrics clusterStatus = null;
095  protected ServerName masterServerName;
096  protected MasterServices services;
097
098  /**
099   * @deprecated since 2.4.0, will be removed in 3.0.0.
100   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
101   */
102  @Deprecated
103  protected boolean onlySystemTablesOnMaster;
104
105  /**
106   * The constructor that uses the basic MetricsBalancer
107   */
108  protected BaseLoadBalancer() {
109    this(null);
110  }
111
112  /**
113   * This Constructor accepts an instance of MetricsBalancer, which will be used instead of creating
114   * a new one
115   */
116  protected BaseLoadBalancer(MetricsBalancer metricsBalancer) {
117    this.metricsBalancer = (metricsBalancer != null) ? metricsBalancer : new MetricsBalancer();
118  }
119
120  protected final Configuration getConf() {
121    return services.getConfiguration();
122  }
123
124  /**
125   * Check if a region belongs to some system table. If so, the primary replica may be expected to
126   * be put on the master regionserver.
127   * @deprecated since 2.4.0, will be removed in 3.0.0.
128   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
129   */
130  @Deprecated
131  public boolean shouldBeOnMaster(RegionInfo region) {
132    return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
133  }
134
135  /**
136   * Balance the regions that should be on master regionserver.
137   * @deprecated since 2.4.0, will be removed in 3.0.0.
138   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
139   */
140  @Deprecated
141  protected List<RegionPlan> balanceMasterRegions(Map<ServerName, List<RegionInfo>> clusterMap) {
142    if (masterServerName == null || clusterMap == null || clusterMap.size() <= 1) return null;
143    List<RegionPlan> plans = null;
144    List<RegionInfo> regions = clusterMap.get(masterServerName);
145    if (regions != null) {
146      Iterator<ServerName> keyIt = null;
147      for (RegionInfo region : regions) {
148        if (shouldBeOnMaster(region)) continue;
149
150        // Find a non-master regionserver to host the region
151        if (keyIt == null || !keyIt.hasNext()) {
152          keyIt = clusterMap.keySet().iterator();
153        }
154        ServerName dest = keyIt.next();
155        if (masterServerName.equals(dest)) {
156          if (!keyIt.hasNext()) {
157            keyIt = clusterMap.keySet().iterator();
158          }
159          dest = keyIt.next();
160        }
161
162        // Move this region away from the master regionserver
163        RegionPlan plan = new RegionPlan(region, masterServerName, dest);
164        if (plans == null) {
165          plans = new ArrayList<>();
166        }
167        plans.add(plan);
168      }
169    }
170    for (Map.Entry<ServerName, List<RegionInfo>> server : clusterMap.entrySet()) {
171      if (masterServerName.equals(server.getKey())) continue;
172      for (RegionInfo region : server.getValue()) {
173        if (!shouldBeOnMaster(region)) continue;
174
175        // Move this region to the master regionserver
176        RegionPlan plan = new RegionPlan(region, server.getKey(), masterServerName);
177        if (plans == null) {
178          plans = new ArrayList<>();
179        }
180        plans.add(plan);
181      }
182    }
183    return plans;
184  }
185
186  /**
187   * If master is configured to carry system tables only, in here is where we figure what to assign
188   * it.
189   * @deprecated since 2.4.0, will be removed in 3.0.0.
190   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
191   */
192  @Deprecated
193  @NonNull
194  protected Map<ServerName, List<RegionInfo>>
195    assignMasterSystemRegions(Collection<RegionInfo> regions, List<ServerName> servers) {
196    Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
197    if (this.onlySystemTablesOnMaster) {
198      if (masterServerName != null && servers.contains(masterServerName)) {
199        assignments.put(masterServerName, new ArrayList<>());
200        for (RegionInfo region : regions) {
201          if (shouldBeOnMaster(region)) {
202            assignments.get(masterServerName).add(region);
203          }
204        }
205      }
206    }
207    return assignments;
208  }
209
210  @Override
211  public synchronized void updateClusterMetrics(ClusterMetrics st) {
212    this.clusterStatus = st;
213    if (useRegionFinder) {
214      regionFinder.setClusterMetrics(st);
215    }
216  }
217
218  @Override
219  public void setMasterServices(MasterServices masterServices) {
220    masterServerName = masterServices.getServerName();
221    this.services = masterServices;
222  }
223
224  @Override
225  public synchronized void postMasterStartupInitialize() {
226    if (services != null && regionFinder != null) {
227      try {
228        Set<RegionInfo> regions =
229          services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
230        regionFinder.refreshAndWait(regions);
231      } catch (Exception e) {
232        LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
233      }
234    }
235  }
236
237  protected final boolean idleRegionServerExist(BalancerClusterState c) {
238    boolean isServerExistsWithMoreRegions = false;
239    boolean isServerExistsWithZeroRegions = false;
240    for (int[] serverList : c.regionsPerServer) {
241      if (serverList.length > 1) {
242        isServerExistsWithMoreRegions = true;
243      }
244      if (serverList.length == 0) {
245        isServerExistsWithZeroRegions = true;
246      }
247    }
248    return isServerExistsWithMoreRegions && isServerExistsWithZeroRegions;
249  }
250
251  protected final boolean sloppyRegionServerExist(ClusterLoadState cs) {
252    if (slop < 0) {
253      LOG.debug("Slop is less than zero, not checking for sloppiness.");
254      return false;
255    }
256    float average = cs.getLoadAverage(); // for logging
257    int floor = (int) Math.floor(average * (1 - slop));
258    int ceiling = (int) Math.ceil(average * (1 + slop));
259    if (!(cs.getMaxLoad() > ceiling || cs.getMinLoad() < floor)) {
260      NavigableMap<ServerAndLoad, List<RegionInfo>> serversByLoad = cs.getServersByLoad();
261      if (LOG.isTraceEnabled()) {
262        // If nothing to balance, then don't say anything unless trace-level logging.
263        LOG.trace("Skipping load balancing because balanced cluster; " + "servers="
264          + cs.getNumServers() + " regions=" + cs.getNumRegions() + " average=" + average
265          + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded="
266          + serversByLoad.firstKey().getLoad());
267      }
268      return false;
269    }
270    return true;
271  }
272
273  /**
274   * Generates a bulk assignment plan to be used on cluster startup using a simple round-robin
275   * assignment.
276   * <p/>
277   * Takes a list of all the regions and all the servers in the cluster and returns a map of each
278   * server to the regions that it should be assigned.
279   * <p/>
280   * Currently implemented as a round-robin assignment. Same invariant as load balancing, all
281   * servers holding floor(avg) or ceiling(avg). TODO: Use block locations from HDFS to place
282   * regions with their blocks
283   * @param regions all regions
284   * @param servers all servers
285   * @return map of server to the regions it should take, or emptyMap if no assignment is possible
286   *         (ie. no servers)
287   */
288  @Override
289  @NonNull
290  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
291    List<ServerName> servers) throws HBaseIOException {
292    metricsBalancer.incrMiscInvocations();
293    Map<ServerName, List<RegionInfo>> assignments = assignMasterSystemRegions(regions, servers);
294    if (!assignments.isEmpty()) {
295      servers = new ArrayList<>(servers);
296      // Guarantee not to put other regions on master
297      servers.remove(masterServerName);
298      List<RegionInfo> masterRegions = assignments.get(masterServerName);
299      if (!masterRegions.isEmpty()) {
300        regions = new ArrayList<>(regions);
301        regions.removeAll(masterRegions);
302      }
303    }
304    /**
305     * only need assign system table
306     */
307    if (regions.isEmpty()) {
308      return assignments;
309    }
310
311    int numServers = servers == null ? 0 : servers.size();
312    if (numServers == 0) {
313      LOG.warn("Wanted to do round robin assignment but no servers to assign to");
314      return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions));
315    }
316
317    // TODO: instead of retainAssignment() and roundRobinAssignment(), we should just run the
318    // normal LB.balancerCluster() with unassignedRegions. We only need to have a candidate
319    // generator for AssignRegionAction. The LB will ensure the regions are mostly local
320    // and balanced. This should also run fast with fewer number of iterations.
321
322    if (numServers == 1) { // Only one server, nothing fancy we can do here
323      ServerName server = servers.get(0);
324      assignments.put(server, new ArrayList<>(regions));
325      return assignments;
326    }
327
328    BalancerClusterState cluster = createCluster(servers, regions);
329    roundRobinAssignment(cluster, regions, servers, assignments);
330    return assignments;
331  }
332
333  private BalancerClusterState createCluster(List<ServerName> servers,
334    Collection<RegionInfo> regions) throws HBaseIOException {
335    boolean hasRegionReplica = false;
336    try {
337      if (services != null && services.getTableDescriptors() != null) {
338        Map<String, TableDescriptor> tds = services.getTableDescriptors().getAll();
339        for (RegionInfo regionInfo : regions) {
340          TableDescriptor td = tds.get(regionInfo.getTable().getNameWithNamespaceInclAsString());
341          if (td != null && td.getRegionReplication() > 1) {
342            hasRegionReplica = true;
343            break;
344          }
345        }
346      }
347    } catch (IOException ioe) {
348      throw new HBaseIOException(ioe);
349    }
350
351    // Get the snapshot of the current assignments for the regions in question, and then create
352    // a cluster out of it. Note that we might have replicas already assigned to some servers
353    // earlier. So we want to get the snapshot to see those assignments, but this will only contain
354    // replicas of the regions that are passed (for performance).
355    Map<ServerName, List<RegionInfo>> clusterState = null;
356    if (!hasRegionReplica) {
357      clusterState = getRegionAssignmentsByServer(regions);
358    } else {
359      // for the case where we have region replica it is better we get the entire cluster's snapshot
360      clusterState = getRegionAssignmentsByServer(null);
361    }
362
363    for (ServerName server : servers) {
364      if (!clusterState.containsKey(server)) {
365        clusterState.put(server, Collections.emptyList());
366      }
367    }
368    return new BalancerClusterState(regions, clusterState, null, this.regionFinder, rackManager,
369      null);
370  }
371
372  private List<ServerName> findIdleServers(List<ServerName> servers) {
373    return this.services.getServerManager().getOnlineServersListWithPredicator(servers,
374      IDLE_SERVER_PREDICATOR);
375  }
376
377  /**
378   * Used to assign a single region to a random server.
379   */
380  @Override
381  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
382    throws HBaseIOException {
383    metricsBalancer.incrMiscInvocations();
384    if (servers != null && servers.contains(masterServerName)) {
385      if (shouldBeOnMaster(regionInfo)) {
386        return masterServerName;
387      }
388      if (!LoadBalancer.isTablesOnMaster(getConf())) {
389        // Guarantee we do not put any regions on master
390        servers = new ArrayList<>(servers);
391        servers.remove(masterServerName);
392      }
393    }
394
395    int numServers = servers == null ? 0 : servers.size();
396    if (numServers == 0) {
397      LOG.warn("Wanted to retain assignment but no servers to assign to");
398      return null;
399    }
400    if (numServers == 1) { // Only one server, nothing fancy we can do here
401      return servers.get(0);
402    }
403    List<ServerName> idleServers = findIdleServers(servers);
404    if (idleServers.size() == 1) {
405      return idleServers.get(0);
406    }
407    final List<ServerName> finalServers = idleServers.isEmpty() ? servers : idleServers;
408    List<RegionInfo> regions = Lists.newArrayList(regionInfo);
409    BalancerClusterState cluster = createCluster(finalServers, regions);
410    return randomAssignment(cluster, regionInfo, finalServers);
411  }
412
413  /**
414   * Generates a bulk assignment startup plan, attempting to reuse the existing assignment
415   * information from META, but adjusting for the specified list of available/online servers
416   * available for assignment.
417   * <p>
418   * Takes a map of all regions to their existing assignment from META. Also takes a list of online
419   * servers for regions to be assigned to. Attempts to retain all assignment, so in some instances
420   * initial assignment will not be completely balanced.
421   * <p>
422   * Any leftover regions without an existing server to be assigned to will be assigned randomly to
423   * available servers.
424   * @param regions regions and existing assignment from meta
425   * @param servers available servers
426   * @return map of servers and regions to be assigned to them, or emptyMap if no assignment is
427   *         possible (ie. no servers)
428   */
429  @Override
430  @NonNull
431  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
432    List<ServerName> servers) throws HBaseIOException {
433    // Update metrics
434    metricsBalancer.incrMiscInvocations();
435    Map<ServerName, List<RegionInfo>> assignments =
436      assignMasterSystemRegions(regions.keySet(), servers);
437    if (!assignments.isEmpty()) {
438      servers = new ArrayList<>(servers);
439      // Guarantee not to put other regions on master
440      servers.remove(masterServerName);
441      List<RegionInfo> masterRegions = assignments.get(masterServerName);
442      regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
443        .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
444    }
445    if (regions.isEmpty()) {
446      return assignments;
447    }
448
449    int numServers = servers == null ? 0 : servers.size();
450    if (numServers == 0) {
451      LOG.warn("Wanted to do retain assignment but no servers to assign to");
452      return Collections.singletonMap(BOGUS_SERVER_NAME, new ArrayList<>(regions.keySet()));
453    }
454    if (numServers == 1) { // Only one server, nothing fancy we can do here
455      ServerName server = servers.get(0);
456      assignments.put(server, new ArrayList<>(regions.keySet()));
457      return assignments;
458    }
459
460    // Group all the old assignments by their hostname.
461    // We can't group directly by ServerName since the servers all have
462    // new start-codes.
463
464    // Group the servers by their hostname. It's possible we have multiple
465    // servers on the same host on different ports.
466    ArrayListMultimap<String, ServerName> serversByHostname = ArrayListMultimap.create();
467    for (ServerName server : servers) {
468      assignments.put(server, new ArrayList<>());
469      serversByHostname.put(server.getHostnameLowerCase(), server);
470    }
471
472    // Collection of the hostnames that used to have regions
473    // assigned, but for which we no longer have any RS running
474    // after the cluster restart.
475    Set<String> oldHostsNoLongerPresent = Sets.newTreeSet();
476
477    // If the old servers aren't present, lets assign those regions later.
478    List<RegionInfo> randomAssignRegions = Lists.newArrayList();
479
480    int numRandomAssignments = 0;
481    int numRetainedAssigments = 0;
482    for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) {
483      RegionInfo region = entry.getKey();
484      ServerName oldServerName = entry.getValue();
485      List<ServerName> localServers = new ArrayList<>();
486      if (oldServerName != null) {
487        localServers = serversByHostname.get(oldServerName.getHostnameLowerCase());
488      }
489      if (localServers.isEmpty()) {
490        // No servers on the new cluster match up with this hostname, assign randomly, later.
491        randomAssignRegions.add(region);
492        if (oldServerName != null) {
493          oldHostsNoLongerPresent.add(oldServerName.getHostnameLowerCase());
494        }
495      } else if (localServers.size() == 1) {
496        // the usual case - one new server on same host
497        ServerName target = localServers.get(0);
498        assignments.get(target).add(region);
499        numRetainedAssigments++;
500      } else {
501        // multiple new servers in the cluster on this same host
502        if (localServers.contains(oldServerName)) {
503          assignments.get(oldServerName).add(region);
504          numRetainedAssigments++;
505        } else {
506          ServerName target = null;
507          for (ServerName tmp : localServers) {
508            if (tmp.getPort() == oldServerName.getPort()) {
509              target = tmp;
510              assignments.get(tmp).add(region);
511              numRetainedAssigments++;
512              break;
513            }
514          }
515          if (target == null) {
516            randomAssignRegions.add(region);
517          }
518        }
519      }
520    }
521
522    // If servers from prior assignment aren't present, then lets do randomAssignment on regions.
523    if (randomAssignRegions.size() > 0) {
524      BalancerClusterState cluster = createCluster(servers, regions.keySet());
525      for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) {
526        ServerName sn = entry.getKey();
527        for (RegionInfo region : entry.getValue()) {
528          cluster.doAssignRegion(region, sn);
529        }
530      }
531      for (RegionInfo region : randomAssignRegions) {
532        ServerName target = randomAssignment(cluster, region, servers);
533        assignments.get(target).add(region);
534        numRandomAssignments++;
535      }
536    }
537
538    String randomAssignMsg = "";
539    if (numRandomAssignments > 0) {
540      randomAssignMsg = numRandomAssignments + " regions were assigned "
541        + "to random hosts, since the old hosts for these regions are no "
542        + "longer present in the cluster. These hosts were:\n  "
543        + Joiner.on("\n  ").join(oldHostsNoLongerPresent);
544    }
545
546    LOG.info("Reassigned " + regions.size() + " regions. " + numRetainedAssigments
547      + " retained the pre-restart assignment. " + randomAssignMsg);
548    return assignments;
549  }
550
551  protected float getDefaultSlop() {
552    return 0.2f;
553  }
554
555  private RegionLocationFinder createRegionLocationFinder(Configuration conf) {
556    RegionLocationFinder finder = new RegionLocationFinder();
557    finder.setConf(conf);
558    finder.setServices(services);
559    return finder;
560  }
561
562  protected void loadConf(Configuration conf) {
563    this.slop = conf.getFloat("hbase.regions.slop", getDefaultSlop());
564    this.rackManager = new RackManager(getConf());
565    this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(conf);
566    useRegionFinder = conf.getBoolean("hbase.master.balancer.uselocality", true);
567    if (useRegionFinder) {
568      regionFinder = createRegionLocationFinder(conf);
569    } else {
570      regionFinder = null;
571    }
572    this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE,
573      DEFAULT_HBASE_MASTER_LOADBALANCE_BYTABLE);
574    // Print out base configs. Don't print overallSlop since it for simple balancer exclusively.
575    LOG.info("slop={}", this.slop);
576  }
577
578  @Override
579  public void initialize() {
580    loadConf(getConf());
581  }
582
583  @Override
584  public void regionOnline(RegionInfo regionInfo, ServerName sn) {
585  }
586
587  @Override
588  public void regionOffline(RegionInfo regionInfo) {
589  }
590
591  @Override
592  public boolean isStopped() {
593    return stopped;
594  }
595
596  @Override
597  public void stop(String why) {
598    LOG.info("Load Balancer stop requested: {}", why);
599    stopped = true;
600  }
601
602  /**
603   * Updates the balancer status tag reported to JMX
604   */
605  public void updateBalancerStatus(boolean status) {
606    metricsBalancer.balancerStatus(status);
607  }
608
609  /**
610   * Used to assign a single region to a random server.
611   */
612  private ServerName randomAssignment(BalancerClusterState cluster, RegionInfo regionInfo,
613    List<ServerName> servers) {
614    int numServers = servers.size(); // servers is not null, numServers > 1
615    ServerName sn = null;
616    final int maxIterations = numServers * 4;
617    int iterations = 0;
618    List<ServerName> usedSNs = new ArrayList<>(servers.size());
619    Random rand = ThreadLocalRandom.current();
620    do {
621      int i = rand.nextInt(numServers);
622      sn = servers.get(i);
623      if (!usedSNs.contains(sn)) {
624        usedSNs.add(sn);
625      }
626    } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations);
627    if (iterations >= maxIterations) {
628      // We have reached the max. Means the servers that we collected is still lowering the
629      // availability
630      for (ServerName unusedServer : servers) {
631        if (!usedSNs.contains(unusedServer)) {
632          // check if any other unused server is there for us to use.
633          // If so use it. Else we have not other go but to go with one of them
634          if (!cluster.wouldLowerAvailability(regionInfo, unusedServer)) {
635            sn = unusedServer;
636            break;
637          }
638        }
639      }
640    }
641    cluster.doAssignRegion(regionInfo, sn);
642    return sn;
643  }
644
645  /**
646   * Round-robin a list of regions to a list of servers
647   */
648  private void roundRobinAssignment(BalancerClusterState cluster, List<RegionInfo> regions,
649    List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
650    Random rand = ThreadLocalRandom.current();
651    List<RegionInfo> unassignedRegions = new ArrayList<>();
652    int numServers = servers.size();
653    int numRegions = regions.size();
654    int max = (int) Math.ceil((float) numRegions / numServers);
655    int serverIdx = 0;
656    if (numServers > 1) {
657      serverIdx = rand.nextInt(numServers);
658    }
659    int regionIdx = 0;
660    for (int j = 0; j < numServers; j++) {
661      ServerName server = servers.get((j + serverIdx) % numServers);
662      List<RegionInfo> serverRegions = new ArrayList<>(max);
663      for (int i = regionIdx; i < numRegions; i += numServers) {
664        RegionInfo region = regions.get(i % numRegions);
665        if (cluster.wouldLowerAvailability(region, server)) {
666          unassignedRegions.add(region);
667        } else {
668          serverRegions.add(region);
669          cluster.doAssignRegion(region, server);
670        }
671      }
672      assignments.put(server, serverRegions);
673      regionIdx++;
674    }
675
676    List<RegionInfo> lastFewRegions = new ArrayList<>();
677    // assign the remaining by going through the list and try to assign to servers one-by-one
678    serverIdx = rand.nextInt(numServers);
679    for (RegionInfo region : unassignedRegions) {
680      boolean assigned = false;
681      for (int j = 0; j < numServers; j++) { // try all servers one by one
682        ServerName server = servers.get((j + serverIdx) % numServers);
683        if (cluster.wouldLowerAvailability(region, server)) {
684          continue;
685        } else {
686          assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
687          cluster.doAssignRegion(region, server);
688          serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
689          assigned = true;
690          break;
691        }
692      }
693      if (!assigned) {
694        lastFewRegions.add(region);
695      }
696    }
697    // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
698    // make it optimal later. we can end up with this if numReplicas > numServers.
699    for (RegionInfo region : lastFewRegions) {
700      int i = rand.nextInt(numServers);
701      ServerName server = servers.get(i);
702      assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
703      cluster.doAssignRegion(region, server);
704    }
705  }
706
707  private Map<ServerName, List<RegionInfo>>
708    getRegionAssignmentsByServer(Collection<RegionInfo> regions) {
709    if (this.services != null && this.services.getAssignmentManager() != null) {
710      return this.services.getAssignmentManager().getSnapShotOfAssignment(regions);
711    } else {
712      return new HashMap<>();
713    }
714  }
715
716  protected final Map<ServerName, List<RegionInfo>>
717    toEnsumbleTableLoad(Map<TableName, Map<ServerName, List<RegionInfo>>> LoadOfAllTable) {
718    Map<ServerName, List<RegionInfo>> returnMap = new TreeMap<>();
719    for (Map<ServerName, List<RegionInfo>> serverNameListMap : LoadOfAllTable.values()) {
720      serverNameListMap.forEach((serverName, regionInfoList) -> {
721        List<RegionInfo> regionInfos =
722          returnMap.computeIfAbsent(serverName, k -> new ArrayList<>());
723        regionInfos.addAll(regionInfoList);
724      });
725    }
726    return returnMap;
727  }
728
729  /**
730   * Perform the major balance operation for table, all sub classes should override this method.
731   * <p/>
732   * Will be invoked by {@link #balanceCluster(Map)}. If
733   * {@link HConstants#HBASE_MASTER_LOADBALANCE_BYTABLE} is enabled, we will call this method
734   * multiple times, one table a time, where we will only pass in the regions for a single table
735   * each time. If not, we will pass in all the regions at once, and the {@code tableName} will be
736   * {@link HConstants#ENSEMBLE_TABLE_NAME}.
737   * @param tableName      the table to be balanced
738   * @param loadOfOneTable region load of servers for the specific one table
739   * @return List of plans
740   */
741  protected abstract List<RegionPlan> balanceTable(TableName tableName,
742    Map<ServerName, List<RegionInfo>> loadOfOneTable);
743
744  /**
745   * Called before actually executing balanceCluster. The sub classes could override this method to
746   * do some initialization work.
747   */
748  protected void
749    preBalanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
750  }
751
752  /**
753   * Perform the major balance operation for cluster, will invoke
754   * {@link #balanceTable(TableName, Map)} to do actual balance.
755   * <p/>
756   * THIs method is marked as final which means you should not override this method. See the javadoc
757   * for {@link #balanceTable(TableName, Map)} for more details.
758   * @param loadOfAllTable region load of servers for all table
759   * @return a list of regions to be moved, including source and destination, or null if cluster is
760   *         already balanced
761   * @see #balanceTable(TableName, Map)
762   */
763  @Override
764  public synchronized final List<RegionPlan>
765    balanceCluster(Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) {
766    preBalanceCluster(loadOfAllTable);
767    if (isByTable) {
768      List<RegionPlan> result = new ArrayList<>();
769      loadOfAllTable.forEach((tableName, loadOfOneTable) -> {
770        LOG.info("Start Generate Balance plan for table: " + tableName);
771        List<RegionPlan> partialPlans = balanceTable(tableName, loadOfOneTable);
772        if (partialPlans != null) {
773          result.addAll(partialPlans);
774        }
775      });
776      return result;
777    } else {
778      LOG.debug("Start Generate Balance plan for cluster.");
779      return balanceTable(HConstants.ENSEMBLE_TABLE_NAME, toEnsumbleTableLoad(loadOfAllTable));
780    }
781  }
782
783  @Override
784  public synchronized void onConfigurationChange(Configuration conf) {
785    loadConf(conf);
786  }
787}