/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.favored;

import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;

import edu.umd.cs.findbugs.annotations.NonNull;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that assigns favored
 * nodes for each region. There is a Primary RegionServer that hosts the region, and then there is
 * Secondary and Tertiary RegionServers. Currently, the favored nodes information is used in
 * creating HDFS files - the Primary RegionServer passes the primary, secondary, tertiary node
 * addresses as hints to the DistributedFileSystem API for creating files on the filesystem. These
 * nodes are treated as hints by the HDFS to place the blocks of the file. This alleviates the
 * problem to do with reading from remote nodes (since we can make the Secondary RegionServer as the
 * new Primary RegionServer) after a region is recovered. This should help provide consistent read
 * latencies for the regions even when their primary region servers die.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class FavoredNodeLoadBalancer extends BaseLoadBalancer implements FavoredNodesPromoter {
  private static final Logger LOG = LoggerFactory.getLogger(FavoredNodeLoadBalancer.class);

  /** Source of the favored-nodes mapping; obtained from the master services in initialize(). */
  private FavoredNodesManager fnm;

  /** Initializes the base balancer and then looks up the favored nodes manager. */
  @Override
  public void initialize() {
    super.initialize();
    this.fnm = services.getFavoredNodesManager();
  }

  /**
   * Builds plans that move regions of one table back onto their favored nodes.
   * <p>
   * A region already on its primary favored node is left alone. If the primary is online the
   * region is moved there. If the primary is offline, a region that already sits on its secondary
   * or tertiary stays put; otherwise it is moved to whichever of the online secondary/tertiary
   * hosts fewer regions (the secondary wins a tie).
   * @param tableName      the table being balanced (not used by this implementation)
   * @param loadOfOneTable current server to regions mapping for the table
   * @return the region moves to perform; empty when nothing has to move
   */
  @Override
  protected List<RegionPlan> balanceTable(TableName tableName,
    Map<ServerName, List<RegionInfo>> loadOfOneTable) {
    // TODO: Look at whether the Stochastic load balancer can be integrated with this
    List<RegionPlan> plans = new ArrayList<>();
    ServerManager serverMgr = services.getServerManager();

    // Favored nodes are stored without a start code, so index the online servers the same way.
    Map<ServerName, ServerName> onlineServerByAddress = new HashMap<>();
    for (ServerName online : serverMgr.getOnlineServersList()) {
      onlineServerByAddress.put(withoutStartCode(online), online);
    }

    for (Map.Entry<ServerName, List<RegionInfo>> entry : loadOfOneTable.entrySet()) {
      ServerName currentServer = entry.getKey();
      ServerName currentAddress = withoutStartCode(currentServer);
      for (RegionInfo region : entry.getValue()) {
        if (!FavoredNodesManager.isFavoredNodeApplicable(region)) {
          continue;
        }
        List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
        if (favoredNodes == null || favoredNodes.isEmpty()) {
          continue; // no favored nodes recorded for this region
        }
        ServerName primary = favoredNodes.get(0);
        if (primary.equals(currentAddress)) {
          continue; // already on the primary node
        }
        // check whether the primary is available
        ServerName destination = onlineServerByAddress.get(primary);
        if (destination == null) {
          // A favored-nodes list may be shorter than three entries; treat the gaps as absent
          // instead of failing with IndexOutOfBoundsException.
          ServerName secondary = favoredNodeAt(favoredNodes, 1);
          ServerName tertiary = favoredNodeAt(favoredNodes, 2);
          if (currentAddress.equals(secondary) || currentAddress.equals(tertiary)) {
            continue; // primary is down but the region is on another favored node
          }
          // the region is currently on none of the favored nodes
          // get it on one of them if possible
          ServerName onlineSecondary = onlineServerByAddress.get(secondary);
          ServerName onlineTertiary = onlineServerByAddress.get(tertiary);
          // Only ask for the load of servers that are actually online; never pass null.
          ServerMetrics secondaryLoad =
            onlineSecondary == null ? null : serverMgr.getLoad(onlineSecondary);
          ServerMetrics tertiaryLoad =
            onlineTertiary == null ? null : serverMgr.getLoad(onlineTertiary);
          if (secondaryLoad != null && tertiaryLoad != null) {
            boolean secondaryIsBusier =
              secondaryLoad.getRegionMetrics().size() > tertiaryLoad.getRegionMetrics().size();
            destination = secondaryIsBusier ? onlineTertiary : onlineSecondary;
          } else if (secondaryLoad != null) {
            destination = onlineSecondary;
          } else if (tertiaryLoad != null) {
            destination = onlineTertiary;
          }
        }

        if (destination != null) {
          plans.add(new RegionPlan(region, currentServer, destination));
        }
      }
    }
    return plans;
  }

  /**
   * Assigns the given regions to the given servers while honoring favored nodes.
   * <p>
   * Falls back to the parent's round-robin assignment when favored nodes cannot be placed on the
   * given servers, or when any exception is raised while computing the favored assignment.
   * @param regions the regions to assign
   * @param servers the servers that may host them
   * @return server to regions assignment
   * @throws HBaseIOException if the fallback assignment fails
   */
  @Override
  @NonNull
  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
    List<ServerName> servers) throws HBaseIOException {
    Map<ServerName, List<RegionInfo>> assignmentMap;
    try {
      FavoredNodeAssignmentHelper assignmentHelper =
        new FavoredNodeAssignmentHelper(servers, rackManager);
      assignmentHelper.initialize();
      if (!assignmentHelper.canPlaceFavoredNodes()) {
        return super.roundRobinAssignment(regions, servers);
      }
      // Segregate the regions into two types:
      // 1. The regions that have favored node assignment, and where at least
      // one of the favored node is still alive. In this case, try to adhere
      // to the current favored nodes assignment as much as possible - i.e.,
      // if the current primary is gone, then make the secondary or tertiary
      // as the new host for the region (based on their current load).
      // Note that we don't change the favored
      // node assignments here (even though one or more favored node is currently
      // down). It is up to the balanceCluster to do this hard work. The HDFS
      // can handle the fact that some nodes in the favored nodes hint is down
      // It'd allocate some other DNs. In combination with stale settings for HDFS,
      // we should be just fine.
      // 2. The regions that currently don't have favored node assignment. We will
      // need to come up with favored nodes assignments for them. The corner case
      // in (1) above is that all the nodes are unavailable and in that case, we
      // will note that this region doesn't have favored nodes.
      Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>> segregatedRegions =
        segregateRegionsAndAssignRegionsWithFavoredNodes(regions, servers);
      Map<ServerName, List<RegionInfo>> regionsWithFavoredNodesMap = segregatedRegions.getFirst();
      List<RegionInfo> regionsWithNoFavoredNodes = segregatedRegions.getSecond();
      assignmentMap = new HashMap<>();
      roundRobinAssignmentImpl(assignmentHelper, assignmentMap, regionsWithNoFavoredNodes, servers);
      // Merge per server. A plain putAll would replace the region list of any server present in
      // both maps and silently drop the regions that were just placed on it round-robin.
      mergeAssignmentMaps(assignmentMap, regionsWithFavoredNodesMap);
    } catch (Exception ex) {
      LOG.warn("Encountered exception while doing favored-nodes assignment " + ex
        + " Falling back to regular assignment", ex);
      assignmentMap = super.roundRobinAssignment(regions, servers);
    }
    return assignmentMap;
  }

  /**
   * Picks a server for a single region, preferring one of its favored nodes.
   * <p>
   * When the region has favored nodes and one of them is in {@code servers}, that server is
   * returned (the list is scanned in order). Otherwise the parent's random pick becomes the
   * primary and secondary/tertiary nodes are generated and recorded for the region. Any exception
   * results in a fallback to the parent's random assignment.
   * @param regionInfo the region to assign
   * @param servers    the candidate servers
   * @return the chosen server
   * @throws HBaseIOException if the parent's assignment fails
   */
  @Override
  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
    throws HBaseIOException {
    try {
      FavoredNodeAssignmentHelper assignmentHelper =
        new FavoredNodeAssignmentHelper(servers, rackManager);
      assignmentHelper.initialize();
      ServerName primary = super.randomAssignment(regionInfo, servers);
      if (
        !FavoredNodesManager.isFavoredNodeApplicable(regionInfo)
          || !assignmentHelper.canPlaceFavoredNodes()
      ) {
        return primary;
      }
      List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
      // check if we have a favored nodes mapping for this region and if so, return
      // a server from the favored nodes list if the passed 'servers' contains this
      // server as well (available servers, that is)
      if (favoredNodes != null) {
        for (ServerName favoredNode : favoredNodes) {
          ServerName serverWithLegitStartCode = availableServersContains(servers, favoredNode);
          if (serverWithLegitStartCode != null) {
            return serverWithLegitStartCode;
          }
        }
      }
      List<RegionInfo> regions = new ArrayList<>(1);
      regions.add(regionInfo);
      Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
      primaryRSMap.put(regionInfo, primary);
      assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
      return primary;
    } catch (Exception ex) {
      LOG.warn("Encountered exception while doing favored-nodes (random)assignment " + ex
        + " Falling back to regular assignment", ex);
      return super.randomAssignment(regionInfo, servers);
    }
  }

  /**
   * Splits the regions into those that can be placed on one of their favored nodes right away and
   * those that have no usable favored node.
   * @param regions          the regions to inspect
   * @param availableServers the servers that may be assigned to
   * @return a pair of (assignment for regions placed on a favored node, regions for which no
   *         favored node is recorded or none of them is available)
   */
  private Pair<Map<ServerName, List<RegionInfo>>, List<RegionInfo>>
    segregateRegionsAndAssignRegionsWithFavoredNodes(List<RegionInfo> regions,
      List<ServerName> availableServers) {
    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes =
      new HashMap<>(regions.size() / 2);
    List<RegionInfo> regionsWithNoFavoredNodes = new ArrayList<>(regions.size() / 2);
    for (RegionInfo region : regions) {
      List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
      ServerName primaryHost = null;
      ServerName secondaryHost = null;
      ServerName tertiaryHost = null;
      if (favoredNodes != null) {
        for (ServerName favoredNode : favoredNodes) {
          ServerName serverWithLegitStartCode =
            availableServersContains(availableServers, favoredNode);
          if (serverWithLegitStartCode == null) {
            continue; // this favored node is not among the available servers
          }
          Position position = FavoredNodesPlan.getFavoredServerPosition(favoredNodes, favoredNode);
          if (PRIMARY.equals(position)) {
            primaryHost = serverWithLegitStartCode;
          } else if (SECONDARY.equals(position)) {
            secondaryHost = serverWithLegitStartCode;
          } else if (TERTIARY.equals(position)) {
            tertiaryHost = serverWithLegitStartCode;
          }
        }
        assignRegionToAvailableFavoredNode(assignmentMapForFavoredNodes, region, primaryHost,
          secondaryHost, tertiaryHost);
      }
      if (primaryHost == null && secondaryHost == null && tertiaryHost == null) {
        // all favored nodes unavailable
        regionsWithNoFavoredNodes.add(region);
      }
    }
    return new Pair<>(assignmentMapForFavoredNodes, regionsWithNoFavoredNodes);
  }

  /**
   * Does a check of the hostname and port and returns the server name from the servers list that
   * matched (the favored node will have a start code of -1 but we want the real server with the
   * legit start code).
   * @return the matching entry of {@code servers}, or {@code null} when none matches
   */
  private ServerName availableServersContains(List<ServerName> servers, ServerName favoredNode) {
    for (ServerName server : servers) {
      if (ServerName.isSameAddress(favoredNode, server)) {
        return server;
      }
    }
    return null;
  }

  /**
   * Adds the region to the map under the best available favored host: the primary if present,
   * otherwise the less loaded of secondary and tertiary (the tertiary wins a tie), otherwise
   * whichever one of the two is present. Does nothing when all three hosts are {@code null}.
   */
  private void assignRegionToAvailableFavoredNode(
    Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes, RegionInfo region,
    ServerName primaryHost, ServerName secondaryHost, ServerName tertiaryHost) {
    if (primaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, primaryHost);
    } else if (secondaryHost != null && tertiaryHost != null) {
      // assign the region to the one with a lower load
      // (both have the desired hdfs blocks)
      // NOTE(review): assumes getLoad() is non-null for servers in the available list; a null
      // here raises NullPointerException, which the callers in this class catch and turn into a
      // fallback to the regular assignment - confirm that is the intended behaviour.
      ServerMetrics tertiaryLoad = services.getServerManager().getLoad(tertiaryHost);
      ServerMetrics secondaryLoad = services.getServerManager().getLoad(secondaryHost);
      ServerName lessLoaded;
      if (secondaryLoad.getRegionMetrics().size() < tertiaryLoad.getRegionMetrics().size()) {
        lessLoaded = secondaryHost;
      } else {
        lessLoaded = tertiaryHost;
      }
      addRegionToMap(assignmentMapForFavoredNodes, region, lessLoaded);
    } else if (secondaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, secondaryHost);
    } else if (tertiaryHost != null) {
      addRegionToMap(assignmentMapForFavoredNodes, region, tertiaryHost);
    }
  }

  /** Appends the region to the list kept for {@code host}, creating the list on first use. */
  private void addRegionToMap(Map<ServerName, List<RegionInfo>> assignmentMapForFavoredNodes,
    RegionInfo region, ServerName host) {
    List<RegionInfo> regionsOnServer = assignmentMapForFavoredNodes.get(host);
    if (regionsOnServer == null) {
      regionsOnServer = new ArrayList<>();
      assignmentMapForFavoredNodes.put(host, regionsOnServer);
    }
    regionsOnServer.add(region);
  }

  /**
   * Returns the favored nodes recorded for the region, as reported by the favored nodes manager.
   * @param regionInfo the region to look up
   * @return the favored nodes; callers in this class treat {@code null} as "none recorded"
   */
  public List<ServerName> getFavoredNodes(RegionInfo regionInfo) {
    return this.fnm.getFavoredNodes(regionInfo);
  }

  /**
   * Places the primaries of the given regions round-robin, recording the placement in
   * {@code assignmentMap}, then generates and records secondary and tertiary nodes for them.
   */
  private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
    Map<ServerName, List<RegionInfo>> assignmentMap, List<RegionInfo> regions,
    List<ServerName> servers) throws IOException {
    Map<RegionInfo, ServerName> primaryRSMap = new HashMap<>();
    // figure the primary RSs
    assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
    assignSecondaryAndTertiaryNodesForRegion(assignmentHelper, regions, primaryRSMap);
  }

  /**
   * Generates secondary and tertiary nodes for the given primaries and records the resulting
   * favored nodes of every region with the favored nodes manager.
   */
  private void assignSecondaryAndTertiaryNodesForRegion(
    FavoredNodeAssignmentHelper assignmentHelper, List<RegionInfo> regions,
    Map<RegionInfo, ServerName> primaryRSMap) throws IOException {
    // figure the secondary and tertiary RSs
    Map<RegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
      assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);

    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    // now record all the assignments so that we can serve queries later
    for (RegionInfo region : regions) {
      // Store the favored nodes without startCode for the ServerName objects
      // We don't care about the startcode; but only the hostname really
      List<ServerName> favoredNodesForRegion = new ArrayList<>(3);
      favoredNodesForRegion.add(withoutStartCode(primaryRSMap.get(region)));
      ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(region);
      if (secondaryAndTertiaryNodes != null) {
        favoredNodesForRegion.add(withoutStartCode(secondaryAndTertiaryNodes[0]));
        favoredNodesForRegion.add(withoutStartCode(secondaryAndTertiaryNodes[1]));
      }
      regionFNMap.put(region, favoredNodesForRegion);
    }
    fnm.updateFavoredNodes(regionFNMap);
  }

  /**
   * Generate Favored Nodes for daughters during region split. If the parent does not have FN,
   * regenerates them for the daughters. If the parent has FN, inherit two FN from parent for each
   * daughter and generate the remaining. The primary FN for both the daughters should be the same
   * as parent. Inherit the secondary FN from the parent but keep it different for each daughter.
   * Choose the remaining FN randomly. This would give us better distribution over a period of time
   * after enough splits.
   */
  @Override
  public void generateFavoredNodesForDaughter(List<ServerName> servers, RegionInfo parent,
    RegionInfo regionA, RegionInfo regionB) throws IOException {
    Map<RegionInfo, List<ServerName>> result = new HashMap<>();
    FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
    helper.initialize();

    List<ServerName> parentFavoredNodes = getFavoredNodes(parent);
    if (parentFavoredNodes == null) {
      LOG.debug("Unable to find favored nodes for parent, {}"
        + " generating new favored nodes for daughter", parent);
      result.put(regionA, helper.generateFavoredNodes(regionA));
      result.put(regionB, helper.generateFavoredNodes(regionB));
    } else {
      // Lets get the primary and secondary from parent for regionA
      Set<ServerName> regionAFN =
        getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
      result.put(regionA, Lists.newArrayList(regionAFN));

      // Lets get the primary and tertiary from parent for regionB
      Set<ServerName> regionBFN =
        getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
      result.put(regionB, Lists.newArrayList(regionBFN));
    }

    fnm.updateFavoredNodes(result);
  }

  /**
   * Builds a daughter's favored nodes: the parent's nodes at the two given positions (when the
   * parent has them), topped up with generated nodes until
   * {@link FavoredNodeAssignmentHelper#FAVORED_NODES_NUM} entries exist.
   * @return the favored nodes in insertion order
   */
  private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
    List<ServerName> parentFavoredNodes, Position primary, Position secondary) throws IOException {
    Set<ServerName> daughterFN = Sets.newLinkedHashSet();
    // Strictly greater: index == size is already out of bounds for List.get().
    if (parentFavoredNodes.size() > primary.ordinal()) {
      daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
    }

    if (parentFavoredNodes.size() > secondary.ordinal()) {
      daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
    }

    while (daughterFN.size() < FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
      ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
      daughterFN.add(newNode);
    }
    return daughterFN;
  }

  /**
   * Generate favored nodes for a region during merge. Choose the FN from one of the sources to keep
   * it simple: the merged region gets whatever is recorded for the first merge parent.
   */
  @Override
  public void generateFavoredNodesForMergedRegion(RegionInfo merged, RegionInfo[] mergeParents)
    throws IOException {
    Map<RegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
    regionFNMap.put(merged, getFavoredNodes(mergeParents[0]));
    fnm.updateFavoredNodes(regionFNMap);
  }

  /** Returns a copy of the server name that carries host and port but no start code. */
  private static ServerName withoutStartCode(ServerName server) {
    return ServerName.valueOf(server.getHostname(), server.getPort(), ServerName.NON_STARTCODE);
  }

  /** Returns the favored node at {@code index}, or {@code null} when the list is too short. */
  private static ServerName favoredNodeAt(List<ServerName> favoredNodes, int index) {
    return index < favoredNodes.size() ? favoredNodes.get(index) : null;
  }

  /**
   * Adds every assignment of {@code source} to {@code target}. When a server is present in both
   * maps the two region lists are concatenated into a new list (target's regions first).
   */
  private static void mergeAssignmentMaps(Map<ServerName, List<RegionInfo>> target,
    Map<ServerName, List<RegionInfo>> source) {
    for (Map.Entry<ServerName, List<RegionInfo>> entry : source.entrySet()) {
      List<RegionInfo> existing = target.get(entry.getKey());
      if (existing == null) {
        target.put(entry.getKey(), entry.getValue());
      } else {
        List<RegionInfo> merged = new ArrayList<>(existing.size() + entry.getValue().size());
        merged.addAll(existing);
        merged.addAll(entry.getValue());
        target.put(entry.getKey(), merged);
      }
    }
  }
}