001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import java.util.function.Consumer; 021import org.apache.yetus.audience.InterfaceAudience; 022 023/** 024 * A helper class to compute a scaled cost using 025 * {@link org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#DescriptiveStatistics()}. 026 * It assumes that this is a zero sum set of costs. It assumes that the worst case possible is all 027 * of the elements in one region server and the rest having 0. 028 */ 029@InterfaceAudience.Private 030final class DoubleArrayCost { 031 032 private double[] costs; 033 034 // computeCost call is expensive so we use this flag to indicate whether we need to recalculate 035 // the cost by calling computeCost 036 private boolean costsChanged; 037 038 private double cost; 039 040 void prepare(int length) { 041 if (costs == null || costs.length != length) { 042 costs = new double[length]; 043 } 044 } 045 046 /** 047 * We do not want to introduce a getCosts method to let upper layer get the cost array directly, 048 * so here we introduce this method to take a {@link Consumer} as parameter, where we will pass 049 * the actual cost array in, so you can change the element of the cost array in the 050 * {@link Consumer} implementation. 051 * <p/> 052 * Usually, in prepare method, you need to fill all the elements of the cost array, while in 053 * regionMoved method, you just need to update the element for the effect region servers. 054 */ 055 void applyCostsChange(Consumer<double[]> consumer) { 056 consumer.accept(costs); 057 costsChanged = true; 058 } 059 060 double cost() { 061 if (costsChanged) { 062 cost = computeCost(costs); 063 costsChanged = false; 064 } 065 return cost; 066 } 067 068 private static double computeCost(double[] stats) { 069 if (stats == null || stats.length == 0) { 070 return 0; 071 } 072 double totalCost = 0; 073 double total = getSum(stats); 074 075 double count = stats.length; 076 double mean = total / count; 077 078 for (int i = 0; i < stats.length; i++) { 079 double n = stats[i]; 080 double diff = (mean - n) * (mean - n); 081 totalCost += diff; 082 } 083 // No need to compute standard deviation with division by cluster size when scaling. 084 totalCost = Math.sqrt(totalCost); 085 return CostFunction.scale(getMinSkew(total, count), getMaxSkew(total, count), totalCost); 086 } 087 088 private static double getSum(double[] stats) { 089 double total = 0; 090 for (double s : stats) { 091 total += s; 092 } 093 return total; 094 } 095 096 /** 097 * Return the min skew of distribution 098 * @param total is total number of regions 099 */ 100 public static double getMinSkew(double total, double numServers) { 101 if (numServers == 0) { 102 return 0; 103 } 104 double mean = total / numServers; 105 // It's possible that there aren't enough regions to go around 106 double min; 107 if (numServers > total) { 108 min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total); 109 } else { 110 // Some will have 1 more than everything else. 111 int numHigh = (int) (total - (Math.floor(mean) * numServers)); 112 int numLow = (int) (numServers - numHigh); 113 min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) 114 + numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean)); 115 } 116 return Math.sqrt(min); 117 } 118 119 /** 120 * Return the max deviation of distribution Compute max as if all region servers had 0 and one had 121 * the sum of all costs. This must be a zero sum cost for this to make sense. 122 * @param total is total number of regions 123 */ 124 public static double getMaxSkew(double total, double numServers) { 125 if (numServers == 0) { 126 return 0; 127 } 128 double mean = total / numServers; 129 return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean); 130 } 131}