001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.balancer; 019 020import java.util.function.Consumer; 021import org.apache.yetus.audience.InterfaceAudience; 022 023/** 024 * A helper class to compute a scaled cost using 025 * {@link org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#DescriptiveStatistics()}. 026 * It assumes that this is a zero sum set of costs. It assumes that the worst case possible is all 027 * of the elements in one region server and the rest having 0. 028 */ 029@InterfaceAudience.Private 030final class DoubleArrayCost { 031 032 private double[] costs; 033 034 // computeCost call is expensive so we use this flag to indicate whether we need to recalculate 035 // the cost by calling computeCost 036 private boolean costsChanged; 037 038 private double cost; 039 040 void prepare(int length) { 041 if (costs == null || costs.length != length) { 042 costs = new double[length]; 043 } 044 } 045 046 /** 047 * We do not want to introduce a getCosts method to let upper layer get the cost array directly, 048 * so here we introduce this method to take a {@link Consumer} as parameter, where we will pass 049 * the actual cost array in, so you can change the element of the cost array in the 050 * {@link Consumer} implementation. 051 * <p/> 052 * Usually, in prepare method, you need to fill all the elements of the cost array, while in 053 * regionMoved method, you just need to update the element for the effect region servers. 054 */ 055 void applyCostsChange(Consumer<double[]> consumer) { 056 consumer.accept(costs); 057 costsChanged = true; 058 } 059 060 double cost() { 061 if (costsChanged) { 062 cost = computeCost(costs); 063 costsChanged = false; 064 } 065 return cost; 066 } 067 068 private static double computeCost(double[] stats) { 069 if (stats == null || stats.length == 0) { 070 return 0; 071 } 072 double totalCost = 0; 073 double total = getSum(stats); 074 075 double count = stats.length; 076 double mean = total / count; 077 for (int i = 0; i < stats.length; i++) { 078 double n = stats[i]; 079 double diff = (mean - n) * (mean - n); 080 totalCost += diff; 081 } 082 // No need to compute standard deviation with division by cluster size when scaling. 083 totalCost = Math.sqrt(totalCost); 084 return CostFunction.scale(getMinSkew(total, count), getMaxSkew(total, count), totalCost); 085 } 086 087 private static double getSum(double[] stats) { 088 double total = 0; 089 for (double s : stats) { 090 total += s; 091 } 092 return total; 093 } 094 095 /** 096 * Return the min skew of distribution 097 * @param total is total number of regions 098 */ 099 public static double getMinSkew(double total, double numServers) { 100 if (numServers == 0) { 101 return 0; 102 } 103 double mean = total / numServers; 104 // It's possible that there aren't enough regions to go around 105 double min; 106 if (numServers > total) { 107 min = ((numServers - total) * mean * mean + (1 - mean) * (1 - mean) * total); 108 } else { 109 // Some will have 1 more than everything else. 110 int numHigh = (int) (total - (Math.floor(mean) * numServers)); 111 int numLow = (int) (numServers - numHigh); 112 min = numHigh * (Math.ceil(mean) - mean) * (Math.ceil(mean) - mean) 113 + numLow * (mean - Math.floor(mean)) * (mean - Math.floor(mean)); 114 } 115 return Math.sqrt(min); 116 } 117 118 /** 119 * Return the max deviation of distribution Compute max as if all region servers had 0 and one had 120 * the sum of all costs. This must be a zero sum cost for this to make sense. 121 */ 122 public static double getMaxSkew(double total, double numServers) { 123 if (numServers == 0) { 124 return 0; 125 } 126 double mean = total / numServers; 127 return Math.sqrt((total - mean) * (total - mean) + (numServers - 1) * mean * mean); 128 } 129}