001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.util.ArrayList; 021import java.util.Arrays; 022import java.util.Collections; 023import java.util.Random; 024 025/** 026 * A class that generates random numbers that follow some distribution. 027 * <p> 028 * Copied from <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>. 029 * Remove after tfile is committed and use the tfile version of this class instead. 030 * </p> 031 */ 032public class RandomDistribution { 033 /** 034 * Interface for discrete (integer) random distributions. 035 */ 036 public interface DiscreteRNG { 037 /** 038 * Get the next random number 039 * @return the next random number. 040 */ 041 int nextInt(); 042 } 043 044 /** 045 * P(i)=1/(max-min) 046 */ 047 public static final class Flat implements DiscreteRNG { 048 private final Random random; 049 private final int min; 050 private final int max; 051 052 /** 053 * Generate random integers from min (inclusive) to max (exclusive) following even distribution. 054 * The basic random number generator. Minimum integer maximum integer (exclusive). 055 */ 056 public Flat(Random random, int min, int max) { 057 if (min >= max) { 058 throw new IllegalArgumentException("Invalid range"); 059 } 060 this.random = random; 061 this.min = min; 062 this.max = max; 063 } 064 065 /** 066 * @see DiscreteRNG#nextInt() 067 */ 068 @Override 069 public int nextInt() { 070 return random.nextInt(max - min) + min; 071 } 072 } 073 074 /** 075 * Zipf distribution. The ratio of the probabilities of integer i and j is defined as follows: 076 * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma. 077 */ 078 public static final class Zipf implements DiscreteRNG { 079 private static final double DEFAULT_EPSILON = 0.001; 080 private final Random random; 081 private final ArrayList<Integer> k; 082 private final ArrayList<Double> v; 083 084 /** 085 * Constructor The random number generator. minimum integer (inclusvie) maximum integer 086 * (exclusive) parameter sigma. (sigma > 1.0) 087 */ 088 public Zipf(Random r, int min, int max, double sigma) { 089 this(r, min, max, sigma, DEFAULT_EPSILON); 090 } 091 092 /** 093 * Constructor. The random number generator. minimum integer (inclusvie) maximum integer 094 * (exclusive) parameter sigma. (sigma > 1.0) Allowable error percentage (0 < epsilon < 1.0). 095 */ 096 public Zipf(Random r, int min, int max, double sigma, double epsilon) { 097 if ((max <= min) || (sigma <= 1) || (epsilon <= 0) || (epsilon >= 0.5)) { 098 throw new IllegalArgumentException("Invalid arguments"); 099 } 100 random = r; 101 k = new ArrayList<>(); 102 v = new ArrayList<>(); 103 104 double sum = 0; 105 int last = -1; 106 for (int i = min; i < max; ++i) { 107 sum += Math.exp(-sigma * Math.log(i - min + 1)); 108 if ((last == -1) || i * (1 - epsilon) > last) { 109 k.add(i); 110 v.add(sum); 111 last = i; 112 } 113 } 114 115 if (last != max - 1) { 116 k.add(max - 1); 117 v.add(sum); 118 } 119 120 v.set(v.size() - 1, 1.0); 121 122 for (int i = v.size() - 2; i >= 0; --i) { 123 v.set(i, v.get(i) / sum); 124 } 125 } 126 127 /** 128 * @see DiscreteRNG#nextInt() 129 */ 130 @Override 131 public int nextInt() { 132 double d = random.nextDouble(); 133 int idx = Collections.binarySearch(v, d); 134 135 if (idx > 0) { 136 ++idx; 137 } else { 138 idx = -(idx + 1); 139 } 140 141 if (idx >= v.size()) { 142 idx = v.size() - 1; 143 } 144 145 if (idx == 0) { 146 return k.get(0); 147 } 148 149 int ceiling = k.get(idx); 150 int lower = k.get(idx - 1); 151 152 return ceiling - random.nextInt(ceiling - lower); 153 } 154 } 155 156 /** 157 * Binomial distribution. P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n) 158 * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(k-min)*(1-p)^(max-k-1) 159 */ 160 public static final class Binomial implements DiscreteRNG { 161 private final Random random; 162 private final int min; 163 private final int n; 164 private final double[] v; 165 166 private static double select(int n, int k) { 167 double ret = 1.0; 168 for (int i = k + 1; i <= n; ++i) { 169 ret *= (double) i / (i - k); 170 } 171 return ret; 172 } 173 174 private static double power(double p, int k) { 175 return Math.exp(k * Math.log(p)); 176 } 177 178 /** 179 * Generate random integers from min (inclusive) to max (exclusive) following Binomial 180 * distribution. The basic random number generator. Minimum integer maximum integer (exclusive). 181 * parameter. 182 */ 183 public Binomial(Random random, int min, int max, double p) { 184 if (min >= max) { 185 throw new IllegalArgumentException("Invalid range"); 186 } 187 this.random = random; 188 this.min = min; 189 this.n = max - min - 1; 190 if (n > 0) { 191 v = new double[n + 1]; 192 double sum = 0.0; 193 for (int i = 0; i <= n; ++i) { 194 sum += select(n, i) * power(p, i) * power(1 - p, n - i); 195 v[i] = sum; 196 } 197 for (int i = 0; i <= n; ++i) { 198 v[i] /= sum; 199 } 200 } else { 201 v = null; 202 } 203 } 204 205 /** 206 * @see DiscreteRNG#nextInt() 207 */ 208 @Override 209 public int nextInt() { 210 if (v == null) { 211 return min; 212 } 213 double d = random.nextDouble(); 214 int idx = Arrays.binarySearch(v, d); 215 if (idx > 0) { 216 ++idx; 217 } else { 218 idx = -(idx + 1); 219 } 220 221 if (idx >= v.length) { 222 idx = v.length - 1; 223 } 224 return idx + min; 225 } 226 } 227}