/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.ByteBufferKeyValue;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.io.WritableUtils;
import org.apache.yetus.audience.InterfaceAudience;

import org.apache.hbase.thirdparty.com.google.common.primitives.Bytes;

/**
 * Generate list of key values which are very useful to test data block encoding and compression.
 * <p>
 * Rows are built from a small pool of random prefixes followed by a random suffix, and qualifiers
 * within a row are frequently repeated or share a leading run of bytes, so the generated cells
 * contain the kind of redundancy that encoders and compressors are expected to exploit.
 * <p>
 * All randomness comes from the {@link Random} passed to the constructor; the no-argument
 * constructor uses a fixed seed.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "RV_ABSOLUTE_VALUE_OF_RANDOM_INT",
    justification = "Should probably fix")
@InterfaceAudience.Private
public class RedundantKVGenerator {
  // row settings
  static byte[] DEFAULT_COMMON_PREFIX = new byte[0];
  static int DEFAULT_NUMBER_OF_ROW_PREFIXES = 10;
  static int DEFAULT_AVERAGE_PREFIX_LENGTH = 6;
  static int DEFAULT_PREFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_AVERAGE_SUFFIX_LENGTH = 3;
  static int DEFAULT_SUFFIX_LENGTH_VARIANCE = 3;
  static int DEFAULT_NUMBER_OF_ROW = 500;

  // qualifier
  static float DEFAULT_CHANCE_FOR_SAME_QUALIFIER = 0.5f;
  static float DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER = 0.4f;
  static int DEFAULT_AVERAGE_QUALIFIER_LENGTH = 9;
  static int DEFAULT_QUALIFIER_LENGTH_VARIANCE = 3;

  static int DEFAULT_COLUMN_FAMILY_LENGTH = 9;
  static int DEFAULT_VALUE_LENGTH = 8;
  static float DEFAULT_CHANCE_FOR_ZERO_VALUE = 0.5f;

  static int DEFAULT_BASE_TIMESTAMP_DIVIDE = 1000000;
  static int DEFAULT_TIMESTAMP_DIFF_SIZE = 100000000;

  /** Used to generate dataset */
  private final Random randomizer;

  // row settings
  private final byte[] commonPrefix; // global prefix before rowPrefixes
  private final int numberOfRowPrefixes;
  private final int averagePrefixLength;
  private final int prefixLengthVariance;
  private final int averageSuffixLength;
  private final int suffixLengthVariance;
  private final int numberOfRows;

  // family; lazily filled with random bytes on first generation unless set via setFamily()
  private byte[] family;

  // qualifier
  private final float chanceForSameQualifier;
  private final float chanceForSimilarQualifier;
  private final int averageQualifierLength;
  private final int qualifierLengthVariance;

  private int columnFamilyLength;
  private final int valueLength;
  private final float chanceForZeroValue;

  private final int baseTimestampDivide;
  private final int timestampDiffSize;

  /**
   * Default constructor, assumes all parameters from class constants and a fixed random seed.
   */
  public RedundantKVGenerator() {
    this(new Random(42L), DEFAULT_NUMBER_OF_ROW_PREFIXES, DEFAULT_AVERAGE_PREFIX_LENGTH,
      DEFAULT_PREFIX_LENGTH_VARIANCE, DEFAULT_AVERAGE_SUFFIX_LENGTH, DEFAULT_SUFFIX_LENGTH_VARIANCE,
      DEFAULT_NUMBER_OF_ROW,

      DEFAULT_CHANCE_FOR_SAME_QUALIFIER, DEFAULT_CHANCE_FOR_SIMILIAR_QUALIFIER,
      DEFAULT_AVERAGE_QUALIFIER_LENGTH, DEFAULT_QUALIFIER_LENGTH_VARIANCE,

      DEFAULT_COLUMN_FAMILY_LENGTH, DEFAULT_VALUE_LENGTH, DEFAULT_CHANCE_FOR_ZERO_VALUE,

      DEFAULT_BASE_TIMESTAMP_DIVIDE, DEFAULT_TIMESTAMP_DIFF_SIZE);
  }

  /**
   * Various configuration options for generating key values
   * @param randomizer                 pick things by random
   * @param numberOfRowPrefixes        size of the prefix pool (the first entry is always empty)
   * @param averagePrefixLength        centre of the row prefix length range
   * @param prefixLengthVariance       row prefix lengths are drawn from average +/- this value
   * @param averageSuffixLength        centre of the row suffix length range
   * @param suffixLengthVariance       row suffix lengths are drawn from average +/- this value
   * @param numberOfRows               number of distinct row keys to pick from
   * @param chanceForSameQualifier     probability threshold for reusing an existing qualifier of
   *                                   the row
   * @param chanceForSimiliarQualifier additional probability for deriving a qualifier that shares
   *                                   a leading run of bytes with an existing one
   * @param averageQualifierLength     centre of the qualifier length range
   * @param qualifierLengthVariance    qualifier lengths are drawn from average +/- this value
   * @param columnFamilyLength         length of the randomly generated column family
   * @param valueLength                length of every generated value
   * @param chanceForZeroValue         probability that a value is all zero bytes
   * @param baseTimestampDivide        divisor applied to the random base timestamp
   * @param timestampDiffSize          exclusive upper bound of the random per-cell timestamp
   *                                   offset; no offset is added when not positive
   */
  public RedundantKVGenerator(Random randomizer, int numberOfRowPrefixes, int averagePrefixLength,
    int prefixLengthVariance, int averageSuffixLength, int suffixLengthVariance, int numberOfRows,

    float chanceForSameQualifier, float chanceForSimiliarQualifier, int averageQualifierLength,
    int qualifierLengthVariance,

    int columnFamilyLength, int valueLength, float chanceForZeroValue,

    int baseTimestampDivide, int timestampDiffSize) {
    this.randomizer = randomizer;

    this.commonPrefix = DEFAULT_COMMON_PREFIX;
    this.numberOfRowPrefixes = numberOfRowPrefixes;
    this.averagePrefixLength = averagePrefixLength;
    this.prefixLengthVariance = prefixLengthVariance;
    this.averageSuffixLength = averageSuffixLength;
    this.suffixLengthVariance = suffixLengthVariance;
    this.numberOfRows = numberOfRows;

    this.chanceForSameQualifier = chanceForSameQualifier;
    this.chanceForSimilarQualifier = chanceForSimiliarQualifier;
    this.averageQualifierLength = averageQualifierLength;
    this.qualifierLengthVariance = qualifierLengthVariance;

    this.columnFamilyLength = columnFamilyLength;
    this.valueLength = valueLength;
    this.chanceForZeroValue = chanceForZeroValue;

    this.baseTimestampDivide = baseTimestampDivide;
    this.timestampDiffSize = timestampDiffSize;
  }

  /**
   * Draw a length uniformly from {@code [average - variance, average + variance]}, clamped at zero
   * so that a variance larger than the average can never produce a negative array size.
   */
  private int randomLength(int average, int variance) {
    int length = average + randomizer.nextInt(2 * variance + 1) - variance;
    return Math.max(0, length);
  }

  /**
   * Build the pool of row keys. Each row is the common prefix, followed by one of the random row
   * prefixes, followed by a random suffix.
   */
  private List<byte[]> generateRows() {
    // generate prefixes; the first one is intentionally empty
    List<byte[]> prefixes = new ArrayList<>();
    prefixes.add(new byte[0]);
    for (int i = 1; i < numberOfRowPrefixes; ++i) {
      byte[] newPrefix = new byte[randomLength(averagePrefixLength, prefixLengthVariance)];
      randomizer.nextBytes(newPrefix);
      prefixes.add(newPrefix);
    }

    // generate rest of the row
    List<byte[]> rows = new ArrayList<>();
    for (int i = 0; i < numberOfRows; ++i) {
      int suffixLength = randomLength(averageSuffixLength, suffixLengthVariance);
      byte[] prefix = prefixes.get(randomizer.nextInt(prefixes.size()));
      byte[] suffix = new byte[suffixLength];
      randomizer.nextBytes(suffix);
      // Actually place the chosen prefix into the row; previously only its length was used and
      // every row consisted of zero bytes.
      rows.add(Bytes.concat(commonPrefix, prefix, suffix));
    }

    return rows;
  }

  /**
   * Pick the qualifier for the next cell of the given row: sometimes one already used for the row,
   * sometimes one sharing a leading run of bytes with an already used one, occasionally a
   * completely new one. New and similar qualifiers are recorded in {@code rowsToQualifier}.
   */
  private byte[] nextQualifier(int rowId, Map<Integer, List<byte[]>> rowsToQualifier) {
    float qualifierChance = randomizer.nextFloat();
    List<byte[]> previousQualifiers = rowsToQualifier.get(rowId);

    if (
      previousQualifiers == null
        || qualifierChance > chanceForSameQualifier + chanceForSimilarQualifier
    ) {
      // completely different qualifier
      byte[] qualifier = new byte[randomLength(averageQualifierLength, qualifierLengthVariance)];
      randomizer.nextBytes(qualifier);
      rowsToQualifier.computeIfAbsent(rowId, k -> new ArrayList<>()).add(qualifier);
      return qualifier;
    }

    byte[] originalQualifier =
      previousQualifiers.get(randomizer.nextInt(previousQualifiers.size()));
    if (qualifierChance > chanceForSameQualifier) {
      // similar qualifier: keep a random-length leading run, randomize the rest
      byte[] qualifier = new byte[originalQualifier.length];
      // Random.nextInt(0) throws, so an empty qualifier simply shares nothing.
      int sharedLength = qualifier.length == 0 ? 0 : randomizer.nextInt(qualifier.length);
      System.arraycopy(originalQualifier, 0, qualifier, 0, sharedLength);
      for (int j = sharedLength; j < qualifier.length; ++j) {
        qualifier[j] = (byte) (randomizer.nextInt() & 0xff);
      }
      previousQualifiers.add(qualifier);
      return qualifier;
    }

    // same qualifier
    return originalQualifier;
  }

  /**
   * Generate {@code howMany} cells in generation order (not sorted). Shared by the on-heap and
   * off-heap public generators so both draw from the randomizer in the same way.
   */
  private List<KeyValue> generateUnsortedKeyValues(int howMany, boolean useTags) {
    List<KeyValue> result = new ArrayList<>();

    List<byte[]> rows = generateRows();
    Map<Integer, List<byte[]>> rowsToQualifier = new HashMap<>();

    if (family == null) {
      family = new byte[columnFamilyLength];
      randomizer.nextBytes(family);
    }

    long baseTimestamp = randomizer.nextInt(Integer.MAX_VALUE) / baseTimestampDivide;

    // Scratch buffer reused for every cell; each KeyValue is built from its current contents.
    byte[] value = new byte[valueLength];

    for (int i = 0; i < howMany; ++i) {
      long timestamp = baseTimestamp;
      if (timestampDiffSize > 0) {
        timestamp += randomizer.nextInt(timestampDiffSize);
      }
      int rowId = randomizer.nextInt(rows.size());
      byte[] row = rows.get(rowId);

      byte[] qualifier = nextQualifier(rowId, rowsToQualifier);

      if (randomizer.nextFloat() < chanceForZeroValue) {
        Arrays.fill(value, (byte) 0);
      } else {
        randomizer.nextBytes(value);
      }

      if (useTags) {
        result.add(new KeyValue(row, family, qualifier, timestamp, value,
          new Tag[] { new ArrayBackedTag((byte) 1, "value1") }));
      } else {
        result.add(new KeyValue(row, family, qualifier, timestamp, value));
      }
    }

    return result;
  }

  /** Copy the serialized bytes of {@code keyValue} into a freshly allocated direct buffer. */
  private static ByteBufferKeyValue toOffheap(KeyValue keyValue) {
    ByteBuffer offheapKVBB = ByteBuffer.allocateDirect(keyValue.getLength());
    ByteBufferUtils.copyFromArrayToBuffer(offheapKVBB, keyValue.getBuffer(), keyValue.getOffset(),
      keyValue.getLength());
    return new ExtendedOffheapKeyValue(offheapKVBB, 0, keyValue.getLength(), 0);
  }

  /**
   * Generate test data useful to test encoders.
   * @param howMany How many Key values should be generated.
   * @return sorted list of key values
   */
  public List<KeyValue> generateTestKeyValues(int howMany) {
    return generateTestKeyValues(howMany, false);
  }

  /**
   * Generate test data useful to test encoders.
   * @param howMany How many Key values should be generated.
   * @param useTags whether every generated key value carries a tag
   * @return sorted list of key values
   */
  public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
    List<KeyValue> result = generateUnsortedKeyValues(howMany, useTags);
    result.sort(CellComparator.getInstance());
    return result;
  }

  /**
   * Generate test data useful to test encoders, with every cell backed by a direct
   * {@link ByteBuffer}.
   * @param howMany How many Key values should be generated.
   * @param useTags whether every generated key value carries a tag
   * @return sorted list of key values
   */
  public List<ExtendedCell> generateTestExtendedOffheapKeyValues(int howMany, boolean useTags) {
    List<KeyValue> onheap = generateUnsortedKeyValues(howMany, useTags);
    List<ExtendedCell> result = new ArrayList<>(onheap.size());
    for (KeyValue keyValue : onheap) {
      result.add(toOffheap(keyValue));
    }

    result.sort(CellComparator.getInstance());

    return result;
  }

  /**
   * A {@link ByteBufferKeyValue} whose array-based accessors always throw, so that a test fails if
   * the code under test reaches for a backing array instead of the buffer.
   */
  static class ExtendedOffheapKeyValue extends ByteBufferKeyValue {
    public ExtendedOffheapKeyValue(ByteBuffer buf, int offset, int length, long seqId) {
      super(buf, offset, length, seqId);
    }

    @Override
    public byte[] getRowArray() {
      throw new IllegalArgumentException("getRowArray operation is not allowed");
    }

    @Override
    public int getRowOffset() {
      throw new IllegalArgumentException("getRowOffset operation is not allowed");
    }

    @Override
    public byte[] getFamilyArray() {
      throw new IllegalArgumentException("getFamilyArray operation is not allowed");
    }

    @Override
    public int getFamilyOffset() {
      throw new IllegalArgumentException("getFamilyOffset operation is not allowed");
    }

    @Override
    public byte[] getQualifierArray() {
      throw new IllegalArgumentException("getQualifierArray operation is not allowed");
    }

    @Override
    public int getQualifierOffset() {
      throw new IllegalArgumentException("getQualifierOffset operation is not allowed");
    }

    @Override
    public byte[] getValueArray() {
      throw new IllegalArgumentException("getValueArray operation is not allowed");
    }

    @Override
    public int getValueOffset() {
      throw new IllegalArgumentException("getValueOffset operation is not allowed");
    }

    @Override
    public byte[] getTagsArray() {
      throw new IllegalArgumentException("getTagsArray operation is not allowed");
    }

    @Override
    public int getTagsOffset() {
      throw new IllegalArgumentException("getTagsOffset operation is not allowed");
    }
  }

  /**
   * Convert list of KeyValues to byte buffer.
   * <p>
   * The buffer is returned as written; it is not flipped or rewound here.
   * @param keyValues          list of KeyValues to be converted.
   * @param includesMemstoreTS whether each key value is followed by its sequence id written as a
   *                           variable-length long
   * @return buffer with content from key values
   */
  public static ByteBuffer convertKvToByteBuffer(List<KeyValue> keyValues,
    boolean includesMemstoreTS) {
    int totalSize = 0;
    for (KeyValue kv : keyValues) {
      totalSize += kv.getLength();
      if (includesMemstoreTS) {
        totalSize += WritableUtils.getVIntSize(kv.getSequenceId());
      }
    }

    ByteBuffer result = ByteBuffer.allocate(totalSize);
    for (KeyValue kv : keyValues) {
      result.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
      if (includesMemstoreTS) {
        ByteBufferUtils.writeVLong(result, kv.getSequenceId());
      }
    }
    return result;
  }

  /**
   * Use the given column family for all subsequently generated key values instead of a random one.
   * @param family column family bytes; must not be null
   * @return this generator, for chaining
   */
  public RedundantKVGenerator setFamily(byte[] family) {
    this.family = family;
    this.columnFamilyLength = family.length;
    return this;
  }
}