001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util;
019
020import java.util.Random;
021import org.apache.yetus.audience.InterfaceAudience;
022import org.slf4j.Logger;
023import org.slf4j.LoggerFactory;
024
025/**
026 * A generator of random keys and values for load testing. Keys are generated by converting numeric
027 * indexes to strings and prefixing them with an MD5 hash. Values are generated by selecting value
028 * size in the configured range and generating a pseudo-random sequence of bytes seeded by key,
029 * column qualifier, and value size.
030 */
031@InterfaceAudience.Private
032public class LoadTestKVGenerator {
033
034  private static final Logger LOG = LoggerFactory.getLogger(LoadTestKVGenerator.class);
035  private static int logLimit = 10;
036
037  /** A random number generator for determining value size */
038  private Random randomForValueSize = new Random(); // Seed may be set with Random#setSeed
039
040  private final int minValueSize;
041  private final int maxValueSize;
042
043  public LoadTestKVGenerator(int minValueSize, int maxValueSize) {
044    if (minValueSize <= 0 || maxValueSize <= 0) {
045      throw new IllegalArgumentException(
046        "Invalid min/max value sizes: " + minValueSize + ", " + maxValueSize);
047    }
048    this.minValueSize = minValueSize;
049    this.maxValueSize = maxValueSize;
050  }
051
052  /**
053   * Verifies that the given byte array is the same as what would be generated for the given seed
054   * strings (row/cf/column/...). We are assuming that the value size is correct, and only verify
055   * the actual bytes. However, if the min/max value sizes are set sufficiently high, an accidental
056   * match should be extremely improbable.
057   */
058  public static boolean verify(byte[] value, byte[]... seedStrings) {
059    byte[] expectedData = getValueForRowColumn(value.length, seedStrings);
060    boolean equals = Bytes.equals(expectedData, value);
061    if (!equals && LOG.isDebugEnabled() && logLimit > 0) {
062      LOG.debug("verify failed, expected value: " + Bytes.toStringBinary(expectedData)
063        + " actual value: " + Bytes.toStringBinary(value));
064      logLimit--; // this is not thread safe, but at worst we will have more logging
065    }
066    return equals;
067  }
068
069  /**
070   * Converts the given key to string, and prefixes it with the MD5 hash of the index's string
071   * representation.
072   */
073  public static String md5PrefixedKey(long key) {
074    String stringKey = Long.toString(key);
075    String md5hash = MD5Hash.getMD5AsHex(Bytes.toBytes(stringKey));
076
077    // flip the key to randomize
078    return md5hash + "-" + stringKey;
079  }
080
081  /**
082   * Generates a value for the given key index and column qualifier. Size is selected randomly in
083   * the configured range. The generated value depends only on the combination of the strings passed
084   * (key/cf/column/...) and the selected value size. This allows to verify the actual value bytes
085   * when reading, as done in {#verify(byte[], byte[]...)} This method is as thread-safe as Random
086   * class. It appears that the worst bug ever found with the latter is that multiple threads will
087   * get some duplicate values, which we don't care about.
088   */
089  public byte[] generateRandomSizeValue(byte[]... seedStrings) {
090    int dataSize = minValueSize;
091    if (minValueSize != maxValueSize) {
092      dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
093    }
094    return getValueForRowColumn(dataSize, seedStrings);
095  }
096
097  /**
098   * Generates random bytes of the given size for the given row and column qualifier. The random
099   * seed is fully determined by these parameters.
100   */
101  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "DMI_RANDOM_USED_ONLY_ONCE")
102  private static byte[] getValueForRowColumn(int dataSize, byte[]... seedStrings) {
103    long seed = dataSize;
104    for (byte[] str : seedStrings) {
105      final String bytesString = Bytes.toString(str);
106      if (bytesString != null) {
107        seed += bytesString.hashCode();
108      }
109    }
110    Random seededRandom = new Random(seed);
111    byte[] randomBytes = new byte[dataSize];
112    seededRandom.nextBytes(randomBytes);
113    return randomBytes;
114  }
115
116}