/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;

/**
 * Test seek performance for encoded data blocks. Read an HFile and do several random seeks.
 * <p>
 * The benchmark first scans the whole file to collect its cells, picks a fixed number of them at
 * random (with a fixed seed, so the selection is repeatable), and then for every requested
 * encoding times one full sequential scan followed by the random seeks. Results are printed to
 * standard output.
 */
public class EncodedSeekPerformanceTest {
  private static final double NANOSEC_IN_SEC = 1000.0 * 1000.0 * 1000.0;
  private static final double BYTES_IN_MEGABYTES = 1024.0 * 1024.0;
  /** Default number of seeks which will be used in benchmark. */
  public static int DEFAULT_NUMBER_OF_SEEKS = 10000;

  private final HBaseTestingUtil testingUtility = new HBaseTestingUtil();
  private final Configuration configuration = testingUtility.getConfiguration();
  private final CacheConfig cacheConf = new CacheConfig(configuration);
  private final Random randomizer;
  private final int numberOfSeeks;

  /** Use this benchmark with default options */
  public EncodedSeekPerformanceTest() {
    // NOTE(review): cacheConf is created by its field initializer, i.e. before this setting is
    // applied - confirm that the block cache size configured here is actually picked up.
    configuration.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.5f);
    randomizer = new Random(42L);
    numberOfSeeks = DEFAULT_NUMBER_OF_SEEKS;
  }

  /**
   * Scans the whole HFile and picks {@code numberOfSeeks} of its cells at random (duplicates are
   * possible) to be used as seek targets. The block cache is cleared before returning.
   * @param path path to the HFile to read
   * @return the randomly chosen cells, in the order they were drawn
   * @throws IOException              if reading the file fails
   * @throws IllegalArgumentException if seeks are requested but the file contains no cells
   */
  private List<ExtendedCell> prepareListOfTestSeeks(Path path) throws IOException {
    List<ExtendedCell> allKeyValues = new ArrayList<>();

    // read all of the key values
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(configuration,
      testingUtility.getTestFileSystem(), path, true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    try {
      StoreFileReader reader = storeFile.getReader();
      StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);
      ExtendedCell current;

      scanner.seek(KeyValue.LOWESTKEY);
      while (null != (current = scanner.next())) {
        allKeyValues.add(current);
      }
    } finally {
      // Release the store file even when the scan fails part-way through.
      storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
    }

    // Random.nextInt(0) would fail with an unhelpful "bound must be positive"; report the real
    // cause instead.
    if (numberOfSeeks > 0 && allKeyValues.isEmpty()) {
      throw new IllegalArgumentException("HFile " + path + " contains no cells to seek to");
    }

    // pick seeks by random
    List<ExtendedCell> seeks = new ArrayList<>();
    for (int i = 0; i < numberOfSeeks; ++i) {
      ExtendedCell keyValue = allKeyValues.get(randomizer.nextInt(allKeyValues.size()));
      seeks.add(keyValue);
    }

    clearBlockCache();

    return seeks;
  }

  /**
   * Runs one benchmark pass over the HFile: a timed full sequential scan followed by timed seeks
   * to each of the given cells, then prints read throughput, seek rate and total cell size.
   * Seeking stops at the first cell whose seek result does not equal the requested cell; the
   * reported seek rate is based on the seeks that were actually executed.
   * @param path          path to the HFile to read
   * @param blockEncoding encoding this pass is reported under; in this method it is only used to
   *                      label the printed results
   * @param seeks         cells to seek to, in order
   * @throws IOException if reading the file fails or a cell reports a negative length
   */
  private void runTest(Path path, DataBlockEncoding blockEncoding, List<ExtendedCell> seeks)
    throws IOException {
    StoreFileInfo storeFileInfo = StoreFileInfo.createStoreFileInfoForHFile(configuration,
      testingUtility.getTestFileSystem(), path, true);
    HStoreFile storeFile = new HStoreFile(storeFileInfo, BloomType.NONE, cacheConf);
    storeFile.initReader();
    long totalSize = 0;
    double readInMbPerSec;
    double seeksPerSec;

    try {
      StoreFileReader reader = storeFile.getReader();
      StoreFileScanner scanner = reader.getStoreFileScanner(true, false, false, 0, 0, false);

      // sequential read of the whole file
      long startReadingTime = System.nanoTime();
      ExtendedCell current;
      scanner.seek(KeyValue.LOWESTKEY);
      while (null != (current = scanner.next())) { // just iterate it!
        int length = KeyValueUtil.ensureKeyValue(current).getLength();
        if (length < 0) {
          throw new IOException("Negative KV size: " + current);
        }
        totalSize += length;
      }
      long finishReadingTime = System.nanoTime();

      // do seeks
      int seeksDone = 0;
      long startSeeksTime = System.nanoTime();
      for (ExtendedCell keyValue : seeks) {
        scanner.seek(keyValue);
        ExtendedCell toVerify = scanner.next();
        ++seeksDone;
        if (!keyValue.equals(toVerify)) {
          // toVerify may be null here, so the key strings are rendered null-safely.
          System.out
            .println(String.format("KeyValue doesn't match:\n" + "Orig key: %s\n" + "Ret key: %s",
              keyStringOf(keyValue), keyStringOf(toVerify)));
          break;
        }
      }
      long finishSeeksTime = System.nanoTime();
      if (finishSeeksTime < startSeeksTime) {
        throw new AssertionError(
          "Finish time " + finishSeeksTime + " is earlier than start time " + startSeeksTime);
      }

      // compute stats
      readInMbPerSec =
        (totalSize * NANOSEC_IN_SEC) / (BYTES_IN_MEGABYTES * (finishReadingTime - startReadingTime));
      seeksPerSec = (seeksDone * NANOSEC_IN_SEC) / (finishSeeksTime - startSeeksTime);
    } finally {
      // Release the store file even when scanning or seeking fails.
      storeFile.closeStoreFile(cacheConf.shouldEvictOnClose());
    }
    clearBlockCache();

    // write some stats
    System.out.println(blockEncoding);
    System.out.printf(" Read speed: %8.2f (MB/s)\n", readInMbPerSec);
    System.out.printf(" Seeks per second: %8.2f (#/s)\n", seeksPerSec);
    System.out.printf(" Total KV size: %d\n", totalSize);
  }

  /**
   * Renders the key of a cell for diagnostics.
   * @param cell the cell to describe, may be {@code null}
   * @return the key string of the cell, or {@code "null"} when there is no cell
   */
  private static String keyStringOf(ExtendedCell cell) {
    return cell == null ? "null" : KeyValueUtil.ensureKeyValue(cell).getKeyString();
  }

  /**
   * Runs the benchmark once per given encoding, using the same randomly chosen seek targets for
   * every pass.
   * @param path      Path to the HFile which will be used.
   * @param encodings the data block encoding algorithms to use
   * @throws IOException if there is a bug while reading from disk
   */
  public void runTests(Path path, DataBlockEncoding[] encodings) throws IOException {
    List<ExtendedCell> seeks = prepareListOfTestSeeks(path);

    for (DataBlockEncoding blockEncoding : encodings) {
      runTest(path, blockEncoding, seeks);
    }
  }

  /**
   * Command line interface. Prints a usage message and exits with status -1 when no argument is
   * given; otherwise runs the benchmark for every {@link DataBlockEncoding} value and exits with
   * status 0.
   * @param args Takes one argument - the path of the HFile to benchmark.
   * @throws IOException if there is a bug while reading from disk
   */
  public static void main(final String[] args) throws IOException {
    if (args.length < 1) {
      printUsage();
      System.exit(-1);
    }

    Path path = new Path(args[0]);

    // TODO, this test doesn't work as expected any more. Need to fix.
    EncodedSeekPerformanceTest utility = new EncodedSeekPerformanceTest();
    utility.runTests(path, DataBlockEncoding.values());

    System.exit(0);
  }

  /** Prints the command line usage to standard output. */
  private static void printUsage() {
    System.out.println("Usage: one argument, name of the HFile");
  }

  /**
   * Empties the block cache so that one pass does not benefit from blocks cached by a previous
   * one. Assumes a block cache is configured and that it is an {@link LruBlockCache}; the
   * unchecked {@code get()} and cast below fail otherwise.
   */
  private void clearBlockCache() {
    ((LruBlockCache) cacheConf.getBlockCache().get()).clearCache();
  }
}