/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlock.FILL_HEADER;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.TypeAdapter;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonReader;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonWriter;

/**
 * Utility for aggregating counts in CachedBlocks and for toString/toJSON renderings of
 * CachedBlocks and BlockCaches. No attempt has been made at making this thread safe.
 */
@InterfaceAudience.Private
public class BlockCacheUtil {

  private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class);

  public static final long NANOS_PER_SECOND = 1000000000;

  /**
   * Needed for generating JSON.
   */
  private static final Gson GSON = GsonUtil.createGson()
    .registerTypeAdapter(FastLongHistogram.class, new TypeAdapter<FastLongHistogram>() {

      @Override
      public void write(JsonWriter out, FastLongHistogram value) throws IOException {
        AgeSnapshot snapshot = new AgeSnapshot(value);
        out.beginObject();
        out.name("mean").value(snapshot.getMean());
        out.name("min").value(snapshot.getMin());
        out.name("max").value(snapshot.getMax());
        out.name("75thPercentile").value(snapshot.get75thPercentile());
        out.name("95thPercentile").value(snapshot.get95thPercentile());
        out.name("98thPercentile").value(snapshot.get98thPercentile());
        out.name("99thPercentile").value(snapshot.get99thPercentile());
        out.name("999thPercentile").value(snapshot.get999thPercentile());
        out.endObject();
      }

      @Override
      public FastLongHistogram read(JsonReader in) throws IOException {
        throw new UnsupportedOperationException();
      }
    }).setPrettyPrinting().create();
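
  // With the TypeAdapter above, any FastLongHistogram field serializes as a JSON object of age
  // percentiles. A minimal sketch of how this plays out (the values are illustrative
  // assumptions, not output from a real cache):
  //
  //   FastLongHistogram hist = new FastLongHistogram();
  //   hist.add(5, 1); // one block with an in-cache age of 5 seconds
  //   String json = GSON.toJson(hist);
  //   // json is a pretty-printed object shaped roughly like:
  //   //   { "mean": 5, "min": 5, "max": 5, "75thPercentile": 5, "95thPercentile": 5,
  //   //     "98thPercentile": 5, "99thPercentile": 5, "999thPercentile": 5 }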

  /** Returns the block content as a String. */
  public static String toString(final CachedBlock cb, final long now) {
    return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
  }

  /**
   * Small data structure to hold counts for a file. Used when generating JSON.
   */
  static class CachedBlockCountsPerFile {
    private int count = 0;
    private long size = 0;
    private int countData = 0;
    private long sizeData = 0;
    private final String filename;

    CachedBlockCountsPerFile(final String filename) {
      this.filename = filename;
    }

    public int getCount() {
      return count;
    }

    public long getSize() {
      return size;
    }

    public int getCountData() {
      return countData;
    }

    public long getSizeData() {
      return sizeData;
    }

    public String getFilename() {
      return filename;
    }
  }

  /** Returns a JSON String of <code>filename</code> and counts of <code>blocks</code> */
  public static String toJSON(String filename, NavigableSet<CachedBlock> blocks)
    throws IOException {
    CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
    for (CachedBlock cb : blocks) {
      counts.count++;
      counts.size += cb.getSize();
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        counts.countData++;
        counts.sizeData += cb.getSize();
      }
    }
    return GSON.toJson(counts);
  }

  /** Returns a JSON string of <code>cbsbf</code> aggregated */
  public static String toJSON(CachedBlocksByFile cbsbf) throws IOException {
    return GSON.toJson(cbsbf);
  }

  /** Returns a JSON string of <code>bc</code> content. */
  public static String toJSON(BlockCache bc) throws IOException {
    return GSON.toJson(bc);
  }

  /** Returns the block content of <code>cb</code> as a String, minus the filename. */
  public static String toStringMinusFileName(final CachedBlock cb, final long now) {
    return "offset=" + cb.getOffset() + ", size=" + cb.getSize() + ", age="
      + (now - cb.getCachedTime()) + ", type=" + cb.getBlockType() + ", priority="
      + cb.getBlockPriority();
  }

  /**
   * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
   * {@link BlockCache}.
   * @param conf Used to read configurations
   * @param bc   Block Cache to iterate.
   * @return Loaded up instance of CachedBlocksByFile
   */
  public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
    final BlockCache bc) {
    CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
    for (CachedBlock cb : bc) {
      if (cbsbf.update(cb)) {
        break;
      }
    }
    return cbsbf;
  }

  private static int compareCacheBlock(Cacheable left, Cacheable right,
    boolean includeNextBlockMetadata) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l, includeNextBlockMetadata);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r, includeNextBlockMetadata);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(), r.array(), r.arrayOffset(),
      r.limit());
  }
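
  // A minimal sketch of how a UI/status handler might combine the helpers above; the `conf` and
  // `cache` variables are assumptions for illustration, not members of this class:
  //
  //   Configuration conf = ...;   // server configuration
  //   BlockCache cache = ...;     // e.g. the RegionServer's block cache
  //   CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, cache);
  //   String json = BlockCacheUtil.toJSON(cbsbf); // per-file aggregate counts as JSON
  //   if (cbsbf.isFull()) {
  //     // Stats are incomplete; iteration stopped at hbase.ui.blockcache.by.file.max blocks.
  //   }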

  /**
   * Validate that the existing block and newBlock are the same, ignoring the nextBlockMetadata;
   * if they are not, throw an exception. If they are the same apart from the nextBlockMetadata,
   * return the comparison.
   * @param existing block that already exists in the cache.
   * @param newBlock block that is trying to be cached.
   * @param cacheKey the cache key of the blocks.
   * @return comparison of the existing block to the newBlock.
   */
  public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
    BlockCacheKey cacheKey) {
    int comparison = compareCacheBlock(existing, newBlock, false);
    if (comparison != 0) {
      throw new RuntimeException(
        "Cached block contents differ, which should not have happened. cacheKey:" + cacheKey);
    }
    if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
      comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
        - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
    }
    return comparison;
  }

  /**
   * Because of region splitting, the split key may fall in the middle of a block, so both
   * daughter regions may load the same block from their parent HFile. On a pread we do not force
   * the read to include all of the next block's header, so when two threads try to cache the
   * same block, one may have read the next block's header while the other has not. If the
   * already-cached block lacks the next block header but the new block to cache has it, we can
   * replace the existing block with the new block for better performance. (HBASE-20447)
   * @param blockCache BlockCache to check
   * @param cacheKey   the block cache key
   * @param newBlock   the new block being put into the block cache.
   * @return true if the existing block should be replaced with the new block for the same block
   *         cache key; false if the existing block should be kept.
   */
  public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
    BlockCacheKey cacheKey, Cacheable newBlock) {
    // NOTICE: The getBlock has retained the existingBlock inside.
    Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
    if (existingBlock == null) {
      return true;
    }
    try {
      int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
      if (comparison < 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
          + "nextBlockOnDiskSize set. Caching new block.");
        return true;
      } else if (comparison > 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
          + "nextBlockOnDiskSize set. Keeping cached block.");
        return false;
      } else {
        LOG.debug("Caching an already cached block: {}. This is harmless and can happen in rare "
          + "cases (see HBASE-8547)", cacheKey);
        return false;
      }
    } finally {
      // Release this block to decrement the reference count.
      existingBlock.release();
    }
  }

  private static final int DEFAULT_MAX = 1000000;

  public static int getMaxCachedBlocksByFile(Configuration conf) {
    return conf == null ? DEFAULT_MAX : conf.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
  }
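
  // A minimal sketch of the caller pattern this guard supports; `cache`, `key` and `block` are
  // illustrative assumptions, and real cache implementations wire this into their cacheBlock():
  //
  //   if (BlockCacheUtil.shouldReplaceExistingCacheBlock(cache, key, block)) {
  //     // Either nothing was cached under this key, or the new block additionally carries the
  //     // next block's header (nextBlockOnDiskSize): overwrite the existing entry.
  //     cache.cacheBlock(key, block);
  //   }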
259 */ 260 public static HFileBlock getBlockForCaching(CacheConfig cacheConf, HFileBlock block) { 261 // Calculate how many bytes we need for checksum on the tail of the block. 262 int numBytes = cacheConf.shouldCacheCompressed(block.getBlockType().getCategory()) 263 ? 0 264 : (int) ChecksumUtil.numBytes(block.getOnDiskDataSizeWithHeader(), 265 block.getHFileContext().getBytesPerChecksum()); 266 ByteBuff buff = block.getBufferReadOnly(); 267 HFileBlockBuilder builder = new HFileBlockBuilder(); 268 return builder.withBlockType(block.getBlockType()) 269 .withOnDiskSizeWithoutHeader(block.getOnDiskSizeWithoutHeader()) 270 .withUncompressedSizeWithoutHeader(block.getUncompressedSizeWithoutHeader()) 271 .withPrevBlockOffset(block.getPrevBlockOffset()).withByteBuff(buff) 272 .withFillHeader(FILL_HEADER).withOffset(block.getOffset()).withNextBlockOnDiskSize(-1) 273 .withOnDiskDataSizeWithHeader(block.getOnDiskDataSizeWithHeader() + numBytes) 274 .withHFileContext(cloneContext(block.getHFileContext())) 275 .withByteBuffAllocator(cacheConf.getByteBuffAllocator()).withShared(!buff.hasArray()).build(); 276 } 277 278 public static HFileContext cloneContext(HFileContext context) { 279 HFileContext newContext = new HFileContextBuilder().withBlockSize(context.getBlocksize()) 280 .withBytesPerCheckSum(0).withChecksumType(ChecksumType.NULL) // no checksums in cached data 281 .withCompression(context.getCompression()) 282 .withDataBlockEncoding(context.getDataBlockEncoding()) 283 .withHBaseCheckSum(context.isUseHBaseChecksum()).withCompressTags(context.isCompressTags()) 284 .withIncludesMvcc(context.isIncludesMvcc()).withIncludesTags(context.isIncludesTags()) 285 .withColumnFamily(context.getColumnFamily()).withTableName(context.getTableName()).build(); 286 return newContext; 287 } 288 289 /** 290 * Use one of these to keep a running account of cached blocks by file. Throw it away when done. 291 * This is different than metrics in that it is stats on current state of a cache. See 292 * getLoadedCachedBlocksByFile 293 */ 294 public static class CachedBlocksByFile { 295 private int count; 296 private int dataBlockCount; 297 private long size; 298 private long dataSize; 299 private final long now = System.nanoTime(); 300 /** 301 * How many blocks to look at before we give up. There could be many millions of blocks. We 302 * don't want the ui to freeze while we run through 1B blocks... users will think hbase dead. UI 303 * displays warning in red when stats are incomplete. 304 */ 305 private final int max; 306 307 CachedBlocksByFile() { 308 this(null); 309 } 310 311 CachedBlocksByFile(final Configuration c) { 312 this.max = getMaxCachedBlocksByFile(c); 313 } 314 315 /** 316 * Map by filename. use concurent utils because we want our Map and contained blocks sorted. 317 */ 318 private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile = 319 new ConcurrentSkipListMap<>(); 320 FastLongHistogram hist = new FastLongHistogram(); 321 322 /** Returns True if full.... if we won't be adding any more. 

  /**
   * Use one of these to keep a running account of cached blocks by file. Throw it away when done.
   * This is different from metrics in that it is stats on the current state of a cache. See
   * {@link #getLoadedCachedBlocksByFile}.
   */
  public static class CachedBlocksByFile {
    private int count;
    private int dataBlockCount;
    private long size;
    private long dataSize;
    private final long now = System.nanoTime();
    /**
     * How many blocks to look at before we give up. There could be many millions of blocks. We
     * don't want the UI to freeze while we run through a billion blocks; users will think HBase is
     * dead. The UI displays a warning in red when stats are incomplete.
     */
    private final int max;

    CachedBlocksByFile() {
      this(null);
    }

    CachedBlocksByFile(final Configuration c) {
      this.max = getMaxCachedBlocksByFile(c);
    }

    /**
     * Map keyed by filename. Use the concurrent utils because we want our Map and the contained
     * blocks sorted.
     */
    private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile =
      new ConcurrentSkipListMap<>();
    FastLongHistogram hist = new FastLongHistogram();

    /** Returns true if full, i.e. if we won't be adding any more. */
    public boolean update(final CachedBlock cb) {
      if (isFull()) {
        return true;
      }
      NavigableSet<CachedBlock> set = this.cachedBlockByFile.computeIfAbsent(cb.getFilename(),
        k -> new ConcurrentSkipListSet<>());
      set.add(cb);
      this.size += cb.getSize();
      this.count++;
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        this.dataBlockCount++;
        this.dataSize += cb.getSize();
      }
      long age = (this.now - cb.getCachedTime()) / NANOS_PER_SECOND;
      this.hist.add(age, 1);
      return false;
    }

    /**
     * @return True if full; i.e. there are more items in the cache but we only loaded up the
     *         maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code> (Default:
     *         DEFAULT_MAX).
     */
    public boolean isFull() {
      return this.count >= this.max;
    }

    public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
      return this.cachedBlockByFile;
    }

    /** Returns count of blocks in the cache */
    public int getCount() {
      return count;
    }

    public int getDataCount() {
      return dataBlockCount;
    }

    /** Returns size of blocks in the cache */
    public long getSize() {
      return size;
    }

    /** Returns size of data. */
    public long getDataSize() {
      return dataSize;
    }

    public AgeSnapshot getAgeInCacheSnapshot() {
      return new AgeSnapshot(this.hist);
    }

    @Override
    public String toString() {
      AgeSnapshot snapshot = getAgeInCacheSnapshot();
      return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size
        + ", dataSize=" + getDataSize() + ", mean age=" + snapshot.getMean() + ", min age="
        + snapshot.getMin() + ", max age=" + snapshot.getMax() + ", 75th percentile age="
        + snapshot.get75thPercentile() + ", 95th percentile age=" + snapshot.get95thPercentile()
        + ", 98th percentile age=" + snapshot.get98thPercentile() + ", 99th percentile age="
        + snapshot.get99thPercentile() + ", 99.9th percentile age="
        + snapshot.get999thPercentile();
    }
  }
}
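
// A minimal end-to-end sketch of CachedBlocksByFile (assumed, illustrative driver code; the
// `cache` variable stands in for a real BlockCache instance):
//
//   CachedBlocksByFile cbsbf = new CachedBlocksByFile(); // or pass a Configuration
//   for (CachedBlock cb : cache) {
//     if (cbsbf.update(cb)) {
//       break; // hit hbase.ui.blockcache.by.file.max; stats are incomplete
//     }
//   }
//   String summary = cbsbf.toString(); // counts, sizes and age percentiles (ages in seconds)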