001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.hfile; 019 020import static com.codahale.metrics.MetricRegistry.name; 021 022import com.codahale.metrics.ConsoleReporter; 023import com.codahale.metrics.Histogram; 024import com.codahale.metrics.MetricRegistry; 025import com.codahale.metrics.Snapshot; 026import com.codahale.metrics.UniformReservoir; 027import java.io.ByteArrayOutputStream; 028import java.io.DataInput; 029import java.io.IOException; 030import java.io.PrintStream; 031import java.text.DateFormat; 032import java.util.ArrayList; 033import java.util.Arrays; 034import java.util.HashMap; 035import java.util.Iterator; 036import java.util.LinkedHashSet; 037import java.util.List; 038import java.util.Locale; 039import java.util.Map; 040import java.util.Optional; 041import java.util.Set; 042import java.util.TimeZone; 043import java.util.concurrent.atomic.LongAdder; 044import org.apache.hadoop.conf.Configuration; 045import org.apache.hadoop.conf.Configured; 046import org.apache.hadoop.fs.FileSystem; 047import org.apache.hadoop.fs.Path; 048import org.apache.hadoop.hbase.Cell; 049import org.apache.hadoop.hbase.CellComparator; 050import org.apache.hadoop.hbase.CellUtil; 051import org.apache.hadoop.hbase.HBaseConfiguration; 052import org.apache.hadoop.hbase.HBaseInterfaceAudience; 053import org.apache.hadoop.hbase.HConstants; 054import org.apache.hadoop.hbase.HRegionInfo; 055import org.apache.hadoop.hbase.KeyValue; 056import org.apache.hadoop.hbase.KeyValueUtil; 057import org.apache.hadoop.hbase.PrivateCellUtil; 058import org.apache.hadoop.hbase.TableName; 059import org.apache.hadoop.hbase.Tag; 060import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 061import org.apache.hadoop.hbase.mob.MobUtils; 062import org.apache.hadoop.hbase.regionserver.HStoreFile; 063import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; 064import org.apache.hadoop.hbase.util.BloomFilter; 065import org.apache.hadoop.hbase.util.BloomFilterFactory; 066import org.apache.hadoop.hbase.util.BloomFilterUtil; 067import org.apache.hadoop.hbase.util.Bytes; 068import org.apache.hadoop.hbase.util.CommonFSUtils; 069import org.apache.hadoop.hbase.util.HFileArchiveUtil; 070import org.apache.hadoop.util.Tool; 071import org.apache.hadoop.util.ToolRunner; 072import org.apache.yetus.audience.InterfaceAudience; 073import org.apache.yetus.audience.InterfaceStability; 074import org.slf4j.Logger; 075import org.slf4j.LoggerFactory; 076 077import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 078import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser; 079import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter; 080import org.apache.hbase.thirdparty.org.apache.commons.cli.Option; 081import org.apache.hbase.thirdparty.org.apache.commons.cli.OptionGroup; 082import org.apache.hbase.thirdparty.org.apache.commons.cli.Options; 083import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException; 084import org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser; 085 086/** 087 * Implements pretty-printing functionality for {@link HFile}s. 088 */ 089@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) 090@InterfaceStability.Evolving 091public class HFilePrettyPrinter extends Configured implements Tool { 092 093 private static final Logger LOG = LoggerFactory.getLogger(HFilePrettyPrinter.class); 094 095 private Options options = new Options(); 096 097 private boolean verbose; 098 private boolean printValue; 099 private boolean printKey; 100 private boolean shouldPrintMeta; 101 private boolean printBlockIndex; 102 private boolean printBlockHeaders; 103 private boolean printStats; 104 private boolean printStatRanges; 105 private boolean checkRow; 106 private boolean checkFamily; 107 private boolean isSeekToRow = false; 108 private boolean checkMobIntegrity = false; 109 private Map<String, List<Path>> mobFileLocations; 110 private static final int FOUND_MOB_FILES_CACHE_CAPACITY = 50; 111 private static final int MISSING_MOB_FILES_CACHE_CAPACITY = 20; 112 private PrintStream out = System.out; 113 private PrintStream err = System.err; 114 115 /** 116 * The row which the user wants to specify and print all the KeyValues for. 117 */ 118 private byte[] row = null; 119 120 private List<Path> files = new ArrayList<>(); 121 private int count; 122 123 private static final String FOUR_SPACES = " "; 124 125 public HFilePrettyPrinter() { 126 super(); 127 init(); 128 } 129 130 public HFilePrettyPrinter(Configuration conf) { 131 super(conf); 132 init(); 133 } 134 135 private void init() { 136 options.addOption("v", "verbose", false, "Verbose output; emits file and meta data delimiters"); 137 options.addOption("p", "printkv", false, "Print key/value pairs"); 138 options.addOption("e", "printkey", false, "Print keys"); 139 options.addOption("m", "printmeta", false, "Print meta data of file"); 140 options.addOption("b", "printblocks", false, "Print block index meta data"); 141 options.addOption("h", "printblockheaders", false, "Print block headers for each block."); 142 options.addOption("k", "checkrow", false, 143 "Enable row order check; looks for out-of-order keys"); 144 options.addOption("a", "checkfamily", false, "Enable family check"); 145 options.addOption("w", "seekToRow", true, 146 "Seek to this row and print all the kvs for this row only"); 147 options.addOption("s", "stats", false, "Print statistics"); 148 options.addOption("d", "details", false, 149 "Print detailed statistics, including counts by range"); 150 options.addOption("i", "checkMobIntegrity", false, 151 "Print all cells whose mob files are missing"); 152 153 OptionGroup files = new OptionGroup(); 154 files.addOption(new Option("f", "file", true, 155 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34")); 156 files.addOption( 157 new Option("r", "region", true, "Region to scan. Pass region name; e.g. 'hbase:meta,,1'")); 158 options.addOptionGroup(files); 159 } 160 161 public void setPrintStreams(PrintStream out, PrintStream err) { 162 this.out = out; 163 this.err = err; 164 } 165 166 public boolean parseOptions(String args[]) throws ParseException, IOException { 167 if (args.length == 0) { 168 HelpFormatter formatter = new HelpFormatter(); 169 formatter.printHelp("hfile", options, true); 170 return false; 171 } 172 CommandLineParser parser = new PosixParser(); 173 CommandLine cmd = parser.parse(options, args); 174 175 verbose = cmd.hasOption("v"); 176 printValue = cmd.hasOption("p"); 177 printKey = cmd.hasOption("e") || printValue; 178 shouldPrintMeta = cmd.hasOption("m"); 179 printBlockIndex = cmd.hasOption("b"); 180 printBlockHeaders = cmd.hasOption("h"); 181 printStatRanges = cmd.hasOption("d"); 182 printStats = cmd.hasOption("s") || printStatRanges; 183 checkRow = cmd.hasOption("k"); 184 checkFamily = cmd.hasOption("a"); 185 checkMobIntegrity = cmd.hasOption("i"); 186 187 if (cmd.hasOption("f")) { 188 files.add(new Path(cmd.getOptionValue("f"))); 189 } 190 191 if (cmd.hasOption("w")) { 192 String key = cmd.getOptionValue("w"); 193 if (key != null && key.length() != 0) { 194 row = Bytes.toBytesBinary(key); 195 isSeekToRow = true; 196 } else { 197 err.println("Invalid row is specified."); 198 System.exit(-1); 199 } 200 } 201 202 if (cmd.hasOption("r")) { 203 String regionName = cmd.getOptionValue("r"); 204 byte[] rn = Bytes.toBytes(regionName); 205 byte[][] hri = HRegionInfo.parseRegionName(rn); 206 Path rootDir = CommonFSUtils.getRootDir(getConf()); 207 Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.valueOf(hri[0])); 208 String enc = HRegionInfo.encodeRegionName(rn); 209 Path regionDir = new Path(tableDir, enc); 210 if (verbose) out.println("region dir -> " + regionDir); 211 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()), regionDir); 212 if (verbose) out.println("Number of region files found -> " + regionFiles.size()); 213 if (verbose) { 214 int i = 1; 215 for (Path p : regionFiles) { 216 if (verbose) out.println("Found file[" + i++ + "] -> " + p); 217 } 218 } 219 files.addAll(regionFiles); 220 } 221 222 if (checkMobIntegrity) { 223 if (verbose) { 224 System.out.println("checkMobIntegrity is enabled"); 225 } 226 mobFileLocations = new HashMap<>(); 227 } 228 229 cmd.getArgList().forEach((file) -> files.add(new Path(file))); 230 231 return true; 232 } 233 234 /** 235 * Runs the command-line pretty-printer, and returns the desired command exit code (zero for 236 * success, non-zero for failure). 237 */ 238 @Override 239 public int run(String[] args) { 240 if (getConf() == null) { 241 throw new RuntimeException("A Configuration instance must be provided."); 242 } 243 try { 244 CommonFSUtils.setFsDefault(getConf(), CommonFSUtils.getRootDir(getConf())); 245 if (!parseOptions(args)) { 246 return 1; 247 } 248 } catch (IOException ex) { 249 LOG.error("Error parsing command-line options", ex); 250 return 1; 251 } catch (ParseException ex) { 252 LOG.error("Error parsing command-line options", ex); 253 return 1; 254 } 255 256 // iterate over all files found 257 for (Path fileName : files) { 258 try { 259 int exitCode = processFile(fileName, false); 260 if (exitCode != 0) { 261 return exitCode; 262 } 263 } catch (IOException ex) { 264 LOG.error("Error reading " + fileName, ex); 265 return -2; 266 } 267 } 268 269 if (verbose || printKey) { 270 out.println("Scanned kv count -> " + count); 271 } 272 273 return 0; 274 } 275 276 // HBASE-22561 introduces boolean checkRootDir for WebUI specificly 277 public int processFile(Path file, boolean checkRootDir) throws IOException { 278 if (verbose) { 279 out.println("Scanning -> " + file); 280 } 281 282 if (checkRootDir) { 283 Path rootPath = CommonFSUtils.getRootDir(getConf()); 284 String rootString = rootPath + Path.SEPARATOR; 285 if (!file.toString().startsWith(rootString)) { 286 // First we see if fully-qualified URI matches the root dir. It might 287 // also be an absolute path in the same filesystem, so we prepend the FS 288 // of the root dir and see if that fully-qualified URI matches. 289 FileSystem rootFS = rootPath.getFileSystem(getConf()); 290 String qualifiedFile = rootFS.getUri().toString() + file.toString(); 291 if (!qualifiedFile.startsWith(rootString)) { 292 err.println( 293 "ERROR, file (" + file + ") is not in HBase's root directory (" + rootString + ")"); 294 return -2; 295 } 296 } 297 } 298 299 FileSystem fs = file.getFileSystem(getConf()); 300 if (!fs.exists(file)) { 301 err.println("ERROR, file doesnt exist: " + file); 302 return -2; 303 } 304 305 HFile.Reader reader = HFile.createReader(fs, file, CacheConfig.DISABLED, true, getConf()); 306 307 Map<byte[], byte[]> fileInfo = reader.getHFileInfo(); 308 309 KeyValueStatsCollector fileStats = null; 310 311 if (verbose || printKey || checkRow || checkFamily || printStats || checkMobIntegrity) { 312 // scan over file and read key/value's and check if requested 313 HFileScanner scanner = reader.getScanner(getConf(), false, false, false); 314 fileStats = new KeyValueStatsCollector(); 315 boolean shouldScanKeysValues; 316 if (this.isSeekToRow && !Bytes.equals(row, reader.getFirstRowKey().orElse(null))) { 317 // seek to the first kv on this row 318 shouldScanKeysValues = (scanner.seekTo(PrivateCellUtil.createFirstOnRow(this.row)) != -1); 319 } else { 320 shouldScanKeysValues = scanner.seekTo(); 321 } 322 if (shouldScanKeysValues) { 323 scanKeysValues(file, fileStats, scanner, row); 324 } 325 } 326 327 // print meta data 328 if (shouldPrintMeta) { 329 printMeta(reader, fileInfo); 330 } 331 332 if (printBlockIndex) { 333 out.println("Block Index:"); 334 out.println(reader.getDataBlockIndexReader()); 335 } 336 337 if (printBlockHeaders) { 338 out.println("Block Headers:"); 339 /* 340 * TODO: this same/similar block iteration logic is used in HFileBlock#blockRange and 341 * TestLazyDataBlockDecompression. Refactor? 342 */ 343 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file); 344 long fileSize = fs.getFileStatus(file).getLen(); 345 FixedFileTrailer trailer = FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize); 346 long offset = trailer.getFirstDataBlockOffset(), max = trailer.getLastDataBlockOffset(); 347 HFileBlock block; 348 while (offset <= max) { 349 block = reader.readBlock(offset, -1, /* cacheBlock */ false, /* pread */ false, 350 /* isCompaction */ false, /* updateCacheMetrics */ false, null, null); 351 offset += block.getOnDiskSizeWithHeader(); 352 out.println(block); 353 } 354 } 355 356 if (printStats) { 357 fileStats.finish(printStatRanges); 358 out.println("Stats:\n" + fileStats); 359 } 360 361 reader.close(); 362 return 0; 363 } 364 365 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats, HFileScanner scanner, 366 byte[] row) throws IOException { 367 Cell pCell = null; 368 FileSystem fs = FileSystem.get(getConf()); 369 Set<String> foundMobFiles = new LinkedHashSet<>(FOUND_MOB_FILES_CACHE_CAPACITY); 370 Set<String> missingMobFiles = new LinkedHashSet<>(MISSING_MOB_FILES_CACHE_CAPACITY); 371 do { 372 Cell cell = scanner.getCell(); 373 if (row != null && row.length != 0) { 374 int result = CellComparator.getInstance().compareRows(cell, row, 0, row.length); 375 if (result > 0) { 376 break; 377 } else if (result < 0) { 378 continue; 379 } 380 } 381 // collect stats 382 if (printStats) { 383 fileStats.collect(cell, printStatRanges); 384 } 385 // dump key value 386 if (printKey) { 387 out.print("K: " + cell); 388 if (printValue) { 389 out.print(" V: " + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(), 390 cell.getValueLength())); 391 int i = 0; 392 List<Tag> tags = PrivateCellUtil.getTags(cell); 393 for (Tag tag : tags) { 394 out.print(String.format(" T[%d]: %s", i++, tag.toString())); 395 } 396 } 397 out.println(); 398 } 399 // check if rows are in order 400 if (checkRow && pCell != null) { 401 if (CellComparator.getInstance().compareRows(pCell, cell) > 0) { 402 err.println("WARNING, previous row is greater then" + " current row\n\tfilename -> " 403 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 404 + CellUtil.getCellKeyAsString(cell)); 405 } 406 } 407 // check if families are consistent 408 if (checkFamily) { 409 String fam = 410 Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); 411 if (!file.toString().contains(fam)) { 412 err.println("WARNING, filename does not match kv family," + "\n\tfilename -> " + file 413 + "\n\tkeyvalue -> " + CellUtil.getCellKeyAsString(cell)); 414 } 415 if (pCell != null && CellComparator.getInstance().compareFamilies(pCell, cell) != 0) { 416 err.println( 417 "WARNING, previous kv has different family" + " compared to current key\n\tfilename -> " 418 + file + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> " 419 + CellUtil.getCellKeyAsString(cell)); 420 } 421 } 422 // check if mob files are missing. 423 if (checkMobIntegrity && MobUtils.isMobReferenceCell(cell)) { 424 Optional<TableName> tn = MobUtils.getTableName(cell); 425 if (!tn.isPresent()) { 426 System.err.println( 427 "ERROR, wrong tag format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 428 } else if (!MobUtils.hasValidMobRefCellValue(cell)) { 429 System.err.println( 430 "ERROR, wrong value format in mob reference cell " + CellUtil.getCellKeyAsString(cell)); 431 } else { 432 String mobFileName = MobUtils.getMobFileName(cell); 433 boolean exist = mobFileExists(fs, tn.get(), mobFileName, 434 Bytes.toString(CellUtil.cloneFamily(cell)), foundMobFiles, missingMobFiles); 435 if (!exist) { 436 // report error 437 System.err.println("ERROR, the mob file [" + mobFileName 438 + "] is missing referenced by cell " + CellUtil.getCellKeyAsString(cell)); 439 } 440 } 441 } 442 pCell = cell; 443 ++count; 444 } while (scanner.next()); 445 } 446 447 /** 448 * Checks whether the referenced mob file exists. 449 */ 450 private boolean mobFileExists(FileSystem fs, TableName tn, String mobFileName, String family, 451 Set<String> foundMobFiles, Set<String> missingMobFiles) throws IOException { 452 if (foundMobFiles.contains(mobFileName)) { 453 return true; 454 } 455 if (missingMobFiles.contains(mobFileName)) { 456 return false; 457 } 458 String tableName = tn.getNameAsString(); 459 List<Path> locations = mobFileLocations.get(tableName); 460 if (locations == null) { 461 locations = new ArrayList<>(2); 462 locations.add(MobUtils.getMobFamilyPath(getConf(), tn, family)); 463 locations.add(HFileArchiveUtil.getStoreArchivePath(getConf(), tn, 464 MobUtils.getMobRegionInfo(tn).getEncodedName(), family)); 465 mobFileLocations.put(tn.getNameAsString(), locations); 466 } 467 boolean exist = false; 468 for (Path location : locations) { 469 Path mobFilePath = new Path(location, mobFileName); 470 if (fs.exists(mobFilePath)) { 471 exist = true; 472 break; 473 } 474 } 475 if (exist) { 476 evictMobFilesIfNecessary(foundMobFiles, FOUND_MOB_FILES_CACHE_CAPACITY); 477 foundMobFiles.add(mobFileName); 478 } else { 479 evictMobFilesIfNecessary(missingMobFiles, MISSING_MOB_FILES_CACHE_CAPACITY); 480 missingMobFiles.add(mobFileName); 481 } 482 return exist; 483 } 484 485 /** 486 * Evicts the cached mob files if the set is larger than the limit. 487 */ 488 private void evictMobFilesIfNecessary(Set<String> mobFileNames, int limit) { 489 if (mobFileNames.size() < limit) { 490 return; 491 } 492 int index = 0; 493 int evict = limit / 2; 494 Iterator<String> fileNamesItr = mobFileNames.iterator(); 495 while (index < evict && fileNamesItr.hasNext()) { 496 fileNamesItr.next(); 497 fileNamesItr.remove(); 498 index++; 499 } 500 } 501 502 /** 503 * Format a string of the form "k1=v1, k2=v2, ..." into separate lines with a four-space 504 * indentation. 505 */ 506 private static String asSeparateLines(String keyValueStr) { 507 return keyValueStr.replaceAll(", ([a-zA-Z]+=)", ",\n" + FOUR_SPACES + "$1"); 508 } 509 510 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo) throws IOException { 511 out.println("Block index size as per heapsize: " + reader.indexSize()); 512 out.println(asSeparateLines(reader.toString())); 513 out.println("Trailer:\n " + asSeparateLines(reader.getTrailer().toString())); 514 out.println("Fileinfo:"); 515 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) { 516 out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = "); 517 if ( 518 Bytes.equals(e.getKey(), HStoreFile.MAX_SEQ_ID_KEY) 519 || Bytes.equals(e.getKey(), HStoreFile.DELETE_FAMILY_COUNT) 520 || Bytes.equals(e.getKey(), HStoreFile.EARLIEST_PUT_TS) 521 || Bytes.equals(e.getKey(), HFileWriterImpl.MAX_MEMSTORE_TS_KEY) 522 || Bytes.equals(e.getKey(), HFileInfo.CREATE_TIME_TS) 523 || Bytes.equals(e.getKey(), HStoreFile.BULKLOAD_TIME_KEY) 524 ) { 525 out.println(Bytes.toLong(e.getValue())); 526 } else if (Bytes.equals(e.getKey(), HStoreFile.TIMERANGE_KEY)) { 527 TimeRangeTracker timeRangeTracker = TimeRangeTracker.parseFrom(e.getValue()); 528 out.println(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax()); 529 } else if ( 530 Bytes.equals(e.getKey(), HFileInfo.AVG_KEY_LEN) 531 || Bytes.equals(e.getKey(), HFileInfo.AVG_VALUE_LEN) 532 || Bytes.equals(e.getKey(), HFileWriterImpl.KEY_VALUE_VERSION) 533 || Bytes.equals(e.getKey(), HFileInfo.MAX_TAGS_LEN) 534 ) { 535 out.println(Bytes.toInt(e.getValue())); 536 } else if ( 537 Bytes.equals(e.getKey(), HStoreFile.MAJOR_COMPACTION_KEY) 538 || Bytes.equals(e.getKey(), HFileInfo.TAGS_COMPRESSED) 539 || Bytes.equals(e.getKey(), HStoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY) 540 ) { 541 out.println(Bytes.toBoolean(e.getValue())); 542 } else if (Bytes.equals(e.getKey(), HFileInfo.LASTKEY)) { 543 out.println(new KeyValue.KeyOnlyKeyValue(e.getValue()).toString()); 544 } else { 545 out.println(Bytes.toStringBinary(e.getValue())); 546 } 547 } 548 549 try { 550 out.println("Mid-key: " + reader.midKey().map(CellUtil::getCellKeyAsString)); 551 } catch (Exception e) { 552 out.println("Unable to retrieve the midkey"); 553 } 554 555 // Printing general bloom information 556 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata(); 557 BloomFilter bloomFilter = null; 558 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 559 560 out.println("Bloom filter:"); 561 if (bloomFilter != null) { 562 out.println(FOUR_SPACES 563 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 564 } else { 565 out.println(FOUR_SPACES + "Not present"); 566 } 567 568 // Printing delete bloom information 569 bloomMeta = reader.getDeleteBloomFilterMetadata(); 570 bloomFilter = null; 571 if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); 572 573 out.println("Delete Family Bloom filter:"); 574 if (bloomFilter != null) { 575 out.println(FOUR_SPACES 576 + bloomFilter.toString().replaceAll(BloomFilterUtil.STATS_RECORD_SEP, "\n" + FOUR_SPACES)); 577 } else { 578 out.println(FOUR_SPACES + "Not present"); 579 } 580 } 581 582 // Default reservoir is exponentially decaying, but we're doing a point-in-time analysis 583 // of a store file. It doesn't make sense to prefer keys later in the store file. 584 private static final MetricRegistry.MetricSupplier<Histogram> UNIFORM_RESERVOIR = 585 () -> new Histogram(new UniformReservoir()); 586 587 // Useful ranges for viewing distribution of small to large keys, values, and rows. 588 // we only print ranges which actually have values, so more here doesn't add much overhead 589 private static final long[] RANGES = new long[] { 1, 3, 10, 50, 100, 500, 1_000, 5_000, 10_000, 590 50_000, 100_000, 500_000, 750_000, 1_000_000, 5_000_000, 10_000_000, 50_000_000, 100_000_000 }; 591 592 /** 593 * Holds a Histogram and supporting min/max and range buckets for analyzing distribution of key 594 * bytes, value bytes, row bytes, and row columns. Supports adding values, getting the histogram, 595 * and getting counts per range. 596 */ 597 static class KeyValueStats { 598 private final Histogram histogram; 599 private final String name; 600 private long max = Long.MIN_VALUE; 601 private long min = Long.MAX_VALUE; 602 private boolean collectRanges = false; 603 private final LongAdder[] rangeCounts; 604 605 KeyValueStats(MetricRegistry metricRegistry, String statName) { 606 this.histogram = 607 metricRegistry.histogram(name(HFilePrettyPrinter.class, statName), UNIFORM_RESERVOIR); 608 this.name = statName; 609 this.rangeCounts = new LongAdder[RANGES.length]; 610 for (int i = 0; i < rangeCounts.length; i++) { 611 rangeCounts[i] = new LongAdder(); 612 } 613 } 614 615 void update(long value, boolean collectRanges) { 616 histogram.update(value); 617 min = Math.min(value, min); 618 max = Math.max(value, max); 619 620 if (collectRanges) { 621 this.collectRanges = true; 622 int result = Arrays.binarySearch(RANGES, value); 623 int idx = result >= 0 ? result : Math.abs(result) - 1; 624 rangeCounts[idx].increment(); 625 } 626 } 627 628 Histogram getHistogram() { 629 return histogram; 630 } 631 632 String getName() { 633 return name; 634 } 635 636 long getMax() { 637 return max; 638 } 639 640 long getMin() { 641 return min; 642 } 643 644 long[] getRanges() { 645 return RANGES; 646 } 647 648 long getCountAtOrBelow(long range) { 649 long count = 0; 650 for (int i = 0; i < RANGES.length; i++) { 651 if (RANGES[i] <= range) { 652 count += rangeCounts[i].sum(); 653 } else { 654 break; 655 } 656 } 657 return count; 658 } 659 660 boolean hasRangeCounts() { 661 return collectRanges; 662 } 663 } 664 665 private static class KeyValueStatsCollector { 666 private final MetricRegistry metricsRegistry = new MetricRegistry(); 667 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream(); 668 669 KeyValueStats keyLen = new KeyValueStats(metricsRegistry, "Key length"); 670 KeyValueStats valLen = new KeyValueStats(metricsRegistry, "Val length"); 671 KeyValueStats rowSizeBytes = new KeyValueStats(metricsRegistry, "Row size (bytes)"); 672 KeyValueStats rowSizeCols = new KeyValueStats(metricsRegistry, "Row size (columns)"); 673 674 private final SimpleReporter simpleReporter = 675 SimpleReporter.newBuilder().outputTo(new PrintStream(metricsOutput)).addStats(keyLen) 676 .addStats(valLen).addStats(rowSizeBytes).addStats(rowSizeCols).build(); 677 678 long curRowBytes = 0; 679 long curRowCols = 0; 680 681 byte[] biggestRow = null; 682 683 private Cell prevCell = null; 684 private long maxRowBytes = 0; 685 private long curRowKeyLength; 686 687 public void collect(Cell cell, boolean printStatRanges) { 688 valLen.update(cell.getValueLength(), printStatRanges); 689 if (prevCell != null && CellComparator.getInstance().compareRows(prevCell, cell) != 0) { 690 // new row 691 collectRow(printStatRanges); 692 } 693 curRowBytes += cell.getSerializedSize(); 694 curRowKeyLength = KeyValueUtil.keyLength(cell); 695 curRowCols++; 696 prevCell = cell; 697 } 698 699 private void collectRow(boolean printStatRanges) { 700 rowSizeBytes.update(curRowBytes, printStatRanges); 701 rowSizeCols.update(curRowCols, printStatRanges); 702 keyLen.update(curRowKeyLength, printStatRanges); 703 704 if (curRowBytes > maxRowBytes && prevCell != null) { 705 biggestRow = CellUtil.cloneRow(prevCell); 706 maxRowBytes = curRowBytes; 707 } 708 709 curRowBytes = 0; 710 curRowCols = 0; 711 } 712 713 public void finish(boolean printStatRanges) { 714 if (curRowCols > 0) { 715 collectRow(printStatRanges); 716 } 717 } 718 719 @Override 720 public String toString() { 721 if (prevCell == null) return "no data available for statistics"; 722 723 // Dump the metrics to the output stream 724 simpleReporter.report(); 725 726 return metricsOutput.toString() + "Key of biggest row: " + Bytes.toStringBinary(biggestRow); 727 } 728 } 729 730 /** 731 * Simple reporter which collects registered histograms for printing to an output stream in 732 * {@link #report()}. 733 */ 734 private static final class SimpleReporter { 735 /** 736 * Returns a new {@link Builder} for {@link SimpleReporter}. 737 * @return a {@link Builder} instance for a {@link SimpleReporter} 738 */ 739 public static Builder newBuilder() { 740 return new Builder(); 741 } 742 743 /** 744 * A builder for {@link SimpleReporter} instances. Defaults to using the default locale and time 745 * zone, writing to {@code System.out}. 746 */ 747 public static class Builder { 748 private final List<KeyValueStats> stats = new ArrayList<>(); 749 private PrintStream output; 750 private Locale locale; 751 private TimeZone timeZone; 752 753 private Builder() { 754 this.output = System.out; 755 this.locale = Locale.getDefault(); 756 this.timeZone = TimeZone.getDefault(); 757 } 758 759 /** 760 * Write to the given {@link PrintStream}. 761 * @param output a {@link PrintStream} instance. 762 * @return {@code this} 763 */ 764 public Builder outputTo(PrintStream output) { 765 this.output = output; 766 return this; 767 } 768 769 /** 770 * Add the given {@link KeyValueStats} to be reported 771 * @param stat the stat to be reported 772 * @return {@code this} 773 */ 774 public Builder addStats(KeyValueStats stat) { 775 this.stats.add(stat); 776 return this; 777 } 778 779 /** 780 * Builds a {@link ConsoleReporter} with the given properties. 781 * @return a {@link ConsoleReporter} 782 */ 783 public SimpleReporter build() { 784 return new SimpleReporter(output, stats, locale, timeZone); 785 } 786 } 787 788 private final PrintStream output; 789 private final List<KeyValueStats> stats; 790 private final Locale locale; 791 private final DateFormat dateFormat; 792 793 private SimpleReporter(PrintStream output, List<KeyValueStats> stats, Locale locale, 794 TimeZone timeZone) { 795 this.output = output; 796 this.stats = stats; 797 this.locale = locale; 798 this.dateFormat = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.MEDIUM, locale); 799 dateFormat.setTimeZone(timeZone); 800 } 801 802 public void report() { 803 // we know we only have histograms 804 if (!stats.isEmpty()) { 805 for (KeyValueStats stat : stats) { 806 output.print(" " + stat.getName()); 807 output.println(':'); 808 printHistogram(stat); 809 } 810 output.println(); 811 } 812 813 output.println(); 814 output.flush(); 815 } 816 817 private void printHistogram(KeyValueStats stats) { 818 Histogram histogram = stats.getHistogram(); 819 Snapshot snapshot = histogram.getSnapshot(); 820 821 output.printf(locale, " min = %d%n", stats.getMin()); 822 output.printf(locale, " max = %d%n", stats.getMax()); 823 output.printf(locale, " mean = %2.2f%n", snapshot.getMean()); 824 output.printf(locale, " stddev = %2.2f%n", snapshot.getStdDev()); 825 output.printf(locale, " median = %2.2f%n", snapshot.getMedian()); 826 output.printf(locale, " 75%% <= %2.2f%n", snapshot.get75thPercentile()); 827 output.printf(locale, " 95%% <= %2.2f%n", snapshot.get95thPercentile()); 828 output.printf(locale, " 98%% <= %2.2f%n", snapshot.get98thPercentile()); 829 output.printf(locale, " 99%% <= %2.2f%n", snapshot.get99thPercentile()); 830 output.printf(locale, " 99.9%% <= %2.2f%n", snapshot.get999thPercentile()); 831 output.printf(locale, " count = %d%n", histogram.getCount()); 832 833 // if printStatRanges was enabled with -d arg, below we'll create an approximate histogram 834 // of counts based on the configured ranges in RANGES. Each range of sizes (i.e. <= 50, <= 835 // 100, etc) will have a count printed if any values were seen in that range. If no values 836 // were seen for a range, that range will be excluded to keep the output small. 837 if (stats.hasRangeCounts()) { 838 output.printf(locale, " (range <= count):%n"); 839 long lastVal = 0; 840 long lastRange = 0; 841 for (long range : stats.getRanges()) { 842 long val = stats.getCountAtOrBelow(range); 843 if (val - lastVal > 0) { 844 // print the last zero value before this one, to give context 845 if (lastVal == 0 && lastRange != 0) { 846 printRangeCount(lastRange, lastVal); 847 } 848 printRangeCount(range, val - lastVal); 849 } 850 lastVal = val; 851 lastRange = range; 852 } 853 if (histogram.getCount() - lastVal > 0) { 854 // print any remaining that might have been outside our buckets 855 printRangeCount(Long.MAX_VALUE, histogram.getCount() - lastVal); 856 } 857 } 858 } 859 860 private void printRangeCount(long range, long countAtOrBelow) { 861 String rangeString = range == Long.MAX_VALUE ? "inf" : Long.toString(range); 862 output.printf(locale, "%17s <= %d%n", rangeString, countAtOrBelow); 863 } 864 } 865 866 public static void main(String[] args) throws Exception { 867 Configuration conf = HBaseConfiguration.create(); 868 // no need for a block cache 869 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0); 870 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args); 871 System.exit(ret); 872 } 873}