/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.coprocessor.example.row.stats;

import java.util.Map;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.RawCellBuilder;
import org.apache.hadoop.hbase.coprocessor.example.row.stats.utils.RowStatisticsUtil;
import org.apache.hadoop.hbase.regionserver.Shipper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.JsonObject;

/**
 * Holder for accumulating row statistics in {@link RowStatisticsCompactionObserver}. Creates
 * various cell, row, and total stats.
 * <p>
 * Cells are fed in through {@link #consumeCell(Cell)}; {@link #handleRowChanged(Cell)} closes out
 * the per-row accumulators; {@link #shipped(RawCellBuilder)} turns the retained cell references
 * into private copies. Only the non-transient fields take part in gson serialization (see
 * {@link #getJsonString()}).
 */
@InterfaceAudience.Private
public class RowStatisticsImpl implements RowStatistics {

  private static final Logger LOG = LoggerFactory.getLogger(RowStatisticsImpl.class);
  private static final Gson GSON = GsonUtil.createGson().create();

  //
  // Transient fields which are not included in gson serialization
  //
  private final transient long blockSize;
  private final transient long maxCacheSize;
  // Running cell count / byte count of the row currently being accumulated.
  private transient int rowCells;
  private transient long rowBytes;
  // Private copies produced by shipped(); null until a corresponding ref has been cloned.
  private transient byte[] largestRow;
  private transient Cell largestCell;
  private final transient boolean isMajor;
  private final transient SizeBucketTracker rowSizeBuckets;
  private final transient SizeBucketTracker valueSizeBuckets;

  // We don't need to clone anything until shipped() is called on scanner.
  // To avoid allocations, we keep a reference until that point
  private transient Cell largestRowRef;
  private transient Cell largestCellRef;

  //
  // Non-transient fields which are included in gson
  //
  private final String table;
  private final String region;
  private final String columnFamily;
  private long largestRowNumBytes;
  private int largestRowCellsCount;
  private long largestCellNumBytes;
  private int cellsLargerThanOneBlockCount;
  private int rowsLargerThanOneBlockCount;
  private int cellsLargerThanMaxCacheSizeCount;
  private int totalDeletesCount;
  private int totalCellsCount;
  private int totalRowsCount;
  private long totalBytesCount;

  /**
   * Creates an empty statistics holder.
   * @param table         table name, reported via {@link #getTable()}
   * @param encodedRegion region identifier, reported via {@link #getRegion()}
   * @param columnFamily  column family name, reported via {@link #getColumnFamily()}
   * @param blockSize     threshold in bytes above which a cell or row is counted as larger than
   *                      one block
   * @param maxCacheSize  threshold in bytes above which a cell is counted as larger than the max
   *                      cache size
   * @param isMajor       value reported via {@link #isMajor()}
   */
  RowStatisticsImpl(String table, String encodedRegion, String columnFamily, long blockSize,
    long maxCacheSize, boolean isMajor) {
    this.table = table;
    this.region = encodedRegion;
    this.columnFamily = columnFamily;
    this.blockSize = blockSize;
    this.maxCacheSize = maxCacheSize;
    this.isMajor = isMajor;
    this.rowSizeBuckets = new SizeBucketTracker();
    this.valueSizeBuckets = new SizeBucketTracker();
  }

  /**
   * Closes out the row accumulated so far: updates the largest-row tracking, counts the row if it
   * exceeds the block size, records its size in the row-size buckets, and resets the per-row
   * accumulators.
   * @param lastCell a cell of the row being closed; only its row key is used (for logging and, if
   *                 this row is the largest seen so far, for the later copy made in
   *                 {@link #shipped(RawCellBuilder)})
   */
  public void handleRowChanged(Cell lastCell) {
    if (rowBytes > largestRowNumBytes) {
      // Keep only a reference here; the row key is copied lazily in shipped().
      largestRowRef = lastCell;
      largestRowNumBytes = rowBytes;
      largestRowCellsCount = rowCells;
    }
    if (rowBytes > blockSize) {
      // Guarded because the row key is rendered eagerly as a log argument.
      if (LOG.isDebugEnabled()) {
        LOG.debug("RowTooLarge: rowBytes={}, blockSize={}, table={}, rowKey={}", rowBytes,
          blockSize, table, Bytes.toStringBinary(lastCell.getRowArray(), lastCell.getRowOffset(),
            lastCell.getRowLength()));
      }
      rowsLargerThanOneBlockCount++;
    }
    rowSizeBuckets.add(rowBytes);
    rowBytes = 0;
    rowCells = 0;
    totalRowsCount++;
  }

  /**
   * Folds a single cell into the per-row accumulators and the overall totals, and updates the
   * largest-cell tracking and the value-size buckets.
   * @param cell the cell to account for
   */
  public void consumeCell(Cell cell) {
    int cellSize = cell.getSerializedSize();

    rowBytes += cellSize;
    rowCells++;

    boolean tooLarge = false;
    if (cellSize > maxCacheSize) {
      cellsLargerThanMaxCacheSizeCount++;
      tooLarge = true;
    }
    if (cellSize > blockSize) {
      cellsLargerThanOneBlockCount++;
      tooLarge = true;
    }

    // Guarded because the cell is rendered eagerly as a log argument.
    if (tooLarge && LOG.isDebugEnabled()) {
      LOG.debug("CellTooLarge: size={}, blockSize={}, maxCacheSize={}, table={}, cell={}", cellSize,
        blockSize, maxCacheSize, table, CellUtil.toString(cell, false));
    }

    if (cellSize > largestCellNumBytes) {
      // Keep only a reference here; the cell is copied lazily in shipped().
      largestCellRef = cell;
      largestCellNumBytes = cellSize;
    }
    valueSizeBuckets.add(cell.getValueLength());

    totalCellsCount++;
    if (CellUtil.isDelete(cell)) {
      totalDeletesCount++;
    }
    totalBytesCount += cellSize;
  }

  /**
   * Clone the cell refs so they can be cleaned up by {@link Shipper#shipped()}. Doing this lazily
   * here, rather than eagerly in the above two methods can save us on some allocations. We might
   * change the largestCell/largestRow multiple times between shipped() calls.
   * @param cellBuilder builder handed to {@link RowStatisticsUtil#cloneWithoutValue} to create the
   *                    copy of the largest cell
   */
  public void shipped(RawCellBuilder cellBuilder) {
    if (largestRowRef != null) {
      largestRow = CellUtil.cloneRow(largestRowRef);
      largestRowRef = null;
    }
    if (largestCellRef != null) {
      largestCell = RowStatisticsUtil.cloneWithoutValue(cellBuilder, largestCellRef);
      largestCellRef = null;
    }
  }

  @Override
  public String getTable() {
    return table;
  }

  @Override
  public String getRegion() {
    return region;
  }

  @Override
  public String getColumnFamily() {
    return columnFamily;
  }

  @Override
  public boolean isMajor() {
    return isMajor;
  }

  /**
   * Returns the copied row key of the largest row, or {@code null} if no copy has been made yet by
   * {@link #shipped(RawCellBuilder)}.
   */
  public byte[] getLargestRow() {
    return largestRow;
  }

  @Override
  public String getLargestRowAsString() {
    return Bytes.toStringBinary(getLargestRow());
  }

  @Override
  public long getLargestRowNumBytes() {
    return largestRowNumBytes;
  }

  @Override
  public int getLargestRowCellsCount() {
    return largestRowCellsCount;
  }

  /**
   * Returns the copy of the largest cell, or {@code null} if no copy has been made yet by
   * {@link #shipped(RawCellBuilder)}.
   */
  public Cell getLargestCell() {
    return largestCell;
  }

  @Override
  public String getLargestCellAsString() {
    return CellUtil.toString(getLargestCell(), false);
  }

  @Override
  public long getLargestCellNumBytes() {
    return largestCellNumBytes;
  }

  @Override
  public int getCellsLargerThanOneBlockCount() {
    return cellsLargerThanOneBlockCount;
  }

  @Override
  public int getRowsLargerThanOneBlockCount() {
    return rowsLargerThanOneBlockCount;
  }

  @Override
  public int getCellsLargerThanMaxCacheSizeCount() {
    return cellsLargerThanMaxCacheSizeCount;
  }

  @Override
  public int getTotalDeletesCount() {
    return totalDeletesCount;
  }

  @Override
  public int getTotalCellsCount() {
    return totalCellsCount;
  }

  @Override
  public int getTotalRowsCount() {
    return totalRowsCount;
  }

  @Override
  public long getTotalBytes() {
    return totalBytesCount;
  }

  @Override
  public Map<String, Long> getRowSizeBuckets() {
    return rowSizeBuckets.toMap();
  }

  @Override
  public Map<String, Long> getValueSizeBuckets() {
    return valueSizeBuckets.toMap();
  }

  @Override
  public String toString() {
    return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
      .append("largestRowAsString", Bytes.toStringBinary(largestRow))
      .append("largestCellAsString", largestCell).append("largestRowNumBytes", largestRowNumBytes)
      .append("largestRowCellsCount", largestRowCellsCount)
      .append("largestCellNumBytes", largestCellNumBytes)
      .append("cellsLargerThanOneBlockCount", cellsLargerThanOneBlockCount)
      .append("rowsLargerThanOneBlockCount", rowsLargerThanOneBlockCount)
      .append("cellsLargerThanMaxCacheSizeCount", cellsLargerThanMaxCacheSizeCount)
      .append("totalDeletesCount", totalDeletesCount).append("totalCellsCount", totalCellsCount)
      .append("totalRowsCount", totalRowsCount).append("totalBytesCount", totalBytesCount)
      .append("rowSizeBuckets", getRowSizeBuckets())
      .append("valueSizeBuckets", getValueSizeBuckets()).append("isMajor", isMajor).toString();
  }

  /**
   * Serializes the non-transient fields with gson and then adds the derived entries
   * {@code largestCellParts}, {@code largestRowAsString}, {@code rowSizeBuckets} and
   * {@code valueSizeBuckets}.
   * @return the JSON document as a string
   */
  @Override
  public String getJsonString() {
    JsonObject json = (JsonObject) GSON.toJsonTree(this);
    json.add("largestCellParts", buildLargestCellPartsJson());
    json.addProperty("largestRowAsString", getLargestRowAsString());
    json.add("rowSizeBuckets", rowSizeBuckets.toJsonObject());
    json.add("valueSizeBuckets", valueSizeBuckets.toJsonObject());
    return json.toString();
  }

  /**
   * Builds the JSON breakdown (row key, family, qualifier, timestamp, type) of the largest cell.
   * @return the populated object, or an empty object when no largest cell has been captured yet
   *         (nothing was consumed, or {@link #shipped(RawCellBuilder)} has not run since)
   */
  private JsonObject buildLargestCellPartsJson() {
    JsonObject cellJson = new JsonObject();
    Cell cell = getLargestCell();
    if (cell == null) {
      // largestCell is only assigned in shipped(); without this guard an empty input would make
      // getJsonString() fail with a NullPointerException instead of reporting the other stats.
      return cellJson;
    }
    cellJson.addProperty("rowKey",
      Bytes.toStringBinary(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
    cellJson.addProperty("family",
      Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
    cellJson.addProperty("qualifier", Bytes.toStringBinary(cell.getQualifierArray(),
      cell.getQualifierOffset(), cell.getQualifierLength()));
    cellJson.addProperty("timestamp", cell.getTimestamp());
    cellJson.addProperty("type", cell.getType().toString());
    return cellJson;
  }
}