001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.coprocessor.example.row.stats;
019
020import java.util.Map;
021import org.apache.commons.lang3.builder.ToStringBuilder;
022import org.apache.commons.lang3.builder.ToStringStyle;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellUtil;
025import org.apache.hadoop.hbase.RawCellBuilder;
026import org.apache.hadoop.hbase.coprocessor.example.row.stats.utils.RowStatisticsUtil;
027import org.apache.hadoop.hbase.regionserver.Shipper;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.hadoop.hbase.util.GsonUtil;
030import org.apache.yetus.audience.InterfaceAudience;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034import org.apache.hbase.thirdparty.com.google.gson.Gson;
035import org.apache.hbase.thirdparty.com.google.gson.JsonObject;
036
037/**
038 * Holder for accumulating row statistics in {@link RowStatisticsCompactionObserver} Creates various
039 * cell, row, and total stats.
040 */
041@InterfaceAudience.Private
042public class RowStatisticsImpl implements RowStatistics {
043
044  private static final Logger LOG = LoggerFactory.getLogger(RowStatisticsImpl.class);
045  private static final Gson GSON = GsonUtil.createGson().create();
046
047  //
048  // Transient fields which are not included in gson serialization
049  //
050  private final transient long blockSize;
051  private final transient long maxCacheSize;
052  private transient int rowCells;
053  private transient long rowBytes;
054  private transient byte[] largestRow;
055  private transient Cell largestCell;
056  private final transient boolean isMajor;
057  private final transient SizeBucketTracker rowSizeBuckets;
058  private final transient SizeBucketTracker valueSizeBuckets;
059
060  // We don't need to clone anything until shipped() is called on scanner.
061  // To avoid allocations, we keep a reference until that point
062  private transient Cell largestRowRef;
063  private transient Cell largestCellRef;
064  //
065  // Non-transient fields which are included in gson
066  //
067  private final String table;
068  private final String region;
069  private final String columnFamily;
070  private long largestRowNumBytes;
071  private int largestRowCellsCount;
072  private long largestCellNumBytes;
073  private int cellsLargerThanOneBlockCount;
074  private int rowsLargerThanOneBlockCount;
075  private int cellsLargerThanMaxCacheSizeCount;
076  private int totalDeletesCount;
077  private int totalCellsCount;
078  private int totalRowsCount;
079  private long totalBytesCount;
080
081  RowStatisticsImpl(String table, String encodedRegion, String columnFamily, long blockSize,
082    long maxCacheSize, boolean isMajor) {
083    this.table = table;
084    this.region = encodedRegion;
085    this.columnFamily = columnFamily;
086    this.blockSize = blockSize;
087    this.maxCacheSize = maxCacheSize;
088    this.isMajor = isMajor;
089    this.rowSizeBuckets = new SizeBucketTracker();
090    this.valueSizeBuckets = new SizeBucketTracker();
091  }
092
093  public void handleRowChanged(Cell lastCell) {
094    if (rowBytes > largestRowNumBytes) {
095      largestRowRef = lastCell;
096      largestRowNumBytes = rowBytes;
097      largestRowCellsCount = rowCells;
098    }
099    if (rowBytes > blockSize) {
100      if (LOG.isDebugEnabled()) {
101        LOG.debug("RowTooLarge: rowBytes={}, blockSize={}, table={}, rowKey={}", rowBytes,
102          blockSize, table, Bytes.toStringBinary(lastCell.getRowArray(), lastCell.getRowOffset(),
103            lastCell.getRowLength()));
104      }
105      rowsLargerThanOneBlockCount++;
106    }
107    rowSizeBuckets.add(rowBytes);
108    rowBytes = 0;
109    rowCells = 0;
110    totalRowsCount++;
111  }
112
113  public void consumeCell(Cell cell) {
114    int cellSize = cell.getSerializedSize();
115
116    rowBytes += cellSize;
117    rowCells++;
118
119    boolean tooLarge = false;
120    if (cellSize > maxCacheSize) {
121      cellsLargerThanMaxCacheSizeCount++;
122      tooLarge = true;
123    }
124    if (cellSize > blockSize) {
125      cellsLargerThanOneBlockCount++;
126      tooLarge = true;
127    }
128
129    if (tooLarge && LOG.isDebugEnabled()) {
130      LOG.debug("CellTooLarge: size={}, blockSize={}, maxCacheSize={}, table={}, cell={}", cellSize,
131        blockSize, maxCacheSize, table, CellUtil.toString(cell, false));
132    }
133
134    if (cellSize > largestCellNumBytes) {
135      largestCellRef = cell;
136      largestCellNumBytes = cellSize;
137    }
138    valueSizeBuckets.add(cell.getValueLength());
139
140    totalCellsCount++;
141    if (CellUtil.isDelete(cell)) {
142      totalDeletesCount++;
143    }
144    totalBytesCount += cellSize;
145  }
146
147  /**
148   * Clone the cell refs so they can be cleaned up by {@link Shipper#shipped()}. Doing this lazily
149   * here, rather than eagerly in the above two methods can save us on some allocations. We might
150   * change the largestCell/largestRow multiple times between shipped() calls.
151   */
152  public void shipped(RawCellBuilder cellBuilder) {
153    if (largestRowRef != null) {
154      largestRow = CellUtil.cloneRow(largestRowRef);
155      largestRowRef = null;
156    }
157    if (largestCellRef != null) {
158      largestCell = RowStatisticsUtil.cloneWithoutValue(cellBuilder, largestCellRef);
159      largestCellRef = null;
160    }
161  }
162
163  @Override
164  public String getTable() {
165    return table;
166  }
167
168  @Override
169  public String getRegion() {
170    return region;
171  }
172
173  @Override
174  public String getColumnFamily() {
175    return columnFamily;
176  }
177
178  @Override
179  public boolean isMajor() {
180    return isMajor;
181  }
182
183  public byte[] getLargestRow() {
184    return largestRow;
185  }
186
187  @Override
188  public String getLargestRowAsString() {
189    return Bytes.toStringBinary(getLargestRow());
190  }
191
192  @Override
193  public long getLargestRowNumBytes() {
194    return largestRowNumBytes;
195  }
196
197  @Override
198  public int getLargestRowCellsCount() {
199    return largestRowCellsCount;
200  }
201
202  public Cell getLargestCell() {
203    return largestCell;
204  }
205
206  @Override
207  public String getLargestCellAsString() {
208    return CellUtil.toString(getLargestCell(), false);
209  }
210
211  @Override
212  public long getLargestCellNumBytes() {
213    return largestCellNumBytes;
214  }
215
216  @Override
217  public int getCellsLargerThanOneBlockCount() {
218    return cellsLargerThanOneBlockCount;
219  }
220
221  @Override
222  public int getRowsLargerThanOneBlockCount() {
223    return rowsLargerThanOneBlockCount;
224  }
225
226  @Override
227  public int getCellsLargerThanMaxCacheSizeCount() {
228    return cellsLargerThanMaxCacheSizeCount;
229  }
230
231  @Override
232  public int getTotalDeletesCount() {
233    return totalDeletesCount;
234  }
235
236  @Override
237  public int getTotalCellsCount() {
238    return totalCellsCount;
239  }
240
241  @Override
242  public int getTotalRowsCount() {
243    return totalRowsCount;
244  }
245
246  @Override
247  public long getTotalBytes() {
248    return totalBytesCount;
249  }
250
251  @Override
252  public Map<String, Long> getRowSizeBuckets() {
253    return rowSizeBuckets.toMap();
254  }
255
256  @Override
257  public Map<String, Long> getValueSizeBuckets() {
258    return valueSizeBuckets.toMap();
259  }
260
261  @Override
262  public String toString() {
263    return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
264      .append("largestRowAsString", Bytes.toStringBinary(largestRow))
265      .append("largestCellAsString", largestCell).append("largestRowNumBytes", largestRowNumBytes)
266      .append("largestRowCellsCount", largestRowCellsCount)
267      .append("largestCellNumBytes", largestCellNumBytes)
268      .append("cellsLargerThanOneBlockCount", cellsLargerThanOneBlockCount)
269      .append("rowsLargerThanOneBlockCount", rowsLargerThanOneBlockCount)
270      .append("cellsLargerThanMaxCacheSizeCount", cellsLargerThanMaxCacheSizeCount)
271      .append("totalDeletesCount", totalDeletesCount).append("totalCellsCount", totalCellsCount)
272      .append("totalRowsCount", totalRowsCount).append("totalBytesCount", totalBytesCount)
273      .append("rowSizeBuckets", getRowSizeBuckets())
274      .append("valueSizeBuckets", getValueSizeBuckets()).append("isMajor", isMajor).toString();
275  }
276
277  @Override
278  public String getJsonString() {
279    JsonObject json = (JsonObject) GSON.toJsonTree(this);
280    json.add("largestCellParts", buildLargestCellPartsJson());
281    json.addProperty("largestRowAsString", getLargestRowAsString());
282    json.add("rowSizeBuckets", rowSizeBuckets.toJsonObject());
283    json.add("valueSizeBuckets", valueSizeBuckets.toJsonObject());
284    return json.toString();
285  }
286
287  private JsonObject buildLargestCellPartsJson() {
288    JsonObject cellJson = new JsonObject();
289    Cell cell = getLargestCell();
290    cellJson.addProperty("rowKey",
291      Bytes.toStringBinary(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()));
292    cellJson.addProperty("family",
293      Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
294    cellJson.addProperty("qualifier", Bytes.toStringBinary(cell.getQualifierArray(),
295      cell.getQualifierOffset(), cell.getQualifierLength()));
296    cellJson.addProperty("timestamp", cell.getTimestamp());
297    cellJson.addProperty("type", cell.getType().toString());
298    return cellJson;
299  }
300}