001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.DataInput;
021import java.io.DataOutput;
022import java.io.EOFException;
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.OutputStream;
026import java.nio.ByteBuffer;
027import java.util.ArrayList;
028import java.util.List;
029import org.apache.hadoop.hbase.io.util.StreamUtils;
030import org.apache.hadoop.hbase.util.ByteBufferUtils;
031import org.apache.hadoop.hbase.util.Bytes;
032import org.apache.hadoop.io.IOUtils;
033import org.apache.hadoop.io.WritableUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hbase.thirdparty.com.google.common.base.Function;
039import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
040import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils;
041
042/**
043 * static convenience methods for dealing with KeyValues and collections of KeyValues
044 */
045@InterfaceAudience.Private
046public class KeyValueUtil {
047
048  private static final Logger LOG = LoggerFactory.getLogger(KeyValueUtil.class);
049
050  /**************** length *********************/
051
052  public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
053    if (withTags) {
054      return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen);
055    }
056    return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen);
057  }
058
059  /**
060   * Returns number of bytes this cell's key part would have been used if serialized as in
061   * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
062   * @return the key length
063   */
064  public static int keyLength(final Cell cell) {
065    return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
066  }
067
068  private static int keyLength(short rlen, byte flen, int qlen) {
069    return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
070  }
071
072  public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
073    int length = kv.getLength();
074    if (includeMvccVersion) {
075      length += WritableUtils.getVIntSize(kv.getSequenceId());
076    }
077    return length;
078  }
079
080  public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
081    final boolean includeMvccVersion) {
082    int length = 0;
083    for (KeyValue kv : IterableUtils.emptyIfNull(kvs)) {
084      length += lengthWithMvccVersion(kv, includeMvccVersion);
085    }
086    return length;
087  }
088
089  /**************** copy the cell to create a new keyvalue *********************/
090
091  public static KeyValue copyToNewKeyValue(final Cell cell) {
092    byte[] bytes = copyToNewByteArray(cell);
093    KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
094    kvCell.setSequenceId(cell.getSequenceId());
095    return kvCell;
096  }
097
098  /**
099   * The position will be set to the beginning of the new ByteBuffer
100   * @return the Bytebuffer containing the key part of the cell
101   */
102  public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
103    byte[] bytes = new byte[keyLength(cell)];
104    appendKeyTo(cell, bytes, 0);
105    ByteBuffer buffer = ByteBuffer.wrap(bytes);
106    return buffer;
107  }
108
109  /**
110   * Copies the key to a new KeyValue
111   * @return the KeyValue that consists only the key part of the incoming cell
112   */
113  public static KeyValue toNewKeyCell(final Cell cell) {
114    byte[] bytes = new byte[keyLength(cell)];
115    appendKeyTo(cell, bytes, 0);
116    KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
117    // Set the seq id. The new key cell could be used in comparisons so it
118    // is important that it uses the seqid also. If not the comparsion would fail
119    kv.setSequenceId(cell.getSequenceId());
120    return kv;
121  }
122
123  public static byte[] copyToNewByteArray(final Cell cell) {
124    // Cell#getSerializedSize returns the serialized size of the Source cell, which may
125    // not serialize all fields. We are constructing a KeyValue backing array here,
126    // which does include all fields, and must allocate accordingly.
127    // TODO we could probably use Cell#getSerializedSize safely, the errors were
128    // caused by cells corrupted by use-after-free bugs
129    int v1Length = length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
130      cell.getValueLength(), cell.getTagsLength(), true);
131    byte[] backingBytes = new byte[v1Length];
132    appendToByteArray(cell, backingBytes, 0, true);
133    return backingBytes;
134  }
135
136  public static int appendKeyTo(final Cell cell, final byte[] output, final int offset) {
137    int nextOffset = offset;
138    nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
139    nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
140    nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
141    nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
142    nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
143    nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
144    nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
145    return nextOffset;
146  }
147
148  /**************** copy key and value *********************/
149
150  public static int appendToByteArray(Cell cell, byte[] output, int offset, boolean withTags) {
151    int pos = offset;
152    pos = Bytes.putInt(output, pos, keyLength(cell));
153    pos = Bytes.putInt(output, pos, cell.getValueLength());
154    pos = appendKeyTo(cell, output, pos);
155    pos = CellUtil.copyValueTo(cell, output, pos);
156    if (withTags && (cell.getTagsLength() > 0)) {
157      pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
158      pos = PrivateCellUtil.copyTagsTo(cell, output, pos);
159    }
160    return pos;
161  }
162
163  /**
164   * Copy the Cell content into the passed buf in KeyValue serialization format.
165   */
166  public static int appendTo(Cell cell, ByteBuffer buf, int offset, boolean withTags) {
167    offset = ByteBufferUtils.putInt(buf, offset, keyLength(cell));// Key length
168    offset = ByteBufferUtils.putInt(buf, offset, cell.getValueLength());// Value length
169    offset = appendKeyTo(cell, buf, offset);
170    offset = CellUtil.copyValueTo(cell, buf, offset);// Value bytes
171    int tagsLength = cell.getTagsLength();
172    if (withTags && (tagsLength > 0)) {
173      offset = ByteBufferUtils.putAsShort(buf, offset, tagsLength);// Tags length
174      offset = PrivateCellUtil.copyTagsTo(cell, buf, offset);// Tags bytes
175    }
176    return offset;
177  }
178
179  public static int appendKeyTo(Cell cell, ByteBuffer buf, int offset) {
180    offset = ByteBufferUtils.putShort(buf, offset, cell.getRowLength());// RK length
181    offset = CellUtil.copyRowTo(cell, buf, offset);// Row bytes
182    offset = ByteBufferUtils.putByte(buf, offset, cell.getFamilyLength());// CF length
183    offset = CellUtil.copyFamilyTo(cell, buf, offset);// CF bytes
184    offset = CellUtil.copyQualifierTo(cell, buf, offset);// Qualifier bytes
185    offset = ByteBufferUtils.putLong(buf, offset, cell.getTimestamp());// TS
186    offset = ByteBufferUtils.putByte(buf, offset, cell.getTypeByte());// Type
187    return offset;
188  }
189
190  public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
191    final boolean includeMvccVersion) {
192    // keep pushing the limit out. assume enough capacity
193    bb.limit(bb.position() + kv.getLength());
194    bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
195    if (includeMvccVersion) {
196      int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId());
197      ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
198      ByteBufferUtils.writeVLong(bb, kv.getSequenceId());
199    }
200  }
201
202  /**************** iterating *******************************/
203
204  /**
205   * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
206   * position to the start of the next KeyValue. Does not allocate a new array or copy data.
207   */
208  public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
209    boolean includesTags) {
210    if (bb.isDirect()) {
211      throw new IllegalArgumentException("only supports heap buffers");
212    }
213    if (bb.remaining() < 1) {
214      return null;
215    }
216    int underlyingArrayOffset = bb.arrayOffset() + bb.position();
217    int keyLength = bb.getInt();
218    int valueLength = bb.getInt();
219    ByteBufferUtils.skip(bb, keyLength + valueLength);
220    int tagsLength = 0;
221    if (includesTags) {
222      // Read short as unsigned, high byte first
223      tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
224      ByteBufferUtils.skip(bb, tagsLength);
225    }
226    int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
227    KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
228    if (includesMvccVersion) {
229      long mvccVersion = ByteBufferUtils.readVLong(bb);
230      keyValue.setSequenceId(mvccVersion);
231    }
232    return keyValue;
233  }
234
235  /*************** next/previous **********************************/
236
237  /**
238   * Decrement the timestamp. For tests (currently wasteful) Remember timestamps are sorted reverse
239   * chronologically.
240   * @return previous key
241   */
242  public static KeyValue previousKey(final KeyValue in) {
243    return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
244      CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
245  }
246
247  /**
248   * Create a KeyValue for the specified row, family and qualifier that would be larger than or
249   * equal to all other possible KeyValues that have the same row, family, qualifier. Used for
250   * reseeking. Should NEVER be returned to a client. row key row offset row length family name
251   * family offset family length column qualifier qualifier offset qualifier length
252   * @return Last possible key on passed row, family, qualifier.
253   */
254  public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
255    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
256    final int qoffset, final int qlength) {
257    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
258      qlength, HConstants.OLDEST_TIMESTAMP, KeyValue.Type.Minimum, null, 0, 0);
259  }
260
261  /**
262   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
263   * any (valid) KeyValue on 'row' would sort _after_ the result.
264   * @param row - row key (arbitrary byte array)
265   * @return First possible KeyValue on passed <code>row</code>
266   */
267  public static KeyValue createFirstOnRow(final byte[] row, int roffset, short rlength) {
268    return new KeyValue(row, roffset, rlength, null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP,
269      KeyValue.Type.Maximum, null, 0, 0);
270  }
271
272  /**
273   * Creates a KeyValue that is last on the specified row id. That is, every other possible KeyValue
274   * for the given row would compareTo() less than the result of this call.
275   * @param row row key
276   * @return Last possible KeyValue on passed <code>row</code>
277   */
278  public static KeyValue createLastOnRow(final byte[] row) {
279    return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Minimum);
280  }
281
282  /**
283   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
284   * any (valid) KeyValue on 'row' would sort _after_ the result.
285   * @param row - row key (arbitrary byte array)
286   * @return First possible KeyValue on passed <code>row</code>
287   */
288  public static KeyValue createFirstOnRow(final byte[] row) {
289    return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
290  }
291
292  /**
293   * Creates a KeyValue that is smaller than all other KeyValues that are older than the passed
294   * timestamp.
295   * @param row - row key (arbitrary byte array)
296   * @param ts  - timestamp
297   * @return First possible key on passed <code>row</code> and timestamp.
298   */
299  public static KeyValue createFirstOnRow(final byte[] row, final long ts) {
300    return new KeyValue(row, null, null, ts, KeyValue.Type.Maximum);
301  }
302
303  /**
304   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
305   * other possible KeyValues that have the same row,family,qualifier. Used for seeking.
306   * @param row       - row key (arbitrary byte array)
307   * @param family    - family name
308   * @param qualifier - column qualifier
309   * @return First possible key on passed <code>row</code>, and column.
310   */
311  public static KeyValue createFirstOnRow(final byte[] row, final byte[] family,
312    final byte[] qualifier) {
313    return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum);
314  }
315
316  /**
317   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
318   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
319   * @param row - row key (arbitrary byte array)
320   * @param f   - family name
321   * @param q   - column qualifier
322   * @param ts  - timestamp
323   * @return First possible key on passed <code>row</code>, column and timestamp
324   */
325  public static KeyValue createFirstOnRow(final byte[] row, final byte[] f, final byte[] q,
326    final long ts) {
327    return new KeyValue(row, f, q, ts, KeyValue.Type.Maximum);
328  }
329
330  /**
331   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
332   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
333   * @param row       row key
334   * @param roffset   row offset
335   * @param rlength   row length
336   * @param family    family name
337   * @param foffset   family offset
338   * @param flength   family length
339   * @param qualifier column qualifier
340   * @param qoffset   qualifier offset
341   * @param qlength   qualifier length
342   * @return First possible key on passed Row, Family, Qualifier.
343   */
344  public static KeyValue createFirstOnRow(final byte[] row, final int roffset, final int rlength,
345    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
346    final int qoffset, final int qlength) {
347    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
348      qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum, null, 0, 0);
349  }
350
351  /**
352   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
353   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
354   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
355   * @param row       the value key
356   * @param family    family name
357   * @param qualifier column qualifier
358   * @return First possible key on passed Row, Family, Qualifier.
359   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
360   *                                  than the provided buffer or than
361   *                                  <code>Integer.MAX_VALUE</code>
362   */
363  public static KeyValue createFirstOnRow(byte[] buffer, final byte[] row, final byte[] family,
364    final byte[] qualifier) throws IllegalArgumentException {
365    return createFirstOnRow(buffer, 0, row, 0, row.length, family, 0, family.length, qualifier, 0,
366      qualifier.length);
367  }
368
369  /**
370   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
371   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
372   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
373   * @param boffset   buffer offset
374   * @param row       the value key
375   * @param roffset   row offset
376   * @param rlength   row length
377   * @param family    family name
378   * @param foffset   family offset
379   * @param flength   family length
380   * @param qualifier column qualifier
381   * @param qoffset   qualifier offset
382   * @param qlength   qualifier length
383   * @return First possible key on passed Row, Family, Qualifier.
384   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
385   *                                  than the provided buffer or than
386   *                                  <code>Integer.MAX_VALUE</code>
387   */
388  public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
389    final int roffset, final int rlength, final byte[] family, final int foffset, final int flength,
390    final byte[] qualifier, final int qoffset, final int qlength) throws IllegalArgumentException {
391
392    long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);
393
394    if (lLength > Integer.MAX_VALUE) {
395      throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
396    }
397    int iLength = (int) lLength;
398    if (buffer.length - boffset < iLength) {
399      throw new IllegalArgumentException(
400        "Buffer size " + (buffer.length - boffset) + " < " + iLength);
401    }
402
403    int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
404      flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
405      null, 0, 0, null);
406    return new KeyValue(buffer, boffset, len);
407  }
408
409  /*************** misc **********************************/
410  /**
411   * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return a
412   *         new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an
413   *         object of any of the subclass of {@link KeyValue}, we will create a new
414   *         {@link KeyValue} object wrapping same buffer. This API is used only with MR based tools
415   *         which expect the type to be exactly KeyValue. That is the reason for doing this way.
416   * @deprecated without any replacement.
417   */
418  @Deprecated
419  public static KeyValue ensureKeyValue(final Cell cell) {
420    if (cell == null) return null;
421    if (cell instanceof KeyValue) {
422      if (cell.getClass().getName().equals(KeyValue.class.getName())) {
423        return (KeyValue) cell;
424      }
425      // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the
426      // same byte[]
427      KeyValue kv = (KeyValue) cell;
428      KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length);
429      newKv.setSequenceId(kv.getSequenceId());
430      return newKv;
431    }
432    return copyToNewKeyValue(cell);
433  }
434
435  @Deprecated
436  public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
437    List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
438      @Override
439      public KeyValue apply(Cell arg0) {
440        return KeyValueUtil.ensureKeyValue(arg0);
441      }
442    });
443    return new ArrayList<>(lazyList);
444  }
445
446  /**
447   * Write out a KeyValue in the manner in which we used to when KeyValue was a Writable.
448   * @return Length written on stream
449   * @see #create(DataInput) for the inverse function
450   */
451  public static long write(final KeyValue kv, final DataOutput out) throws IOException {
452    // This is how the old Writables write used to serialize KVs. Need to figure
453    // way to make it
454    // work for all implementations.
455    int length = kv.getLength();
456    out.writeInt(length);
457    out.write(kv.getBuffer(), kv.getOffset(), length);
458    return (long) length + Bytes.SIZEOF_INT;
459  }
460
461  static String bytesToHex(byte[] buf, int offset, int length) {
462    String bufferContents = buf != null ? Bytes.toStringBinary(buf, offset, length) : "<null>";
463    return ", KeyValueBytesHex=" + bufferContents + ", offset=" + offset + ", length=" + length;
464  }
465
466  static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) {
467    if (buf == null) {
468      String msg = "Invalid to have null byte array in KeyValue.";
469      LOG.warn(msg);
470      throw new IllegalArgumentException(msg);
471    }
472
473    int pos = offset, endOffset = offset + length;
474    // check the key
475    if (pos + Bytes.SIZEOF_INT > endOffset) {
476      String msg =
477        "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length);
478      LOG.warn(msg);
479      throw new IllegalArgumentException(msg);
480    }
481    int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
482    pos += Bytes.SIZEOF_INT;
483    if (keyLen <= 0 || pos + keyLen > endOffset) {
484      String msg =
485        "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length);
486      LOG.warn(msg);
487      throw new IllegalArgumentException(msg);
488    }
489    // check the value
490    if (pos + Bytes.SIZEOF_INT > endOffset) {
491      String msg =
492        "Overflow when reading value length at position=" + pos + bytesToHex(buf, offset, length);
493      LOG.warn(msg);
494      throw new IllegalArgumentException(msg);
495    }
496    int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
497    pos += Bytes.SIZEOF_INT;
498    if (valLen < 0 || pos + valLen > endOffset) {
499      String msg =
500        "Invalid value length in KeyValue, valueLength=" + valLen + bytesToHex(buf, offset, length);
501      LOG.warn(msg);
502      throw new IllegalArgumentException(msg);
503    }
504    // check the row
505    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
506      String msg =
507        "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length);
508      LOG.warn(msg);
509      throw new IllegalArgumentException(msg);
510    }
511    short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT);
512    pos += Bytes.SIZEOF_SHORT;
513    if (rowLen < 0 || pos + rowLen > endOffset) {
514      String msg =
515        "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length);
516      LOG.warn(msg);
517      throw new IllegalArgumentException(msg);
518    }
519    pos += rowLen;
520    // check the family
521    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
522      String msg =
523        "Overflow when reading family length at position=" + pos + bytesToHex(buf, offset, length);
524      LOG.warn(msg);
525      throw new IllegalArgumentException(msg);
526    }
527    int familyLen = buf[pos];
528    pos += Bytes.SIZEOF_BYTE;
529    if (familyLen < 0 || pos + familyLen > endOffset) {
530      String msg = "Invalid family length in KeyValue, familyLength=" + familyLen
531        + bytesToHex(buf, offset, length);
532      LOG.warn(msg);
533      throw new IllegalArgumentException(msg);
534    }
535    pos += familyLen;
536    // check the qualifier
537    int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen
538      - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE;
539    if (qualifierLen < 0 || pos + qualifierLen > endOffset) {
540      String msg = "Invalid qualifier length in KeyValue, qualifierLen=" + qualifierLen
541        + bytesToHex(buf, offset, length);
542      LOG.warn(msg);
543      throw new IllegalArgumentException(msg);
544    }
545    pos += qualifierLen;
546    // check the timestamp
547    if (pos + Bytes.SIZEOF_LONG > endOffset) {
548      String msg =
549        "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length);
550      LOG.warn(msg);
551      throw new IllegalArgumentException(msg);
552    }
553    long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG);
554    if (timestamp < 0) {
555      String msg =
556        "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length);
557      LOG.warn(msg);
558      throw new IllegalArgumentException(msg);
559    }
560    pos += Bytes.SIZEOF_LONG;
561    // check the type
562    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
563      String msg =
564        "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length);
565      LOG.warn(msg);
566      throw new IllegalArgumentException(msg);
567    }
568    byte type = buf[pos];
569    if (!KeyValue.Type.isValidType(type)) {
570      String msg = "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length);
571      LOG.warn(msg);
572      throw new IllegalArgumentException(msg);
573    }
574    pos += Bytes.SIZEOF_BYTE;
575    // check the value
576    if (pos + valLen > endOffset) {
577      String msg =
578        "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length);
579      LOG.warn(msg);
580      throw new IllegalArgumentException(msg);
581    }
582    pos += valLen;
583    // check the tags
584    if (withTags) {
585      if (pos == endOffset) {
586        // withTags is true but no tag in the cell.
587        return;
588      }
589      pos = checkKeyValueTagBytes(buf, offset, length, pos, endOffset);
590    }
591    if (pos != endOffset) {
592      String msg = "Some redundant bytes in KeyValue's buffer, startOffset=" + pos + ", endOffset="
593        + endOffset + bytesToHex(buf, offset, length);
594      LOG.warn(msg);
595      throw new IllegalArgumentException(msg);
596    }
597  }
598
599  private static int checkKeyValueTagBytes(byte[] buf, int offset, int length, int pos,
600    int endOffset) {
601    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
602      String msg =
603        "Overflow when reading tags length at position=" + pos + bytesToHex(buf, offset, length);
604      LOG.warn(msg);
605      throw new IllegalArgumentException(msg);
606    }
607    short tagsLen = Bytes.toShort(buf, pos);
608    pos += Bytes.SIZEOF_SHORT;
609    if (tagsLen < 0 || pos + tagsLen > endOffset) {
610      String msg = "Invalid tags length in KeyValue at position=" + (pos - Bytes.SIZEOF_SHORT)
611        + bytesToHex(buf, offset, length);
612      LOG.warn(msg);
613      throw new IllegalArgumentException(msg);
614    }
615    int tagsEndOffset = pos + tagsLen;
616    for (; pos < tagsEndOffset;) {
617      if (pos + Tag.TAG_LENGTH_SIZE > endOffset) {
618        String msg =
619          "Overflow when reading tag length at position=" + pos + bytesToHex(buf, offset, length);
620        LOG.warn(msg);
621        throw new IllegalArgumentException(msg);
622      }
623      short tagLen = Bytes.toShort(buf, pos);
624      pos += Tag.TAG_LENGTH_SIZE;
625      // tagLen contains one byte tag type, so must be not less than 1.
626      if (tagLen < 1 || pos + tagLen > endOffset) {
627        String msg = "Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE)
628          + ", tagLength=" + tagLen + bytesToHex(buf, offset, length);
629        LOG.warn(msg);
630        throw new IllegalArgumentException(msg);
631      }
632      pos += tagLen;
633    }
634    return pos;
635  }
636
637  /**
638   * Create a KeyValue reading from the raw InputStream. Named
639   * <code>createKeyValueFromInputStream</code> so doesn't clash with {@link #create(DataInput)}
640   * @param in       inputStream to read.
641   * @param withTags whether the keyvalue should include tags are not
642   * @return Created KeyValue OR if we find a length of zero, we will return null which can be
643   *         useful marking a stream as done.
644   */
645  public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags)
646    throws IOException {
647    byte[] intBytes = new byte[Bytes.SIZEOF_INT];
648    int bytesRead = 0;
649    while (bytesRead < intBytes.length) {
650      int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
651      if (n < 0) {
652        if (bytesRead == 0) {
653          throw new EOFException();
654        }
655        throw new IOException("Failed read of int, read " + bytesRead + " bytes");
656      }
657      bytesRead += n;
658    }
659    byte[] bytes = new byte[Bytes.toInt(intBytes)];
660    IOUtils.readFully(in, bytes, 0, bytes.length);
661    return withTags
662      ? new KeyValue(bytes, 0, bytes.length)
663      : new NoTagsKeyValue(bytes, 0, bytes.length);
664  }
665
666  /**
667   * Returns a KeyValue made of a byte array that holds the key-only part. Needed to convert hfile
668   * index members to KeyValues.
669   */
670  public static KeyValue createKeyValueFromKey(final byte[] b) {
671    return createKeyValueFromKey(b, 0, b.length);
672  }
673
674  /**
675   * Return a KeyValue made of a byte buffer that holds the key-only part. Needed to convert hfile
676   * index members to KeyValues.
677   */
678  public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
679    return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
680  }
681
682  /**
683   * Return a KeyValue made of a byte array that holds the key-only part. Needed to convert hfile
684   * index members to KeyValues.
685   */
686  public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
687    byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
688    System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
689    Bytes.putInt(newb, 0, l);
690    Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
691    return new KeyValue(newb);
692  }
693
694  /**
695   * Where to read bytes from. Creates a byte array to hold the KeyValue backing bytes copied from
696   * the steam.
697   * @return KeyValue created by deserializing from <code>in</code> OR if we find a length of zero,
698   *         we will return null which can be useful marking a stream as done.
699   */
700  public static KeyValue create(final DataInput in) throws IOException {
701    return create(in.readInt(), in);
702  }
703
704  /**
705   * Create a KeyValue reading <code>length</code> from <code>in</code>
706   * @return Created KeyValue OR if we find a length of zero, we will return null which can be
707   *         useful marking a stream as done.
708   */
709  public static KeyValue create(int length, final DataInput in) throws IOException {
710
711    if (length <= 0) {
712      if (length == 0) return null;
713      throw new IOException("Failed read " + length + " bytes, stream corrupt?");
714    }
715
716    // This is how the old Writables.readFrom used to deserialize. Didn't even
717    // vint.
718    byte[] bytes = new byte[length];
719    in.readFully(bytes);
720    return new KeyValue(bytes, 0, length);
721  }
722
723  public static int getSerializedSize(Cell cell, boolean withTags) {
724    if (withTags) {
725      return cell.getSerializedSize();
726    }
727    if (cell instanceof ExtendedCell) {
728      return ((ExtendedCell) cell).getSerializedSize(withTags);
729    }
730    return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
731      cell.getValueLength(), cell.getTagsLength(), withTags);
732  }
733
734  public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags)
735    throws IOException {
736    if (cell instanceof ExtendedCell) {
737      return ((ExtendedCell) cell).write(out, withTags);
738    } else {
739      short rlen = cell.getRowLength();
740      byte flen = cell.getFamilyLength();
741      int qlen = cell.getQualifierLength();
742      int vlen = cell.getValueLength();
743      int tlen = cell.getTagsLength();
744      // write key length
745      int klen = keyLength(rlen, flen, qlen);
746      ByteBufferUtils.putInt(out, klen);
747      // write value length
748      ByteBufferUtils.putInt(out, vlen);
749      // Write rowkey - 2 bytes rk length followed by rowkey bytes
750      StreamUtils.writeShort(out, rlen);
751      out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
752      // Write cf - 1 byte of cf length followed by the family bytes
753      out.write(flen);
754      out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
755      // write qualifier
756      out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
757      // write timestamp
758      StreamUtils.writeLong(out, cell.getTimestamp());
759      // write the type
760      out.write(cell.getTypeByte());
761      // write value
762      out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
763      int size = klen + vlen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
764      // write tags if we have to
765      if (withTags && tlen > 0) {
766        // 2 bytes tags length followed by tags bytes
767        // tags length is serialized with 2 bytes only(short way) even if the
768        // type is int. As this
769        // is non -ve numbers, we save the sign bit. See HBASE-11437
770        out.write((byte) (0xff & (tlen >> 8)));
771        out.write((byte) (0xff & tlen));
772        out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
773        size += tlen + KeyValue.TAGS_LENGTH_SIZE;
774      }
775      return size;
776    }
777  }
778}