/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.io.util.StreamUtils;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Function;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.org.apache.commons.collections4.IterableUtils;

/**
 * Static convenience methods for dealing with KeyValues and collections of KeyValues.
 */
@InterfaceAudience.Private
public class KeyValueUtil {

  private static final Logger LOG = LoggerFactory.getLogger(KeyValueUtil.class);

  /**************** length *********************/

  /**
   * Computes the size of a cell laid out in {@link KeyValue} serialization format.
   * @param rlen     row length
   * @param flen     family length
   * @param qlen     qualifier length
   * @param vlen     value length
   * @param tlen     tags length; only taken into account when <code>withTags</code> is true
   * @param withTags whether the tags part is included in the computed size
   * @return the size reported by <code>KeyValue.getKeyValueDataStructureSize</code>, narrowed to
   *         an int
   */
  public static int length(short rlen, byte flen, int qlen, int vlen, int tlen, boolean withTags) {
    if (withTags) {
      return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen, tlen);
    }
    return (int) KeyValue.getKeyValueDataStructureSize(rlen, flen, qlen, vlen);
  }

  /**
   * Returns the number of bytes this cell's key part would use if serialized as in
   * {@link KeyValue}. Key includes rowkey, family, qualifier, timestamp and type.
   * @param cell the cell whose key is measured
   * @return the key length
   */
  public static int keyLength(final Cell cell) {
    return keyLength(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength());
  }

  /**
   * Key length for the given row, family and qualifier lengths, as reported by
   * <code>KeyValue.getKeyDataStructureSize</code> and narrowed to an int.
   */
  private static int keyLength(short rlen, byte flen, int qlen) {
    return (int) KeyValue.getKeyDataStructureSize(rlen, flen, qlen);
  }

  /**
   * Returns the length of <code>kv</code>, optionally increased by the number of bytes its
   * sequence id occupies when written as a variable-length integer.
   * @param kv                 the KeyValue to measure
   * @param includeMvccVersion whether to add the vint size of the sequence id
   * @return the resulting length in bytes
   */
  public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
    int length = kv.getLength();
    if (includeMvccVersion) {
      length += WritableUtils.getVIntSize(kv.getSequenceId());
    }
    return length;
  }

  /**
   * Sums {@link #lengthWithMvccVersion(KeyValue, boolean)} over all given KeyValues.
   * @param kvs                the KeyValues to measure; <code>null</code> is treated as empty
   * @param includeMvccVersion whether to add the vint size of each sequence id
   * @return the total length in bytes
   */
  public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
    final boolean includeMvccVersion) {
    int length = 0;
    for (KeyValue kv : IterableUtils.emptyIfNull(kvs)) {
      length += lengthWithMvccVersion(kv, includeMvccVersion);
    }
    return length;
  }

  /**************** copy the cell to create a new keyvalue *********************/

  /**
   * Copies the cell (including tags) into a freshly allocated byte array and wraps it in a new
   * {@link KeyValue} that carries the cell's sequence id.
   * @param cell the cell to copy
   * @return the new KeyValue
   */
  public static KeyValue copyToNewKeyValue(final Cell cell) {
    byte[] bytes = copyToNewByteArray(cell);
    KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
    kvCell.setSequenceId(cell.getSequenceId());
    return kvCell;
  }

  /**
   * The position will be set to the beginning of the new ByteBuffer
   * @param cell the cell whose key is copied
   * @return the ByteBuffer containing the key part of the cell
   */
  public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
    byte[] bytes = new byte[keyLength(cell)];
    appendKeyTo(cell, bytes, 0);
    return ByteBuffer.wrap(bytes);
  }

  /**
   * Copies the key to a new KeyValue
   * @param cell the cell whose key is copied
   * @return the KeyValue that consists only of the key part of the incoming cell
   */
  public static KeyValue toNewKeyCell(final Cell cell) {
    byte[] bytes = new byte[keyLength(cell)];
    appendKeyTo(cell, bytes, 0);
    KeyValue kv = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length);
    // Set the seq id. The new key cell could be used in comparisons so it
    // is important that it uses the seqid also. If not the comparison would fail
    kv.setSequenceId(cell.getSequenceId());
    return kv;
  }

  /**
   * Serializes the cell, tags included, into a new byte array in KeyValue format.
   * @param cell the cell to copy
   * @return the newly allocated backing array
   */
  public static byte[] copyToNewByteArray(final Cell cell) {
    // Cell#getSerializedSize returns the serialized size of the Source cell, which may
    // not serialize all fields. We are constructing a KeyValue backing array here,
    // which does include all fields, and must allocate accordingly.
    // TODO we could probably use Cell#getSerializedSize safely, the errors were
    // caused by cells corrupted by use-after-free bugs
    int v1Length = length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
      cell.getValueLength(), cell.getTagsLength(), true);
    byte[] backingBytes = new byte[v1Length];
    appendToByteArray(cell, backingBytes, 0, true);
    return backingBytes;
  }

  /**
   * Writes the key part of the cell into <code>output</code>: row length (short), row bytes,
   * family length (byte), family bytes, qualifier bytes, timestamp (long) and type (byte).
   * @param cell   the cell whose key is written
   * @param output destination array
   * @param offset index in <code>output</code> at which writing starts
   * @return the offset just past the last byte written
   */
  public static int appendKeyTo(final Cell cell, final byte[] output, final int offset) {
    int nextOffset = offset;
    nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
    nextOffset = CellUtil.copyRowTo(cell, output, nextOffset);
    nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
    nextOffset = CellUtil.copyFamilyTo(cell, output, nextOffset);
    nextOffset = CellUtil.copyQualifierTo(cell, output, nextOffset);
    nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
    nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
    return nextOffset;
  }

  /**************** copy key and value *********************/

  /**
   * Writes the cell into <code>output</code>: key length (int), value length (int), key, value
   * and, when <code>withTags</code> is set and the cell has tags, tags length (as a short)
   * followed by the tags.
   * @param cell     the cell to write
   * @param output   destination array
   * @param offset   index in <code>output</code> at which writing starts
   * @param withTags whether tags are written
   * @return the offset just past the last byte written
   */
  public static int appendToByteArray(Cell cell, byte[] output, int offset, boolean withTags) {
    int pos = offset;
    pos = Bytes.putInt(output, pos, keyLength(cell));
    pos = Bytes.putInt(output, pos, cell.getValueLength());
    pos = appendKeyTo(cell, output, pos);
    pos = CellUtil.copyValueTo(cell, output, pos);
    if (withTags && (cell.getTagsLength() > 0)) {
      pos = Bytes.putAsShort(output, pos, cell.getTagsLength());
      pos = PrivateCellUtil.copyTagsTo(cell, output, pos);
    }
    return pos;
  }

  /**
   * Copy the Cell content into the passed buf in KeyValue serialization format.
   * @param cell     the cell to write
   * @param buf      destination buffer
   * @param offset   index in <code>buf</code> at which writing starts
   * @param withTags whether tags are written
   * @return the offset just past the last byte written
   */
  public static int appendTo(Cell cell, ByteBuffer buf, int offset, boolean withTags) {
    offset = ByteBufferUtils.putInt(buf, offset, keyLength(cell));// Key length
    offset = ByteBufferUtils.putInt(buf, offset, cell.getValueLength());// Value length
    offset = appendKeyTo(cell, buf, offset);
    offset = CellUtil.copyValueTo(cell, buf, offset);// Value bytes
    int tagsLength = cell.getTagsLength();
    if (withTags && (tagsLength > 0)) {
      offset = ByteBufferUtils.putAsShort(buf, offset, tagsLength);// Tags length
      offset = PrivateCellUtil.copyTagsTo(cell, buf, offset);// Tags bytes
    }
    return offset;
  }

  /**
   * ByteBuffer counterpart of {@link #appendKeyTo(Cell, byte[], int)}: writes the key part of
   * the cell into <code>buf</code> starting at <code>offset</code>.
   * @param cell   the cell whose key is written
   * @param buf    destination buffer
   * @param offset index in <code>buf</code> at which writing starts
   * @return the offset just past the last byte written
   */
  public static int appendKeyTo(Cell cell, ByteBuffer buf, int offset) {
    offset = ByteBufferUtils.putShort(buf, offset, cell.getRowLength());// RK length
    offset = CellUtil.copyRowTo(cell, buf, offset);// Row bytes
    offset = ByteBufferUtils.putByte(buf, offset, cell.getFamilyLength());// CF length
    offset = CellUtil.copyFamilyTo(cell, buf, offset);// CF bytes
    offset = CellUtil.copyQualifierTo(cell, buf, offset);// Qualifier bytes
    offset = ByteBufferUtils.putLong(buf, offset, cell.getTimestamp());// TS
    offset = ByteBufferUtils.putByte(buf, offset, cell.getTypeByte());// Type
    return offset;
  }

  /**
   * Appends the bytes of <code>kv</code> at the buffer's current position, moving the buffer's
   * limit as it goes, and optionally appends the sequence id as a variable-length long.
   * @param bb                 destination buffer; assumed to have enough capacity
   * @param kv                 the KeyValue to append
   * @param includeMvccVersion whether the sequence id is written after the KeyValue bytes
   */
  public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
    final boolean includeMvccVersion) {
    // keep pushing the limit out. assume enough capacity
    bb.limit(bb.position() + kv.getLength());
    bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
    if (includeMvccVersion) {
      int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getSequenceId());
      ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
      ByteBufferUtils.writeVLong(bb, kv.getSequenceId());
    }
  }

  /**************** iterating *******************************/

  /**
   * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
   * position to the start of the next KeyValue. Does not allocate a new array or copy data.
   * @param bb                  heap buffer positioned at the start of a serialized KeyValue
   * @param includesMvccVersion whether a variable-length sequence id follows the KeyValue
   * @param includesTags        whether a tags section follows the value
   * @return the KeyValue, or <code>null</code> when the buffer has no bytes remaining
   * @throws IllegalArgumentException if <code>bb</code> is a direct buffer
   */
  public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
    boolean includesTags) {
    if (bb.isDirect()) {
      throw new IllegalArgumentException("only supports heap buffers");
    }
    if (bb.remaining() < 1) {
      return null;
    }
    int underlyingArrayOffset = bb.arrayOffset() + bb.position();
    int keyLength = bb.getInt();
    int valueLength = bb.getInt();
    ByteBufferUtils.skip(bb, keyLength + valueLength);
    int tagsLength = 0;
    if (includesTags) {
      // Read short as unsigned, high byte first
      tagsLength = ((bb.get() & 0xff) << 8) ^ (bb.get() & 0xff);
      ByteBufferUtils.skip(bb, tagsLength);
    }
    int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
    KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
    if (includesMvccVersion) {
      long mvccVersion = ByteBufferUtils.readVLong(bb);
      keyValue.setSequenceId(mvccVersion);
    }
    return keyValue;
  }

  /*************** next/previous **********************************/

  /**
   * Decrement the timestamp. For tests (currently wasteful) Remember timestamps are sorted reverse
   * chronologically.
   * @param in the KeyValue whose row, family and qualifier are cloned
   * @return previous key
   */
  public static KeyValue previousKey(final KeyValue in) {
    return createFirstOnRow(CellUtil.cloneRow(in), CellUtil.cloneFamily(in),
      CellUtil.cloneQualifier(in), in.getTimestamp() - 1);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be larger than or
   * equal to all other possible KeyValues that have the same row, family, qualifier. Used for
   * reseeking. Should NEVER be returned to a client.
   * @param row       row key
   * @param roffset   row offset
   * @param rlength   row length
   * @param family    family name
   * @param foffset   family offset
   * @param flength   family length
   * @param qualifier column qualifier
   * @param qoffset   qualifier offset
   * @param qlength   qualifier length
   * @return Last possible key on passed row, family, qualifier.
   */
  public static KeyValue createLastOnRow(final byte[] row, final int roffset, final int rlength,
    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
    final int qoffset, final int qlength) {
    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
      qlength, HConstants.OLDEST_TIMESTAMP, KeyValue.Type.Minimum, null, 0, 0);
  }

  /**
   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
   * any (valid) KeyValue on 'row' would sort _after_ the result.
   * @param row     - array holding the row key
   * @param roffset - offset of the row key within <code>row</code>
   * @param rlength - length of the row key
   * @return First possible KeyValue on passed <code>row</code>
   */
  public static KeyValue createFirstOnRow(final byte[] row, int roffset, short rlength) {
    return new KeyValue(row, roffset, rlength, null, 0, 0, null, 0, 0, HConstants.LATEST_TIMESTAMP,
      KeyValue.Type.Maximum, null, 0, 0);
  }

  /**
   * Creates a KeyValue that is last on the specified row id. That is, every other possible KeyValue
   * for the given row would compareTo() less than the result of this call.
   * @param row row key
   * @return Last possible KeyValue on passed <code>row</code>
   */
  public static KeyValue createLastOnRow(final byte[] row) {
    return new KeyValue(row, null, null, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Minimum);
  }

  /**
   * Create a KeyValue that is smaller than all other possible KeyValues for the given row. That is
   * any (valid) KeyValue on 'row' would sort _after_ the result.
   * @param row - row key (arbitrary byte array)
   * @return First possible KeyValue on passed <code>row</code>
   */
  public static KeyValue createFirstOnRow(final byte[] row) {
    return createFirstOnRow(row, HConstants.LATEST_TIMESTAMP);
  }

  /**
   * Creates a KeyValue that is smaller than all other KeyValues that are older than the passed
   * timestamp.
   * @param row - row key (arbitrary byte array)
   * @param ts  - timestamp
   * @return First possible key on passed <code>row</code> and timestamp.
   */
  public static KeyValue createFirstOnRow(final byte[] row, final long ts) {
    return new KeyValue(row, null, null, ts, KeyValue.Type.Maximum);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
   * other possible KeyValues that have the same row,family,qualifier. Used for seeking.
   * @param row       - row key (arbitrary byte array)
   * @param family    - family name
   * @param qualifier - column qualifier
   * @return First possible key on passed <code>row</code>, and column.
   */
  public static KeyValue createFirstOnRow(final byte[] row, final byte[] family,
    final byte[] qualifier) {
    return new KeyValue(row, family, qualifier, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
   * @param row - row key (arbitrary byte array)
   * @param f   - family name
   * @param q   - column qualifier
   * @param ts  - timestamp
   * @return First possible key on passed <code>row</code>, column and timestamp
   */
  public static KeyValue createFirstOnRow(final byte[] row, final byte[] f, final byte[] q,
    final long ts) {
    return new KeyValue(row, f, q, ts, KeyValue.Type.Maximum);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
   * @param row       row key
   * @param roffset   row offset
   * @param rlength   row length
   * @param family    family name
   * @param foffset   family offset
   * @param flength   family length
   * @param qualifier column qualifier
   * @param qoffset   qualifier offset
   * @param qlength   qualifier length
   * @return First possible key on passed Row, Family, Qualifier.
   */
  public static KeyValue createFirstOnRow(final byte[] row, final int roffset, final int rlength,
    final byte[] family, final int foffset, final int flength, final byte[] qualifier,
    final int qoffset, final int qlength) {
    return new KeyValue(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
      qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum, null, 0, 0);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
   * @param row       the value key
   * @param family    family name
   * @param qualifier column qualifier
   * @return First possible key on passed Row, Family, Qualifier.
   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
   *                                  than the provided buffer or than
   *                                  <code>Integer.MAX_VALUE</code>
   */
  public static KeyValue createFirstOnRow(byte[] buffer, final byte[] row, final byte[] family,
    final byte[] qualifier) throws IllegalArgumentException {
    return createFirstOnRow(buffer, 0, row, 0, row.length, family, 0, family.length, qualifier, 0,
      qualifier.length);
  }

  /**
   * Create a KeyValue for the specified row, family and qualifier that would be smaller than all
   * other possible KeyValues that have the same row, family, qualifier. Used for seeking.
   * @param buffer    the buffer to use for the new <code>KeyValue</code> object
   * @param boffset   buffer offset
   * @param row       the value key
   * @param roffset   row offset
   * @param rlength   row length
   * @param family    family name
   * @param foffset   family offset
   * @param flength   family length
   * @param qualifier column qualifier
   * @param qoffset   qualifier offset
   * @param qlength   qualifier length
   * @return First possible key on passed Row, Family, Qualifier.
   * @throws IllegalArgumentException The resulting <code>KeyValue</code> object would be larger
   *                                  than the provided buffer or than
   *                                  <code>Integer.MAX_VALUE</code>
   */
  public static KeyValue createFirstOnRow(byte[] buffer, final int boffset, final byte[] row,
    final int roffset, final int rlength, final byte[] family, final int foffset, final int flength,
    final byte[] qualifier, final int qoffset, final int qlength) throws IllegalArgumentException {

    long lLength = KeyValue.getKeyValueDataStructureSize(rlength, flength, qlength, 0);

    if (lLength > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("KeyValue length " + lLength + " > " + Integer.MAX_VALUE);
    }
    int iLength = (int) lLength;
    if (buffer.length - boffset < iLength) {
      throw new IllegalArgumentException(
        "Buffer size " + (buffer.length - boffset) + " < " + iLength);
    }

    int len = KeyValue.writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset,
      flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
      null, 0, 0, null);
    return new KeyValue(buffer, boffset, len);
  }

  /*************** misc **********************************/
  /**
   * @param cell the cell to convert; may be <code>null</code>
   * @return <code>cell</code> if it is an object of class {@link KeyValue} else we will return a
   *         new {@link KeyValue} instance made from <code>cell</code> Note: Even if the cell is an
   *         object of any of the subclass of {@link KeyValue}, we will create a new
   *         {@link KeyValue} object wrapping same buffer. This API is used only with MR based tools
   *         which expect the type to be exactly KeyValue. That is the reason for doing this way.
   *         Returns <code>null</code> when <code>cell</code> is <code>null</code>.
   * @deprecated without any replacement.
   */
  @Deprecated
  public static KeyValue ensureKeyValue(final Cell cell) {
    if (cell == null) {
      return null;
    }
    if (cell instanceof KeyValue) {
      if (cell.getClass().getName().equals(KeyValue.class.getName())) {
        return (KeyValue) cell;
      }
      // Cell is an Object of any of the sub classes of KeyValue. Make a new KeyValue wrapping the
      // same byte[]
      KeyValue kv = (KeyValue) cell;
      KeyValue newKv = new KeyValue(kv.bytes, kv.offset, kv.length);
      newKv.setSequenceId(kv.getSequenceId());
      return newKv;
    }
    return copyToNewKeyValue(cell);
  }

  /**
   * Applies {@link #ensureKeyValue(Cell)} to every element and collects the results in a new
   * list.
   * @param cells the cells to convert
   * @return a new list holding one KeyValue per input cell, in the same order
   * @deprecated see {@link #ensureKeyValue(Cell)}
   */
  @Deprecated
  public static List<KeyValue> ensureKeyValues(List<Cell> cells) {
    List<KeyValue> lazyList = Lists.transform(cells, new Function<Cell, KeyValue>() {
      @Override
      public KeyValue apply(Cell arg0) {
        return KeyValueUtil.ensureKeyValue(arg0);
      }
    });
    // Copy out of the lazily transforming view so each cell is converted exactly once.
    return new ArrayList<>(lazyList);
  }

  /**
   * Write out a KeyValue in the manner in which we used to when KeyValue was a Writable.
   * @param kv  the KeyValue to write
   * @param out the destination
   * @return Length written on stream
   * @see #create(DataInput) for the inverse function
   */
  public static long write(final KeyValue kv, final DataOutput out) throws IOException {
    // This is how the old Writables write used to serialize KVs. Need to figure
    // way to make it
    // work for all implementations.
    int length = kv.getLength();
    out.writeInt(length);
    out.write(kv.getBuffer(), kv.getOffset(), length);
    return (long) length + Bytes.SIZEOF_INT;
  }

  /**
   * Builds the diagnostic suffix appended to validation error messages: a printable rendering of
   * the buffer slice plus the offset and length that were being validated.
   */
  static String bytesToHex(byte[] buf, int offset, int length) {
    String bufferContents = buf != null ? Bytes.toStringBinary(buf, offset, length) : "<null>";
    return ", KeyValueBytesHex=" + bufferContents + ", offset=" + offset + ", length=" + length;
  }

  /**
   * Logs <code>msg</code> at WARN level and returns an exception carrying the same message, for
   * the caller to throw.
   */
  private static IllegalArgumentException invalidKeyValue(String msg) {
    LOG.warn(msg);
    return new IllegalArgumentException(msg);
  }

  /**
   * Validates that <code>buf[offset, offset + length)</code> holds exactly one well-formed
   * serialized KeyValue: key length, value length, row, family, qualifier, timestamp, type, value
   * and (optionally) tags, with no trailing bytes.
   * <p>
   * Lengths read from the buffer are compared against the remaining space as
   * <code>len &gt; endOffset - pos</code> rather than <code>pos + len &gt; endOffset</code>, so a
   * corrupt length close to <code>Integer.MAX_VALUE</code> cannot wrap around and slip past the
   * check.
   * @param buf      the backing array
   * @param offset   start of the serialized KeyValue within <code>buf</code>
   * @param length   number of bytes the serialized KeyValue is expected to occupy
   * @param withTags whether a tags section may follow the value
   * @throws IllegalArgumentException if <code>buf</code> is null or the bytes are malformed; the
   *                                  message is also logged at WARN level
   */
  static void checkKeyValueBytes(byte[] buf, int offset, int length, boolean withTags) {
    if (buf == null) {
      throw invalidKeyValue("Invalid to have null byte array in KeyValue.");
    }

    int pos = offset;
    int endOffset = offset + length;
    // check the key
    if (pos + Bytes.SIZEOF_INT > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading key length at position=" + pos + bytesToHex(buf, offset, length));
    }
    int keyLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
    pos += Bytes.SIZEOF_INT;
    // keyLen comes straight from the data: compare against the remaining space, overflow-safe
    if (keyLen <= 0 || keyLen > endOffset - pos) {
      throw invalidKeyValue(
        "Invalid key length in KeyValue. keyLength=" + keyLen + bytesToHex(buf, offset, length));
    }
    // check the value
    if (pos + Bytes.SIZEOF_INT > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading value length at position=" + pos + bytesToHex(buf, offset, length));
    }
    int valLen = Bytes.toInt(buf, pos, Bytes.SIZEOF_INT);
    pos += Bytes.SIZEOF_INT;
    // valLen comes straight from the data: compare against the remaining space, overflow-safe
    if (valLen < 0 || valLen > endOffset - pos) {
      throw invalidKeyValue(
        "Invalid value length in KeyValue, valueLength=" + valLen + bytesToHex(buf, offset, length));
    }
    // check the row
    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading row length at position=" + pos + bytesToHex(buf, offset, length));
    }
    short rowLen = Bytes.toShort(buf, pos, Bytes.SIZEOF_SHORT);
    pos += Bytes.SIZEOF_SHORT;
    if (rowLen < 0 || pos + rowLen > endOffset) {
      throw invalidKeyValue(
        "Invalid row length in KeyValue, rowLength=" + rowLen + bytesToHex(buf, offset, length));
    }
    pos += rowLen;
    // check the family
    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading family length at position=" + pos + bytesToHex(buf, offset, length));
    }
    int familyLen = buf[pos];
    pos += Bytes.SIZEOF_BYTE;
    if (familyLen < 0 || pos + familyLen > endOffset) {
      throw invalidKeyValue("Invalid family length in KeyValue, familyLength=" + familyLen
        + bytesToHex(buf, offset, length));
    }
    pos += familyLen;
    // check the qualifier; its length is whatever remains of the key after the fixed-size parts
    int qualifierLen = keyLen - Bytes.SIZEOF_SHORT - rowLen - Bytes.SIZEOF_BYTE - familyLen
      - Bytes.SIZEOF_LONG - Bytes.SIZEOF_BYTE;
    if (qualifierLen < 0 || qualifierLen > endOffset - pos) {
      throw invalidKeyValue("Invalid qualifier length in KeyValue, qualifierLen=" + qualifierLen
        + bytesToHex(buf, offset, length));
    }
    pos += qualifierLen;
    // check the timestamp
    if (pos + Bytes.SIZEOF_LONG > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading timestamp at position=" + pos + bytesToHex(buf, offset, length));
    }
    long timestamp = Bytes.toLong(buf, pos, Bytes.SIZEOF_LONG);
    if (timestamp < 0) {
      throw invalidKeyValue(
        "Timestamp cannot be negative, ts=" + timestamp + bytesToHex(buf, offset, length));
    }
    pos += Bytes.SIZEOF_LONG;
    // check the type
    if (pos + Bytes.SIZEOF_BYTE > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading type at position=" + pos + bytesToHex(buf, offset, length));
    }
    byte type = buf[pos];
    if (!KeyValue.Type.isValidType(type)) {
      throw invalidKeyValue(
        "Invalid type in KeyValue, type=" + type + bytesToHex(buf, offset, length));
    }
    pos += Bytes.SIZEOF_BYTE;
    // check the value
    if (valLen > endOffset - pos) {
      throw invalidKeyValue(
        "Overflow when reading value part at position=" + pos + bytesToHex(buf, offset, length));
    }
    pos += valLen;
    // check the tags
    if (withTags) {
      if (pos == endOffset) {
        // withTags is true but no tag in the cell.
        return;
      }
      pos = checkKeyValueTagBytes(buf, offset, length, pos, endOffset);
    }
    if (pos != endOffset) {
      throw invalidKeyValue("Some redundant bytes in KeyValue's buffer, startOffset=" + pos
        + ", endOffset=" + endOffset + bytesToHex(buf, offset, length));
    }
  }

  /**
   * Validates the tags section that starts at <code>pos</code>: a 2-byte total tags length
   * followed by tags, each prefixed with its own length.
   * @param buf       the backing array
   * @param offset    start of the serialized KeyValue (used for diagnostics only)
   * @param length    length of the serialized KeyValue (used for diagnostics only)
   * @param pos       position of the tags length field
   * @param endOffset first position past the serialized KeyValue
   * @return the position just past the last tag
   * @throws IllegalArgumentException if the tags section is malformed
   */
  private static int checkKeyValueTagBytes(byte[] buf, int offset, int length, int pos,
    int endOffset) {
    if (pos + Bytes.SIZEOF_SHORT > endOffset) {
      throw invalidKeyValue(
        "Overflow when reading tags length at position=" + pos + bytesToHex(buf, offset, length));
    }
    short tagsLen = Bytes.toShort(buf, pos);
    pos += Bytes.SIZEOF_SHORT;
    if (tagsLen < 0 || pos + tagsLen > endOffset) {
      throw invalidKeyValue("Invalid tags length in KeyValue at position="
        + (pos - Bytes.SIZEOF_SHORT) + bytesToHex(buf, offset, length));
    }
    int tagsEndOffset = pos + tagsLen;
    while (pos < tagsEndOffset) {
      if (pos + Tag.TAG_LENGTH_SIZE > endOffset) {
        throw invalidKeyValue(
          "Overflow when reading tag length at position=" + pos + bytesToHex(buf, offset, length));
      }
      short tagLen = Bytes.toShort(buf, pos);
      pos += Tag.TAG_LENGTH_SIZE;
      // tagLen contains one byte tag type, so must be not less than 1.
      if (tagLen < 1 || pos + tagLen > endOffset) {
        throw invalidKeyValue("Invalid tag length at position=" + (pos - Tag.TAG_LENGTH_SIZE)
          + ", tagLength=" + tagLen + bytesToHex(buf, offset, length));
      }
      pos += tagLen;
    }
    return pos;
  }

  /**
   * Create a KeyValue reading from the raw InputStream. Named
   * <code>createKeyValueFromInputStream</code> so doesn't clash with {@link #create(DataInput)}
   * @param in       inputStream to read.
   * @param withTags whether the keyvalue should include tags or not
   * @return Created KeyValue; this method itself never returns <code>null</code>. A length prefix
   *         of zero is handed to the KeyValue constructor as an empty backing array.
   * @throws EOFException if the stream is already at its end when the length prefix is read
   * @throws IOException  if the stream ends inside the length prefix or the KeyValue bytes, or if
   *                      the length prefix is negative
   */
  public static KeyValue createKeyValueFromInputStream(InputStream in, boolean withTags)
    throws IOException {
    byte[] intBytes = new byte[Bytes.SIZEOF_INT];
    int bytesRead = 0;
    while (bytesRead < intBytes.length) {
      int n = in.read(intBytes, bytesRead, intBytes.length - bytesRead);
      if (n < 0) {
        if (bytesRead == 0) {
          throw new EOFException();
        }
        throw new IOException("Failed read of int, read " + bytesRead + " bytes");
      }
      bytesRead += n;
    }
    int length = Bytes.toInt(intBytes);
    if (length < 0) {
      // Report a corrupt length prefix as an I/O problem instead of letting the array
      // allocation below fail with an unchecked NegativeArraySizeException.
      throw new IOException("Failed read " + length + " bytes, stream corrupt?");
    }
    byte[] bytes = new byte[length];
    IOUtils.readFully(in, bytes, 0, bytes.length);
    return withTags
      ? new KeyValue(bytes, 0, bytes.length)
      : new NoTagsKeyValue(bytes, 0, bytes.length);
  }

  /**
   * Returns a KeyValue made of a byte array that holds the key-only part. Needed to convert hfile
   * index members to KeyValues.
   * @param b array holding exactly the key bytes
   * @return a new KeyValue with the given key and an empty value
   */
  public static KeyValue createKeyValueFromKey(final byte[] b) {
    return createKeyValueFromKey(b, 0, b.length);
  }

  /**
   * Return a KeyValue made of a byte buffer that holds the key-only part. Needed to convert hfile
   * index members to KeyValues.
   * <p>
   * NOTE(review): the key is taken from the backing array starting at
   * <code>bb.arrayOffset()</code> and spanning <code>bb.limit()</code> bytes; the buffer's
   * position is not consulted. Confirm callers always pass a buffer positioned at 0.
   * @param bb array-backed buffer holding the key bytes
   * @return a new KeyValue with the given key and an empty value
   */
  public static KeyValue createKeyValueFromKey(final ByteBuffer bb) {
    return createKeyValueFromKey(bb.array(), bb.arrayOffset(), bb.limit());
  }

  /**
   * Return a KeyValue made of a byte array that holds the key-only part. Needed to convert hfile
   * index members to KeyValues.
   * @param b array holding the key bytes
   * @param o offset of the key within <code>b</code>
   * @param l length of the key
   * @return a new KeyValue backed by a fresh array: key length, a value length of zero, then a
   *         copy of the key bytes
   */
  public static KeyValue createKeyValueFromKey(final byte[] b, final int o, final int l) {
    byte[] newb = new byte[l + KeyValue.ROW_OFFSET];
    System.arraycopy(b, o, newb, KeyValue.ROW_OFFSET, l);
    Bytes.putInt(newb, 0, l);
    Bytes.putInt(newb, Bytes.SIZEOF_INT, 0);
    return new KeyValue(newb);
  }

  /**
   * Reads an int length from <code>in</code> and then that many bytes, which become the backing
   * array of the returned KeyValue.
   * @param in Where to read bytes from. Creates a byte array to hold the KeyValue backing bytes
   *           copied from the stream.
   * @return KeyValue created by deserializing from <code>in</code> OR if we find a length of zero,
   *         we will return null which can be useful marking a stream as done.
   */
  public static KeyValue create(final DataInput in) throws IOException {
    return create(in.readInt(), in);
  }

  /**
   * Create a KeyValue reading <code>length</code> from <code>in</code>
   * @param length number of bytes to read
   * @param in     where to read the bytes from
   * @return Created KeyValue OR if we find a length of zero, we will return null which can be
   *         useful marking a stream as done.
   * @throws IOException if <code>length</code> is negative or the read fails
   */
  public static KeyValue create(int length, final DataInput in) throws IOException {

    if (length <= 0) {
      if (length == 0) {
        return null;
      }
      throw new IOException("Failed read " + length + " bytes, stream corrupt?");
    }

    // This is how the old Writables.readFrom used to deserialize. Didn't even
    // vint.
    byte[] bytes = new byte[length];
    in.readFully(bytes);
    return new KeyValue(bytes, 0, length);
  }

  /**
   * Returns the serialized size of the cell, with or without its tags.
   * @param cell     the cell to measure
   * @param withTags whether tags are counted
   * @return <code>cell.getSerializedSize()</code> when tags are counted; otherwise the
   *         {@link ExtendedCell} size without tags when the cell is an ExtendedCell, else the
   *         size computed by {@link #length(short, byte, int, int, int, boolean)}
   */
  public static int getSerializedSize(Cell cell, boolean withTags) {
    if (withTags) {
      return cell.getSerializedSize();
    }
    if (cell instanceof ExtendedCell) {
      return ((ExtendedCell) cell).getSerializedSize(withTags);
    }
    return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
      cell.getValueLength(), cell.getTagsLength(), withTags);
  }

  /**
   * Writes the cell to <code>out</code> in KeyValue serialization format. An {@link ExtendedCell}
   * writes itself; any other cell is written field by field from its backing arrays.
   * @param cell     the cell to write
   * @param out      the destination stream
   * @param withTags whether tags are written
   * @return the number of bytes written
   */
  public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags)
    throws IOException {
    if (cell instanceof ExtendedCell) {
      return ((ExtendedCell) cell).write(out, withTags);
    } else {
      short rlen = cell.getRowLength();
      byte flen = cell.getFamilyLength();
      int qlen = cell.getQualifierLength();
      int vlen = cell.getValueLength();
      int tlen = cell.getTagsLength();
      // write key length
      int klen = keyLength(rlen, flen, qlen);
      ByteBufferUtils.putInt(out, klen);
      // write value length
      ByteBufferUtils.putInt(out, vlen);
      // Write rowkey - 2 bytes rk length followed by rowkey bytes
      StreamUtils.writeShort(out, rlen);
      out.write(cell.getRowArray(), cell.getRowOffset(), rlen);
      // Write cf - 1 byte of cf length followed by the family bytes
      out.write(flen);
      out.write(cell.getFamilyArray(), cell.getFamilyOffset(), flen);
      // write qualifier
      out.write(cell.getQualifierArray(), cell.getQualifierOffset(), qlen);
      // write timestamp
      StreamUtils.writeLong(out, cell.getTimestamp());
      // write the type
      out.write(cell.getTypeByte());
      // write value
      out.write(cell.getValueArray(), cell.getValueOffset(), vlen);
      int size = klen + vlen + KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE;
      // write tags if we have to
      if (withTags && tlen > 0) {
        // 2 bytes tags length followed by tags bytes
        // tags length is serialized with 2 bytes only(short way) even if the
        // type is int. As this
        // is non -ve numbers, we save the sign bit. See HBASE-11437
        out.write((byte) (0xff & (tlen >> 8)));
        out.write((byte) (0xff & tlen));
        out.write(cell.getTagsArray(), cell.getTagsOffset(), tlen);
        size += tlen + KeyValue.TAGS_LENGTH_SIZE;
      }
      return size;
    }
  }
}