/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.security.Key;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.crypto.Cipher;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.security.EncryptionUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HFileProtos;

/**
 * Metadata Map of attributes for an HFile written out as the HFile Trailer. Created by the Writer
 * and added to the tail of the file just before close. Metadata includes core attributes such as
 * the last key seen, the comparator used when writing the file, etc. Clients can add their own
 * attributes via {@link #append(byte[], byte[], boolean)} and they'll be persisted and available
 * at read time. The Reader creates the HFileInfo on open by reading the tail of the HFile. The
 * parse of the HFile trailer also creates a {@link HFileContext}, a read-only data structure that
 * includes the bulk of the HFileInfo plus extras that are safe to pass around when working on
 * HFiles.
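 * <p>
 * A minimal usage sketch; the attribute name here is illustrative, not a real HFile attribute:
 *
 * <pre>{@code
 * HFileInfo fileInfo = new HFileInfo();
 * // Append a custom attribute; checkPrefix=true rejects keys starting with the reserved "hfile." prefix.
 * fileInfo.append(Bytes.toBytes("MY_ATTRIBUTE"), Bytes.toBytes("some value"), true);
 * byte[] stored = fileInfo.get(Bytes.toBytes("MY_ATTRIBUTE"));
 * }</pre>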
 * @see HFileContext
 */
@InterfaceAudience.Private
public class HFileInfo implements SortedMap<byte[], byte[]> {

  private static final Logger LOG = LoggerFactory.getLogger(HFileInfo.class);

  static final String RESERVED_PREFIX = "hfile.";
  static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
  static final byte[] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
  static final byte[] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
  static final byte[] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
  static final byte[] CREATE_TIME_TS = Bytes.toBytes(RESERVED_PREFIX + "CREATE_TIME_TS");
  static final byte[] TAGS_COMPRESSED = Bytes.toBytes(RESERVED_PREFIX + "TAGS_COMPRESSED");
  static final byte[] KEY_OF_BIGGEST_CELL = Bytes.toBytes(RESERVED_PREFIX + "KEY_OF_BIGGEST_CELL");
  static final byte[] LEN_OF_BIGGEST_CELL = Bytes.toBytes(RESERVED_PREFIX + "LEN_OF_BIGGEST_CELL");
  public static final byte[] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
  private final SortedMap<byte[], byte[]> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);

  /**
   * We can read files whose major version is v2 IFF their minor version is at least 3.
   */
  private static final int MIN_V2_MINOR_VERSION_WITH_PB = 3;

  /** Maximum minor version supported by this HFile format */
  // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
  // the file. This version can read Writables version 1.
  static final int MAX_MINOR_VERSION = 3;

  /** Last key in the file. Filled in when we read in the file info */
  private Cell lastKeyCell = null;
  /** Average key length read from file info */
  private int avgKeyLen = -1;
  /** Average value length read from file info */
  private int avgValueLen = -1;
  /** Biggest Cell in the file, key only. Filled in when we read in the file info */
  private Cell biggestCell = null;
  /** Length of the biggest Cell */
  private long lenOfBiggestCell = -1;
  private boolean includesMemstoreTS = false;
  private boolean decodeMemstoreTS = false;

  /**
   * Blocks read from the load-on-open section, excluding data root index, meta index, and file
   * info.
   */
  private List<HFileBlock> loadOnOpenBlocks = new ArrayList<>();

  /**
   * The iterator tracks all blocks in the load-on-open section. Since we now use
   * {@link org.apache.hadoop.hbase.io.ByteBuffAllocator} to manage the ByteBuffers backing the
   * blocks, we must ensure that all of those ByteBuffers are deallocated in the end.
   */
  private HFileBlock.BlockIterator blockIter;

  private HFileBlockIndex.CellBasedKeyBlockIndexReader dataIndexReader;
  private HFileBlockIndex.ByteArrayKeyBlockIndexReader metaIndexReader;

  private FixedFileTrailer trailer;
  private HFileContext hfileContext;

  public HFileInfo() {
    super();
  }

  public HFileInfo(ReaderContext context, Configuration conf) throws IOException {
    this.initTrailerAndContext(context, conf);
  }

  /**
   * Append the given key/value pair to the file info, optionally checking the key prefix.
   * @param k           key to add
   * @param v           value to add
   * @param checkPrefix whether to check that the provided key does not start with the reserved
   *                    prefix
   * @return this file info object
   * @throws IOException if the key or value is invalid
   */
  public HFileInfo append(final byte[] k, final byte[] v, final boolean checkPrefix)
    throws IOException {
    if (k == null || v == null) {
      throw new NullPointerException("Neither key nor value may be null");
    }
    if (checkPrefix && isReservedFileInfoKey(k)) {
      throw new IOException("Keys with a " + HFileInfo.RESERVED_PREFIX + " prefix are reserved");
    }
    put(k, v);
    return this;
  }

  /** Return true if the given file info key is reserved for internal use. */
  public static boolean isReservedFileInfoKey(byte[] key) {
    return Bytes.startsWith(key, HFileInfo.RESERVED_PREFIX_BYTES);
  }

  @Override
  public void clear() {
    this.map.clear();
  }

  @Override
  public Comparator<? super byte[]> comparator() {
    return map.comparator();
  }

  @Override
  public boolean containsKey(Object key) {
    return map.containsKey(key);
  }

  @Override
  public boolean containsValue(Object value) {
    return map.containsValue(value);
  }

  @Override
  public Set<java.util.Map.Entry<byte[], byte[]>> entrySet() {
    return map.entrySet();
  }

  @Override
  public boolean equals(Object o) {
    return map.equals(o);
  }

  @Override
  public byte[] firstKey() {
    return map.firstKey();
  }

  @Override
  public byte[] get(Object key) {
    return map.get(key);
  }

  @Override
  public int hashCode() {
    return map.hashCode();
  }

  @Override
  public SortedMap<byte[], byte[]> headMap(byte[] toKey) {
    return this.map.headMap(toKey);
  }

  @Override
  public boolean isEmpty() {
    return map.isEmpty();
  }

  @Override
  public Set<byte[]> keySet() {
    return map.keySet();
  }

  @Override
  public byte[] lastKey() {
    return map.lastKey();
  }

  @Override
  public byte[] put(byte[] key, byte[] value) {
    return this.map.put(key, value);
  }

  @Override
  public void putAll(Map<? extends byte[], ? extends byte[]> m) {
    this.map.putAll(m);
  }

  @Override
  public byte[] remove(Object key) {
    return this.map.remove(key);
  }

  @Override
  public int size() {
    return map.size();
  }

  @Override
  public SortedMap<byte[], byte[]> subMap(byte[] fromKey, byte[] toKey) {
    return this.map.subMap(fromKey, toKey);
  }

  @Override
  public SortedMap<byte[], byte[]> tailMap(byte[] fromKey) {
    return this.map.tailMap(fromKey);
  }

  @Override
  public Collection<byte[]> values() {
    return map.values();
  }

  /**
   * Write out this instance on the passed in <code>out</code> stream. We write it as a protobuf.
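   * <p>
   * A round-trip sketch; the stream setup here is illustrative only:
   *
   * <pre>{@code
   * ByteArrayOutputStream baos = new ByteArrayOutputStream();
   * fileInfo.write(new DataOutputStream(baos));
   * HFileInfo copy = new HFileInfo();
   * copy.read(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
   * }</pre>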
   * @see #read(DataInputStream)
   */
  void write(final DataOutputStream out) throws IOException {
    HFileProtos.FileInfoProto.Builder builder = HFileProtos.FileInfoProto.newBuilder();
    for (Map.Entry<byte[], byte[]> e : this.map.entrySet()) {
      HBaseProtos.BytesBytesPair.Builder bbpBuilder = HBaseProtos.BytesBytesPair.newBuilder();
      bbpBuilder.setFirst(UnsafeByteOperations.unsafeWrap(e.getKey()));
      bbpBuilder.setSecond(UnsafeByteOperations.unsafeWrap(e.getValue()));
      builder.addMapEntry(bbpBuilder.build());
    }
    out.write(ProtobufMagic.PB_MAGIC);
    builder.build().writeDelimitedTo(out);
  }

  /**
   * Populate this instance with what we find on the passed in <code>in</code> stream. Can
   * deserialize either the protobuf or the old Writables format.
   * @see #write(DataOutputStream)
   */
  void read(final DataInputStream in) throws IOException {
    // This code is tested over in TestHFileReaderV1 where we read an old hfile w/ this new code.
    int pblen = ProtobufUtil.lengthOfPBMagic();
    byte[] pbuf = new byte[pblen];
    if (in.markSupported()) {
      in.mark(pblen);
    }
    int read = in.read(pbuf);
    if (read != pblen) {
      throw new IOException("read=" + read + ", wanted=" + pblen);
    }
    if (ProtobufUtil.isPBMagicPrefix(pbuf)) {
      parsePB(HFileProtos.FileInfoProto.parseDelimitedFrom(in));
    } else {
      if (in.markSupported()) {
        in.reset();
        parseWritable(in);
      } else {
        // We cannot use BufferedInputStream, it consumes more than we read from the underlying IS
        ByteArrayInputStream bais = new ByteArrayInputStream(pbuf);
        SequenceInputStream sis = new SequenceInputStream(bais, in); // Concatenate input streams
        // TODO: Am I leaking anything here wrapping the passed in stream? We are not calling
        // close on the wrapped streams but they should be let go after we leave this context?
        // I see that we keep a reference to the passed in inputstream but since we no longer
        // have a reference to this after we leave, we should be ok.
        parseWritable(new DataInputStream(sis));
      }
    }
  }

  /**
   * Parse the old Writable format: an entry count followed by the entries, each a byte [] key and
   * a byte [] value. The old map format wrote a one-byte code between the key and the value that
   * identified the value's type. We know it is always a byte [] in an hfile, so below we just read
   * that byte and discard it.
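   * <p>
   * A sketch of the layout as read below (keys and values are length-prefixed byte arrays):
   *
   * <pre>
   *   int entryCount
   *   repeated entryCount times: byte [] key, one ignored type byte, byte [] value
   * </pre>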
   */
  void parseWritable(final DataInputStream in) throws IOException {
    // First clear the map.
    // Otherwise we will just accumulate entries every time this method is called.
    this.map.clear();
    // Read the number of entries in the map
    int entries = in.readInt();
    // Then read each key/value pair
    for (int i = 0; i < entries; i++) {
      byte[] key = Bytes.readByteArray(in);
      // We used to read a byte that encoded the class type.
      // Read and ignore it because it is always byte [] in hfile
      in.readByte();
      byte[] value = Bytes.readByteArray(in);
      this.map.put(key, value);
    }
  }

  /**
   * Fill our map with content of the pb we read off disk
   * @param fip protobuf message to read
   */
  void parsePB(final HFileProtos.FileInfoProto fip) {
    this.map.clear();
    for (BytesBytesPair pair : fip.getMapEntryList()) {
      this.map.put(pair.getFirst().toByteArray(), pair.getSecond().toByteArray());
    }
  }

  public void initTrailerAndContext(ReaderContext context, Configuration conf) throws IOException {
    try {
      boolean isHBaseChecksum = context.getInputStreamWrapper().shouldUseHBaseChecksum();
      trailer = FixedFileTrailer.readFromStream(
        context.getInputStreamWrapper().getStream(isHBaseChecksum), context.getFileSize());
      Path path = context.getFilePath();
      checkFileVersion(path);
      this.hfileContext = createHFileContext(path, trailer, conf);
      context.getInputStreamWrapper().unbuffer();
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper(),
        e -> LOG.warn("failed to close input stream wrapper", e));
      throw new CorruptHFileException(
        "Problem reading HFile Trailer from file " + context.getFilePath(), t);
    }
  }

  /**
   * Should be called after {@link #initTrailerAndContext(ReaderContext, Configuration)}.
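   * <p>
   * A sketch of the expected call order; construction of the {@code reader} is assumed to happen
   * elsewhere:
   *
   * <pre>{@code
   * HFileInfo fileInfo = new HFileInfo(context, conf); // runs initTrailerAndContext
   * fileInfo.initMetaAndIndex(reader);
   * }</pre>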
   */
  public void initMetaAndIndex(HFile.Reader reader) throws IOException {
    ReaderContext context = reader.getContext();
    try {
      HFileBlock.FSReader blockReader = reader.getUncachedBlockReader();
      // Initialize a block iterator, and parse the load-on-open blocks in the following.
      blockIter = blockReader.blockRange(trailer.getLoadOnOpenDataOffset(),
        context.getFileSize() - trailer.getTrailerSize());
      // Data index. We also read statistics about the block index written after
      // the root level.
      HFileBlock dataBlockRootIndex = blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX);
      HFileBlock metaBlockIndex = blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX);
      loadMetaInfo(blockIter, hfileContext);

      HFileIndexBlockEncoder indexBlockEncoder =
        HFileIndexBlockEncoderImpl.createFromFileInfo(this);
      this.dataIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReaderV2(
        trailer.createComparator(), trailer.getNumDataIndexLevels(), indexBlockEncoder);
      dataIndexReader.readMultiLevelIndexRoot(dataBlockRootIndex, trailer.getDataIndexCount());
      reader.setDataBlockIndexReader(dataIndexReader);
      // Meta index.
      this.metaIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(1);
      metaIndexReader.readRootIndex(metaBlockIndex, trailer.getMetaIndexCount());
      reader.setMetaBlockIndexReader(metaIndexReader);

      reader.setDataBlockEncoder(HFileDataBlockEncoderImpl.createFromFileInfo(this));
      // Load-On-Open info
      HFileBlock b;
      while ((b = blockIter.nextBlock()) != null) {
        loadOnOpenBlocks.add(b);
      }
      // close the block reader
      context.getInputStreamWrapper().unbuffer();
    } catch (Throwable t) {
      IOUtils.closeQuietly(context.getInputStreamWrapper(),
        e -> LOG.warn("failed to close input stream wrapper", e));
      throw new CorruptHFileException(
        "Problem reading data index and meta index from file " + context.getFilePath(), t);
    }
  }

  private HFileContext createHFileContext(Path path, FixedFileTrailer trailer, Configuration conf)
    throws IOException {
    HFileContextBuilder builder = new HFileContextBuilder().withHBaseCheckSum(true)
      .withHFileName(path.getName()).withCompression(trailer.getCompressionCodec())
      .withCellComparator(FixedFileTrailer.createComparator(trailer.getComparatorClassName()));
    // Check for any key material available
    byte[] keyBytes = trailer.getEncryptionKey();
    if (keyBytes != null) {
      Encryption.Context cryptoContext = Encryption.newContext(conf);
      Key key = EncryptionUtil.unwrapKey(conf, keyBytes);
      // Use the algorithm the key wants
      Cipher cipher = Encryption.getCipher(conf, key.getAlgorithm());
      if (cipher == null) {
        throw new IOException(
          "Cipher '" + key.getAlgorithm() + "' is not available" + ", path=" + path);
      }
      cryptoContext.setCipher(cipher);
      cryptoContext.setKey(key);
      builder.withEncryptionContext(cryptoContext);
    }
    HFileContext context = builder.build();
    return context;
  }

  private void loadMetaInfo(HFileBlock.BlockIterator blockIter, HFileContext hfileContext)
    throws IOException {
    read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream());
    byte[] creationTimeBytes = get(HFileInfo.CREATE_TIME_TS);
    hfileContext.setFileCreateTime(creationTimeBytes == null ? 0 : Bytes.toLong(creationTimeBytes));
    byte[] tmp = get(HFileInfo.MAX_TAGS_LEN);
    // If the max tag length is not present in the HFile, it means tags were not written at all.
    if (tmp != null) {
      hfileContext.setIncludesTags(true);
      tmp = get(HFileInfo.TAGS_COMPRESSED);
      if (tmp != null && Bytes.toBoolean(tmp)) {
        hfileContext.setCompressTags(true);
      }
    }
    // parse meta info
    if (get(HFileInfo.LASTKEY) != null) {
      lastKeyCell = new KeyValue.KeyOnlyKeyValue(get(HFileInfo.LASTKEY));
    }
    if (get(HFileInfo.KEY_OF_BIGGEST_CELL) != null) {
      biggestCell = new KeyValue.KeyOnlyKeyValue(get(HFileInfo.KEY_OF_BIGGEST_CELL));
      lenOfBiggestCell = Bytes.toLong(get(HFileInfo.LEN_OF_BIGGEST_CELL));
    }
    avgKeyLen = Bytes.toInt(get(HFileInfo.AVG_KEY_LEN));
    avgValueLen = Bytes.toInt(get(HFileInfo.AVG_VALUE_LEN));
    byte[] keyValueFormatVersion = get(HFileWriterImpl.KEY_VALUE_VERSION);
    includesMemstoreTS = keyValueFormatVersion != null
      && Bytes.toInt(keyValueFormatVersion) == HFileWriterImpl.KEY_VALUE_VER_WITH_MEMSTORE;
    hfileContext.setIncludesMvcc(includesMemstoreTS);
    if (includesMemstoreTS) {
      decodeMemstoreTS = Bytes.toLong(get(HFileWriterImpl.MAX_MEMSTORE_TS_KEY)) > 0;
    }
  }

  /**
   * The file version check is a little sloppy: we read v3 files, and also v2 files as long as
   * their content has been pb'd (i.e. files written with 0.98).
   */
  private void checkFileVersion(Path path) {
    int majorVersion = trailer.getMajorVersion();
    if (majorVersion == getMajorVersion()) {
      return;
    }
    int minorVersion = trailer.getMinorVersion();
    if (majorVersion == 2 && minorVersion >= MIN_V2_MINOR_VERSION_WITH_PB) {
      return;
    }
    // We can read v3 or v2 versions of hfile.
    throw new IllegalArgumentException("Invalid HFile version: major=" + trailer.getMajorVersion()
      + ", minor=" + trailer.getMinorVersion() + ": expected at least " + "major=2 and minor="
      + MAX_MINOR_VERSION + ", path=" + path);
  }

  public void close() {
    if (blockIter != null) {
      blockIter.freeBlocks();
    }
  }

  public int getMajorVersion() {
    return 3;
  }

  public void setTrailer(FixedFileTrailer trailer) {
    this.trailer = trailer;
  }

  public FixedFileTrailer getTrailer() {
    return this.trailer;
  }

  public HFileBlockIndex.CellBasedKeyBlockIndexReader getDataBlockIndexReader() {
    return this.dataIndexReader;
  }

  public HFileBlockIndex.ByteArrayKeyBlockIndexReader getMetaBlockIndexReader() {
    return this.metaIndexReader;
  }

  public HFileContext getHFileContext() {
    return this.hfileContext;
  }

  public List<HFileBlock> getLoadOnOpenBlocks() {
    return loadOnOpenBlocks;
  }

  public Cell getLastKeyCell() {
    return lastKeyCell;
  }

  public int getAvgKeyLen() {
    return avgKeyLen;
  }

  public int getAvgValueLen() {
    return avgValueLen;
  }

  public String getKeyOfBiggestCell() {
    return CellUtil.toString(biggestCell, false);
  }

  public long getLenOfBiggestCell() {
    return lenOfBiggestCell;
  }

  public boolean shouldIncludeMemStoreTS() {
    return includesMemstoreTS;
  }

  public boolean isDecodeMemstoreTS() {
    return decodeMemstoreTS;
  }
}