001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.Closeable; 021import java.io.IOException; 022import java.util.function.IntConsumer; 023import org.apache.hadoop.fs.Path; 024import org.apache.hadoop.hbase.Cell; 025import org.apache.hadoop.hbase.KeyValue; 026import org.apache.hadoop.hbase.client.Scan; 027import org.apache.yetus.audience.InterfaceAudience; 028 029/** 030 * Scanner that returns the next KeyValue. 031 */ 032@InterfaceAudience.Private 033// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner 034// so this should be something else altogether, a decoration on our base CellScanner. TODO. 035// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. 036public interface KeyValueScanner extends Shipper, Closeable { 037 /** 038 * The byte array represents for NO_NEXT_INDEXED_KEY; The actual value is irrelevant because this 039 * is always compared by reference. 040 */ 041 public static final Cell NO_NEXT_INDEXED_KEY = new KeyValue(); 042 043 /** 044 * Look at the next Cell in this scanner, but do not iterate scanner. NOTICE: The returned cell 045 * has not been passed into ScanQueryMatcher. So it may not be what the user need. 046 * @return the next Cell 047 */ 048 Cell peek(); 049 050 /** 051 * Return the next Cell in this scanner, iterating the scanner 052 * @return the next Cell 053 */ 054 Cell next() throws IOException; 055 056 /** 057 * Seek the scanner at or after the specified KeyValue. 058 * @param key seek value 059 * @return true if scanner has values left, false if end of scanner 060 */ 061 boolean seek(Cell key) throws IOException; 062 063 /** 064 * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or 065 * after the required key only if the key comes after the current position of the scanner. Should 066 * not be used to seek to a key which may come before the current position. 067 * @param key seek value (should be non-null) 068 * @return true if scanner has values left, false if end of scanner 069 */ 070 boolean reseek(Cell key) throws IOException; 071 072 /** 073 * Get the order of this KeyValueScanner. This is only relevant for StoreFileScanners. This is 074 * required for comparing multiple files to find out which one has the latest data. 075 * StoreFileScanners are ordered from 0 (oldest) to newest in increasing order. 076 */ 077 default long getScannerOrder() { 078 return 0; 079 } 080 081 /** 082 * Close the KeyValue scanner. 083 */ 084 @Override 085 void close(); 086 087 /** 088 * Allows to filter out scanners (both StoreFile and memstore) that we don't want to use based on 089 * criteria such as Bloom filters and timestamp ranges. 090 * @param scan the scan that we are selecting scanners for 091 * @param store the store we are performing the scan on. 092 * @param oldestUnexpiredTS the oldest timestamp we are interested in for this query, based on TTL 093 * @return true if the scanner should be included in the query 094 */ 095 boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS); 096 097 // "Lazy scanner" optimizations 098 099 /** 100 * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only does a seek operation 101 * after checking that it is really necessary for the row/column combination specified by the kv 102 * parameter. This function was added to avoid unnecessary disk seeks by checking row-column Bloom 103 * filters before a seek on multi-column get/scan queries, and to optimize by looking up more 104 * recent files first. 105 * @param forward do a forward-only "reseek" instead of a random-access seek 106 * @param useBloom whether to enable multi-column Bloom filter optimization 107 */ 108 boolean requestSeek(Cell kv, boolean forward, boolean useBloom) throws IOException; 109 110 /** 111 * We optimize our store scanners by checking the most recent store file first, so we sometimes 112 * pretend we have done a seek but delay it until the store scanner bubbles up to the top of the 113 * key-value heap. This method is then used to ensure the top store file scanner has done a seek 114 * operation. 115 */ 116 boolean realSeekDone(); 117 118 /** 119 * Does the real seek operation in case it was skipped by seekToRowCol(KeyValue, boolean) (TODO: 120 * Whats this?). Note that this function should be never called on scanners that always do real 121 * seek operations (i.e. most of the scanners). The easiest way to achieve this is to call 122 * {@link #realSeekDone()} first. 123 */ 124 void enforceSeek() throws IOException; 125 126 /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */ 127 boolean isFileScanner(); 128 129 /** 130 * Record the size of the current block in bytes, passing as an argument to the blockSizeConsumer. 131 * Implementations should ensure that blockSizeConsumer is only called once per block. 132 * @param blockSizeConsumer to be called with block size in bytes, once per block. 133 */ 134 void recordBlockSize(IntConsumer blockSizeConsumer); 135 136 /** 137 * @return the file path if this is a file scanner, otherwise null. 138 * @see #isFileScanner() 139 */ 140 Path getFilePath(); 141 142 // Support for "Reversed Scanner" 143 /** 144 * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner 145 * at or after the specified Cell, return if peek KeyValue of scanner has the same row with 146 * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous 147 * row of specified KeyValue 148 * @param key seek KeyValue 149 * @return true if the scanner is at the valid KeyValue, false if such KeyValue does not exist 150 */ 151 public boolean backwardSeek(Cell key) throws IOException; 152 153 /** 154 * Seek the scanner at the first Cell of the row which is the previous row of specified key 155 * @param key seek value 156 * @return true if the scanner at the first valid Cell of previous row, false if not existing such 157 * Cell 158 */ 159 public boolean seekToPreviousRow(Cell key) throws IOException; 160 161 /** 162 * Seek the scanner at the first KeyValue of last row 163 * @return true if scanner has values left, false if the underlying data is empty 164 */ 165 public boolean seekToLastRow() throws IOException; 166 167 /** 168 * @return the next key in the index, usually the first key of next block OR a key that falls 169 * between last key of current block and first key of next block.. see 170 * HFileWriterImpl#getMidpoint, or null if not known. 171 */ 172 public Cell getNextIndexedKey(); 173}