001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.Iterator; 022import java.util.SortedSet; 023import java.util.function.IntConsumer; 024import org.apache.commons.lang3.NotImplementedException; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.Cell; 027import org.apache.hadoop.hbase.PrivateCellUtil; 028import org.apache.hadoop.hbase.client.Scan; 029import org.apache.yetus.audience.InterfaceAudience; 030 031/** 032 * A scanner of a single memstore segment. 033 */ 034@InterfaceAudience.Private 035public class SegmentScanner implements KeyValueScanner { 036 037 // the observed structure 038 protected final Segment segment; 039 // the highest relevant MVCC 040 private long readPoint; 041 // the current iterator that can be reinitialized by 042 // seek(), backwardSeek(), or reseek() 043 protected Iterator<Cell> iter; 044 // the pre-calculated cell to be returned by peek() 045 protected Cell current = null; 046 // or next() 047 // A flag represents whether could stop skipping KeyValues for MVCC 048 // if have encountered the next row. Only used for reversed scan 049 private boolean stopSkippingKVsIfNextRow = false; 050 // Stop skipping KeyValues for MVCC if finish this row. Only used for reversed scan 051 private Cell stopSkippingKVsRow; 052 // last iterated KVs by seek (to restore the iterator state after reseek) 053 private Cell last = null; 054 055 // flag to indicate if this scanner is closed 056 protected boolean closed = false; 057 058 /** 059 * Scanners are ordered from 0 (oldest) to newest in increasing order. 060 */ 061 protected SegmentScanner(Segment segment, long readPoint) { 062 this.segment = segment; 063 this.readPoint = readPoint; 064 // increase the reference count so the underlying structure will not be de-allocated 065 this.segment.incScannerCount(); 066 iter = segment.iterator(); 067 // the initialization of the current is required for working with heap of SegmentScanners 068 updateCurrent(); 069 if (current == null) { 070 // nothing to fetch from this scanner 071 close(); 072 } 073 } 074 075 /** 076 * Look at the next Cell in this scanner, but do not iterate the scanner 077 * @return the currently observed Cell 078 */ 079 @Override 080 public Cell peek() { // sanity check, the current should be always valid 081 if (closed) { 082 return null; 083 } 084 if (current != null && current.getSequenceId() > readPoint) { 085 throw new RuntimeException("current is invalid: read point is " + readPoint + ", " 086 + "while current sequence id is " + current.getSequenceId()); 087 } 088 return current; 089 } 090 091 /** 092 * Return the next Cell in this scanner, iterating the scanner 093 * @return the next Cell or null if end of scanner 094 */ 095 @Override 096 public Cell next() throws IOException { 097 if (closed) { 098 return null; 099 } 100 Cell oldCurrent = current; 101 updateCurrent(); // update the currently observed Cell 102 return oldCurrent; 103 } 104 105 /** 106 * Seek the scanner at or after the specified Cell. 107 * @param cell seek value 108 * @return true if scanner has values left, false if end of scanner 109 */ 110 @Override 111 public boolean seek(Cell cell) throws IOException { 112 if (closed) { 113 return false; 114 } 115 if (cell == null) { 116 close(); 117 return false; 118 } 119 // restart the iterator from new key 120 iter = getIterator(cell); 121 // last is going to be reinitialized in the next getNext() call 122 last = null; 123 updateCurrent(); 124 return (current != null); 125 } 126 127 protected Iterator<Cell> getIterator(Cell cell) { 128 return segment.tailSet(cell).iterator(); 129 } 130 131 /** 132 * Reseek the scanner at or after the specified KeyValue. This method is guaranteed to seek at or 133 * after the required key only if the key comes after the current position of the scanner. Should 134 * not be used to seek to a key which may come before the current position. 135 * @param cell seek value (should be non-null) 136 * @return true if scanner has values left, false if end of scanner 137 */ 138 @Override 139 public boolean reseek(Cell cell) throws IOException { 140 if (closed) { 141 return false; 142 } 143 /* 144 * See HBASE-4195 & HBASE-3855 & HBASE-6591 for the background on this implementation. This code 145 * is executed concurrently with flush and puts, without locks. The ideal implementation for 146 * performance would use the sub skip list implicitly pointed by the iterator. Unfortunately the 147 * Java API does not offer a method to get it. So we remember the last keys we iterated to and 148 * restore the reseeked set to at least that point. 149 */ 150 iter = getIterator(getHighest(cell, last)); 151 updateCurrent(); 152 return (current != null); 153 } 154 155 /** 156 * Seek the scanner at or before the row of specified Cell, it firstly tries to seek the scanner 157 * at or after the specified Cell, return if peek KeyValue of scanner has the same row with 158 * specified Cell, otherwise seek the scanner at the first Cell of the row which is the previous 159 * row of specified KeyValue 160 * @param key seek Cell 161 * @return true if the scanner is at the valid KeyValue, false if such Cell does not exist 162 */ 163 @Override 164 public boolean backwardSeek(Cell key) throws IOException { 165 if (closed) { 166 return false; 167 } 168 seek(key); // seek forward then go backward 169 if (peek() == null || segment.compareRows(peek(), key) > 0) { 170 return seekToPreviousRow(key); 171 } 172 return true; 173 } 174 175 /** 176 * Seek the scanner at the first Cell of the row which is the previous row of specified key 177 * @param cell seek value 178 * @return true if the scanner at the first valid Cell of previous row, false if not existing such 179 * Cell 180 */ 181 @Override 182 public boolean seekToPreviousRow(Cell cell) throws IOException { 183 if (closed) { 184 return false; 185 } 186 boolean keepSeeking; 187 Cell key = cell; 188 do { 189 Cell firstKeyOnRow = PrivateCellUtil.createFirstOnRow(key); 190 SortedSet<Cell> cellHead = segment.headSet(firstKeyOnRow); 191 Cell lastCellBeforeRow = cellHead.isEmpty() ? null : cellHead.last(); 192 if (lastCellBeforeRow == null) { 193 current = null; 194 return false; 195 } 196 Cell firstKeyOnPreviousRow = PrivateCellUtil.createFirstOnRow(lastCellBeforeRow); 197 this.stopSkippingKVsIfNextRow = true; 198 this.stopSkippingKVsRow = firstKeyOnPreviousRow; 199 seek(firstKeyOnPreviousRow); 200 this.stopSkippingKVsIfNextRow = false; 201 if ( 202 peek() == null || segment.getComparator().compareRows(peek(), firstKeyOnPreviousRow) > 0 203 ) { 204 keepSeeking = true; 205 key = firstKeyOnPreviousRow; 206 continue; 207 } else { 208 keepSeeking = false; 209 } 210 } while (keepSeeking); 211 return true; 212 } 213 214 /** 215 * Seek the scanner at the first KeyValue of last row 216 * @return true if scanner has values left, false if the underlying data is empty 217 */ 218 @Override 219 public boolean seekToLastRow() throws IOException { 220 if (closed) { 221 return false; 222 } 223 Cell higherCell = segment.isEmpty() ? null : segment.last(); 224 if (higherCell == null) { 225 return false; 226 } 227 228 Cell firstCellOnLastRow = PrivateCellUtil.createFirstOnRow(higherCell); 229 230 if (seek(firstCellOnLastRow)) { 231 return true; 232 } else { 233 return seekToPreviousRow(higherCell); 234 } 235 } 236 237 /** 238 * Close the KeyValue scanner. 239 */ 240 @Override 241 public void close() { 242 if (closed) { 243 return; 244 } 245 getSegment().decScannerCount(); 246 closed = true; 247 } 248 249 /** 250 * This functionality should be resolved in the higher level which is MemStoreScanner, currently 251 * returns true as default. Doesn't throw IllegalStateException in order not to change the 252 * signature of the overridden method 253 */ 254 @Override 255 public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) { 256 return getSegment().shouldSeek(scan.getColumnFamilyTimeRange().getOrDefault( 257 store.getColumnFamilyDescriptor().getName(), scan.getTimeRange()), oldestUnexpiredTS); 258 } 259 260 @Override 261 public boolean requestSeek(Cell c, boolean forward, boolean useBloom) throws IOException { 262 return NonLazyKeyValueScanner.doRealSeek(this, c, forward); 263 } 264 265 /** 266 * This scanner is working solely on the in-memory MemStore and doesn't work on store files, 267 * MutableCellSetSegmentScanner always does the seek, therefore always returning true. 268 */ 269 @Override 270 public boolean realSeekDone() { 271 return true; 272 } 273 274 /** 275 * This function should be never called on scanners that always do real seek operations (i.e. most 276 * of the scanners and also this one). The easiest way to achieve this is to call 277 * {@link #realSeekDone()} first. 278 */ 279 @Override 280 public void enforceSeek() throws IOException { 281 throw new NotImplementedException("enforceSeek cannot be called on a SegmentScanner"); 282 } 283 284 /** Returns true if this is a file scanner. Otherwise a memory scanner is assumed. */ 285 @Override 286 public boolean isFileScanner() { 287 return false; 288 } 289 290 @Override 291 public void recordBlockSize(IntConsumer blockSizeConsumer) { 292 // do nothing 293 } 294 295 @Override 296 public Path getFilePath() { 297 return null; 298 } 299 300 /** 301 * @return the next key in the index (the key to seek to the next block) if known, or null 302 * otherwise Not relevant for in-memory scanner 303 */ 304 @Override 305 public Cell getNextIndexedKey() { 306 return null; 307 } 308 309 /** 310 * Called after a batch of rows scanned (RPC) and set to be returned to client. Any in between 311 * cleanup can be done here. Nothing to be done for MutableCellSetSegmentScanner. 312 */ 313 @Override 314 public void shipped() throws IOException { 315 // do nothing 316 } 317 318 // debug method 319 @Override 320 public String toString() { 321 String res = "Store segment scanner of type " + this.getClass().getName() + "; "; 322 res += "Scanner order " + getScannerOrder() + "; "; 323 res += getSegment().toString(); 324 return res; 325 } 326 327 /********************* Private Methods **********************/ 328 329 private Segment getSegment() { 330 return segment; 331 } 332 333 /** 334 * Private internal method for iterating over the segment, skipping the cells with irrelevant MVCC 335 */ 336 protected void updateCurrent() { 337 Cell next = null; 338 339 try { 340 while (iter.hasNext()) { 341 next = iter.next(); 342 if (next.getSequenceId() <= this.readPoint) { 343 current = next; 344 return;// skip irrelevant versions 345 } 346 // for backwardSeek() stay in the boundaries of a single row 347 if (stopSkippingKVsIfNextRow && segment.compareRows(next, stopSkippingKVsRow) > 0) { 348 current = null; 349 return; 350 } 351 } // end of while 352 353 current = null; // nothing found 354 } finally { 355 if (next != null) { 356 // in all cases, remember the last KV we iterated to, needed for reseek() 357 last = next; 358 } 359 } 360 } 361 362 /** 363 * Private internal method that returns the higher of the two key values, or null if they are both 364 * null 365 */ 366 private Cell getHighest(Cell first, Cell second) { 367 if (first == null && second == null) { 368 return null; 369 } 370 if (first != null && second != null) { 371 int compare = segment.compare(first, second); 372 return (compare > 0 ? first : second); 373 } 374 return (first != null ? first : second); 375 } 376}