001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver.querymatcher; 019 020import java.io.IOException; 021import org.apache.hadoop.hbase.ExtendedCell; 022import org.apache.hadoop.hbase.regionserver.ShipperListener; 023import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode; 024import org.apache.yetus.audience.InterfaceAudience; 025 026/** 027 * Implementing classes of this interface will be used for the tracking and enforcement of columns 028 * and numbers of versions and timeToLive during the course of a Get or Scan operation. 029 * <p> 030 * Currently there are two different types of Store/Family-level queries. 031 * <ul> 032 * <li>{@link ExplicitColumnTracker} is used when the query specifies one or more column qualifiers 033 * to return in the family.</li> 034 * <li>{@link ScanWildcardColumnTracker} is used when no columns are explicitly specified.</li> 035 * </ul> 036 * <p> 037 * This class is utilized by {@link ScanQueryMatcher} mainly through two methods: 038 * <ul> 039 * <li>{@link #checkColumn} is called when a Put satisfies all other conditions of the query.</li> 040 * <li>{@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher believes that the current 041 * column should be skipped (by timestamp, filter etc.)</li> 042 * </ul> 043 * <p> 044 * These two methods returns a 045 * {@link org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode} to define 046 * what action should be taken. 047 * <p> 048 * This class is NOT thread-safe as queries are never multi-threaded 049 */ 050@InterfaceAudience.Private 051public interface ColumnTracker extends ShipperListener { 052 053 /** 054 * Checks if the column is present in the list of requested columns by returning the match code 055 * instance. It does not check against the number of versions for the columns asked for. To do the 056 * version check, one has to call {@link #checkVersions(ExtendedCell, long, byte, boolean)} method 057 * based on the return type (INCLUDE) of this method. The values that can be returned by this 058 * method are {@link MatchCode#INCLUDE}, {@link MatchCode#SEEK_NEXT_COL} and 059 * {@link MatchCode#SEEK_NEXT_ROW}. 060 * @param cell a cell with the column to match against 061 * @param type The type of the Cell 062 * @return The match code instance. 063 * @throws IOException in case there is an internal consistency problem caused by a data 064 * corruption. 065 */ 066 ScanQueryMatcher.MatchCode checkColumn(ExtendedCell cell, byte type) throws IOException; 067 068 /** 069 * Keeps track of the number of versions for the columns asked for. It assumes that the user has 070 * already checked if the cell needs to be included by calling the 071 * {@link #checkColumn(ExtendedCell, byte)} method. The enum values returned by this method are 072 * {@link MatchCode#SKIP}, {@link MatchCode#INCLUDE}, {@link MatchCode#INCLUDE_AND_SEEK_NEXT_COL} 073 * and {@link MatchCode#INCLUDE_AND_SEEK_NEXT_ROW}. Implementations which include all the columns 074 * could just return {@link MatchCode#INCLUDE} in the {@link #checkColumn(ExtendedCell, byte)} 075 * method and perform all the operations in this checkVersions method. 076 * @param cell a cell with the column to match against 077 * @param timestamp The timestamp of the cell. 078 * @param type the type of the key value (Put/Delete) 079 * @param ignoreCount indicates if the KV needs to be excluded while counting (used during 080 * compactions. We only count KV's that are older than all the scanners' read 081 * points.) 082 * @return the scan query matcher match code instance 083 * @throws IOException in case there is an internal consistency problem caused by a data 084 * corruption. 085 */ 086 ScanQueryMatcher.MatchCode checkVersions(ExtendedCell cell, long timestamp, byte type, 087 boolean ignoreCount) throws IOException; 088 089 /** 090 * Resets the Matcher 091 */ 092 void reset(); 093 094 /** Returns <code>true</code> when done. */ 095 boolean done(); 096 097 /** 098 * Used by matcher and scan/get to get a hint of the next column to seek to after checkColumn() 099 * returns SKIP. Returns the next interesting column we want, or NULL there is none (wildcard 100 * scanner). Implementations aren't required to return anything useful unless the most recent call 101 * was to checkColumn() and the return code was SKIP. This is pretty implementation detail-y, but 102 * optimizations are like that. 103 * @return null, or a ColumnCount that we should seek to 104 */ 105 ColumnCount getColumnHint(); 106 107 /** 108 * Retrieve the MatchCode for the next row or column 109 */ 110 MatchCode getNextRowOrNextColumn(ExtendedCell cell); 111 112 /** 113 * Give the tracker a chance to declare it's done based on only the timestamp to allow an early 114 * out. 115 * @return <code>true</code> to early out based on timestamp. 116 */ 117 boolean isDone(long timestamp); 118 119 /** 120 * This method is used to inform the column tracker that we are done with this column. We may get 121 * this information from external filters or timestamp range and we then need to indicate this 122 * information to tracker. It is currently implemented for ExplicitColumnTracker. 123 */ 124 default void doneWithColumn(ExtendedCell cell) { 125 } 126}