/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.wal;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
import org.apache.hadoop.hbase.regionserver.wal.WALSyncTimeoutIOException;
import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;

/**
 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some
 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine
 * if they have already seen a given WAL.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface WAL extends Closeable, WALFileLengthProvider {

  /**
   * Used to initialize the WAL. Usually this is for creating the first writer.
   * <p>
   * The default implementation does nothing.
   */
  default void init() throws IOException {
  }

  /**
   * Registers a {@link WALActionsListener}.
   * @param listener the listener to register
   */
  void registerWALActionsListener(final WALActionsListener listener);

  /**
   * Unregisters a {@link WALActionsListener}.
   * @param listener the listener to unregister
   * @return NOTE(review): the meaning of the returned flag is not documented in this interface;
   *         presumably whether the listener had been registered -- confirm against implementations.
   */
  boolean unregisterWALActionsListener(final WALActionsListener listener);

  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * <p>
   * The implementation is synchronized in order to make sure there's one rollWriter running at any
   * given time.
   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
   *         {@link RegionInfo#getEncodedName()}
   */
  Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException;

  /**
   * Roll the log writer. That is, start writing log messages to a new file.
   * <p>
   * The implementation is synchronized in order to make sure there's one rollWriter running at any
   * given time.
   * @param force If true, force creation of a new writer even if no entries have been written to
   *              the current writer
   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
   *         {@link RegionInfo#getEncodedName()}
   */
  Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException;

  /**
   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits
   * are left in place in backing storage to be replayed later.
   */
  void shutdown() throws IOException;

  /**
   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying
   * resources after this call; i.e. filesystem based WALs can archive or delete files.
   */
  @Override
  void close() throws IOException;

  /**
   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
   * also have transitioned through the memstore.
   * <p>
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
   * @param info  the regioninfo associated with append
   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *              sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
   *         in it.
   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;

  /**
   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a
   * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region
   * close. The difference between a 'marker' append and a 'data' append as in
   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)} is that a marker will not have
   * transitioned through the memstore.
   * <p>
   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
   * @param info  the regioninfo associated with append
   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
   *              sequence id that is after all currently appended edits.
   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
   *         in it.
   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
   */
  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;

  /**
   * Updates the sequence number of a specific store. Depending on the flag, either replaces the
   * current sequence number only if the given sequence id is bigger, or replaces it even if it is
   * lower than the existing one.
   * @param encodedRegionName encoded name of the region the store belongs to
   * @param familyName        name of the column family identifying the store
   * @param sequenceid        the sequence id to record for the store
   * @param onlyIfGreater     flag selecting between the two behaviors described above; the name
   *                          suggests {@code true} means "replace only if the given id is bigger"
   *                          -- NOTE(review): confirm against implementations
   */
  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
    boolean onlyIfGreater);

  /**
   * Sync what we have in the WAL.
   * @throws WALSyncTimeoutIOException if the sync times out.
   */
  void sync() throws IOException;

  /**
   * Sync the WAL if the txId was not already sync'd.
   * @param txid Transaction id to sync to.
   * @throws WALSyncTimeoutIOException if the sync times out.
   */
  void sync(long txid) throws IOException;

  /**
   * Sync what we have in the WAL, with a hint on whether to force a sync.
   * <p>
   * This default implementation ignores {@code forceSync} and delegates to {@link #sync()}.
   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
   *                  vs hsync.
   * @throws WALSyncTimeoutIOException if the sync times out.
   */
  default void sync(boolean forceSync) throws IOException {
    sync();
  }

  /**
   * Sync the WAL up to the given transaction id, with a hint on whether to force a sync.
   * <p>
   * This default implementation ignores {@code forceSync} and delegates to {@link #sync(long)}.
   * @param txid      Transaction id to sync to.
   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
   *                  vs hsync.
   * @throws WALSyncTimeoutIOException if the sync times out.
   */
  default void sync(long txid, boolean forceSync) throws IOException {
    sync(txid);
  }

  /**
   * WAL keeps track of the sequence numbers that are as yet not flushed in memstores in order to be
   * able to do accounting to figure which WALs can be let go. This method tells WAL that some
   * region is about to flush. The flush can be the whole region or for a column family of the
   * region only.
   * <p>
   * Currently, it is expected that the update lock is held for the region; i.e. no concurrent
   * appends while we set up cache flush.
   * @param encodedRegionName Encoded region name.
   * @param families          Families to flush. May be a subset of all families in the region.
   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are
   *         flushing a subset of all families but there are no edits in those families not being
   *         flushed; in other words, this is effectively same as a flush of all of the region
   *         though we were passed a subset of families. Otherwise, it returns the sequence id of
   *         the oldest/lowest outstanding edit.
   * @see #completeCacheFlush(byte[], long)
   * @see #abortCacheFlush(byte[])
   */
  Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families);

  /**
   * Variant of {@link #startCacheFlush(byte[], Set)} that takes a map keyed by family instead of a
   * set of families.
   * <p>
   * NOTE(review): this overload carried no documentation; the parameter name suggests the map
   * values are per-family sequence ids, and the return value presumably follows the same contract
   * as the {@code Set} variant -- confirm against implementations.
   * @param encodedRegionName Encoded region name.
   * @param familyToSeq       map keyed by family name; values presumably sequence ids (see note)
   * @see #completeCacheFlush(byte[], long)
   * @see #abortCacheFlush(byte[])
   */
  Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq);

  /**
   * Complete the cache flush.
   * @param encodedRegionName Encoded region name.
   * @param maxFlushedSeqId   The maxFlushedSeqId for this flush. There is no edit in memory that is
   *                          less than this sequence id.
   * @see #startCacheFlush(byte[], Set)
   * @see #abortCacheFlush(byte[])
   */
  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);

  /**
   * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush
   * currently is a restart of the regionserver so the snapshot content dropped by the failure gets
   * restored to the memstore.
   * @param encodedRegionName Encoded region name.
   */
  void abortCacheFlush(byte[] encodedRegionName);

  /** Returns Coprocessor host. */
  WALCoprocessorHost getCoprocessorHost();

  /**
   * Gets the earliest unflushed sequence id in the memstore for the store.
   * @param encodedRegionName The region to get the number for.
   * @param familyName        The family to get the number for.
   * @return The earliest/lowest/oldest sequence id if present, {@link HConstants#NO_SEQNUM} if
   *         absent.
   */
  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);

  /**
   * Tell the WAL that when creating new writer you can skip creating the remote writer.
   * <p>
   * Used by sync replication for switching states from ACTIVE, where the remote cluster is broken.
   * <p>
   * The default implementation does nothing.
   * @param markerEditOnly NOTE(review): not documented in this interface -- confirm the semantics
   *                       against implementations.
   */
  default void skipRemoteWAL(boolean markerEditOnly) {
  }

  /**
   * Human readable identifying information about the state of this WAL. Implementors are encouraged
   * to include information appropriate for debugging. Consumers are advised not to rely on the
   * details of the returned String; it does not have a defined structure.
   */
  @Override
  String toString();

  /**
   * Utility class that lets us keep track of the edit with its key.
   */
  class Entry {
    private final WALEdit edit;
    private final WALKeyImpl key;

    /**
     * Creates an entry holding a newly constructed {@link WALKeyImpl} and {@link WALEdit}.
     */
    public Entry() {
      this(new WALKeyImpl(), new WALEdit());
    }

    /**
     * Constructor for both params
     * @param key  log's key
     * @param edit log's edit
     */
    public Entry(WALKeyImpl key, WALEdit edit) {
      this.key = key;
      this.edit = edit;
    }

    /**
     * Gets the edit
     */
    public WALEdit getEdit() {
      return edit;
    }

    /**
     * Gets the key
     */
    public WALKeyImpl getKey() {
      return key;
    }

    /**
     * Renders this entry as the key, an equals sign, then the edit.
     */
    @Override
    public String toString() {
      return this.key + "=" + this.edit;
    }
  }
}