001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.wal; 019 020import static org.apache.commons.lang3.StringUtils.isNumeric; 021 022import java.io.Closeable; 023import java.io.IOException; 024import java.util.List; 025import java.util.Map; 026import java.util.Set; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.regionserver.wal.CompressionContext; 030import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException; 031import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener; 032import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost; 033import org.apache.hadoop.hbase.regionserver.wal.WALSyncTimeoutIOException; 034import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.apache.yetus.audience.InterfaceStability; 037 038/** 039 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides 040 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some 041 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine 042 * if they have already seen a given WAL. 043 */ 044@InterfaceAudience.Private 045@InterfaceStability.Evolving 046public interface WAL extends Closeable, WALFileLengthProvider { 047 048 /** 049 * Registers WALActionsListener 050 */ 051 void registerWALActionsListener(final WALActionsListener listener); 052 053 /** 054 * Unregisters WALActionsListener 055 */ 056 boolean unregisterWALActionsListener(final WALActionsListener listener); 057 058 /** 059 * Roll the log writer. That is, start writing log messages to a new file. 060 * <p/> 061 * The implementation is synchronized in order to make sure there's one rollWriter running at any 062 * given time. 063 * @return If lots of logs, flush the stores of returned regions so next time through we can clean 064 * logs. Returns null if nothing to flush. Names are actual region names as returned by 065 * {@link RegionInfo#getEncodedName()} 066 */ 067 Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException; 068 069 /** 070 * Roll the log writer. That is, start writing log messages to a new file. 071 * <p/> 072 * The implementation is synchronized in order to make sure there's one rollWriter running at any 073 * given time. If true, force creation of a new writer even if no entries have been written to the 074 * current writer 075 * @return If lots of logs, flush the stores of returned regions so next time through we can clean 076 * logs. Returns null if nothing to flush. Names are actual region names as returned by 077 * {@link RegionInfo#getEncodedName()} 078 */ 079 Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException; 080 081 /** 082 * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits 083 * are left in place in backing storage to be replayed later. 084 */ 085 void shutdown() throws IOException; 086 087 /** 088 * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying 089 * resources after this call; i.e. filesystem based WALs can archive or delete files. 090 */ 091 @Override 092 void close() throws IOException; 093 094 /** 095 * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will 096 * also have transitioned through the memstore. 097 * <p/> 098 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 099 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 100 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 101 * @param info the regioninfo associated with append 102 * @param key Modified by this call; we add to it this edits region edit/sequence id. 103 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 104 * sequence id that is after all currently appended edits. 105 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 106 * in it. 107 * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit) 108 */ 109 long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 110 111 /** 112 * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a 113 * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region 114 * close. The difference between a 'marker' append and a 'data' append as in 115 * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned 116 * through the memstore. 117 * <p/> 118 * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must 119 * have its region edit/sequence id assigned else it messes up our unification of mvcc and 120 * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in. 121 * @param info the regioninfo associated with append 122 * @param key Modified by this call; we add to it this edits region edit/sequence id. 123 * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit 124 * sequence id that is after all currently appended edits. 125 * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id 126 * in it. 127 * @see #appendData(RegionInfo, WALKeyImpl, WALEdit) 128 */ 129 long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException; 130 131 /** 132 * updates the seuence number of a specific store. depending on the flag: replaces current seq 133 * number if the given seq id is bigger, or even if it is lower than existing one 134 */ 135 void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid, 136 boolean onlyIfGreater); 137 138 /** 139 * Sync what we have in the WAL. 140 * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}. 141 */ 142 void sync() throws IOException; 143 144 /** 145 * Sync the WAL if the txId was not already sync'd. 146 * @param txid Transaction id to sync to. 147 * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}. 148 */ 149 void sync(long txid) throws IOException; 150 151 /** 152 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 153 * vs hsync. 154 * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}. 155 */ 156 default void sync(boolean forceSync) throws IOException { 157 sync(); 158 } 159 160 /** 161 * @param txid Transaction id to sync to. 162 * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush 163 * vs hsync. 164 * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}. 165 */ 166 default void sync(long txid, boolean forceSync) throws IOException { 167 sync(txid); 168 } 169 170 /** 171 * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be 172 * able to do accounting to figure which WALs can be let go. This method tells WAL that some 173 * region is about to flush. The flush can be the whole region or for a column family of the 174 * region only. 175 * <p> 176 * Currently, it is expected that the update lock is held for the region; i.e. no concurrent 177 * appends while we set up cache flush. 178 * @param families Families to flush. May be a subset of all families in the region. 179 * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are 180 * flushing a subset of all families but there are no edits in those families not being 181 * flushed; in other words, this is effectively same as a flush of all of the region 182 * though we were passed a subset of regions. Otherwise, it returns the sequence id of the 183 * oldest/lowest outstanding edit. 184 * @see #completeCacheFlush(byte[], long) 185 * @see #abortCacheFlush(byte[]) 186 */ 187 Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families); 188 189 Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq); 190 191 /** 192 * Complete the cache flush. 193 * @param encodedRegionName Encoded region name. 194 * @param maxFlushedSeqId The maxFlushedSeqId for this flush. There is no edit in memory that is 195 * less that this sequence id. 196 * @see #startCacheFlush(byte[], Set) 197 * @see #abortCacheFlush(byte[]) 198 */ 199 void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId); 200 201 /** 202 * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush 203 * currently is a restart of the regionserver so the snapshot content dropped by the failure gets 204 * restored to the memstore. 205 * @param encodedRegionName Encoded region name. 206 */ 207 void abortCacheFlush(byte[] encodedRegionName); 208 209 /** Returns Coprocessor host. */ 210 WALCoprocessorHost getCoprocessorHost(); 211 212 /** 213 * Gets the earliest unflushed sequence id in the memstore for the region. 214 * @param encodedRegionName The region to get the number for. 215 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 216 * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal 217 * workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])} 218 */ 219 @Deprecated 220 long getEarliestMemStoreSeqNum(byte[] encodedRegionName); 221 222 /** 223 * Gets the earliest unflushed sequence id in the memstore for the store. 224 * @param encodedRegionName The region to get the number for. 225 * @param familyName The family to get the number for. 226 * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent. 227 */ 228 long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName); 229 230 /** 231 * Human readable identifying information about the state of this WAL. Implementors are encouraged 232 * to include information appropriate for debugging. Consumers are advised not to rely on the 233 * details of the returned String; it does not have a defined structure. 234 */ 235 @Override 236 String toString(); 237 238 /** 239 * Utility class that lets us keep track of the edit with it's key. 240 */ 241 class Entry { 242 private final WALEdit edit; 243 private final WALKeyImpl key; 244 245 public Entry() { 246 this(new WALKeyImpl(), new WALEdit()); 247 } 248 249 /** 250 * Constructor for both params 251 * @param edit log's edit 252 * @param key log's key 253 */ 254 public Entry(WALKeyImpl key, WALEdit edit) { 255 this.key = key; 256 this.edit = edit; 257 } 258 259 /** 260 * Gets the edit 261 */ 262 public WALEdit getEdit() { 263 return edit; 264 } 265 266 /** 267 * Gets the key 268 */ 269 public WALKeyImpl getKey() { 270 return key; 271 } 272 273 /** 274 * Set compression context for this entry. Compression context 275 * @deprecated deparcated since hbase 2.1.0 276 */ 277 @Deprecated 278 public void setCompressionContext(CompressionContext compressionContext) { 279 key.setCompressionContext(compressionContext); 280 } 281 282 @Override 283 public String toString() { 284 return this.key + "=" + this.edit; 285 } 286 } 287 288 /** 289 * Split a WAL filename to get a start time. WALs usually have the time we start writing to them 290 * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it 291 * is a WAL for the meta table. For example, WALs might look like this 292 * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the 293 * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a synchronous replication 294 * WAL which adds a '.syncrep' suffix. Check for these. File also may have no timestamp on it. For 295 * example the recovered.edits files are WALs but are named in ascending order. Here is an 296 * example: 0000000000000016310. Allow for this. 297 * @param name Name of the WAL file. 298 * @return Timestamp or -1. 299 */ 300 public static long getTimestamp(String name) { 301 String[] splits = name.split("\\."); 302 if (splits.length <= 1) { 303 return -1; 304 } 305 String timestamp = splits[splits.length - 1]; 306 if (!isNumeric(timestamp)) { 307 // Its a '.meta' or a '.syncrep' suffix. 308 timestamp = splits[splits.length - 2]; 309 if (!isNumeric(timestamp)) { 310 return -1; 311 } 312 } 313 return Long.parseLong(timestamp); 314 } 315}