001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.wal;
019
020import static org.apache.commons.lang3.StringUtils.isNumeric;
021
022import java.io.Closeable;
023import java.io.IOException;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.regionserver.wal.CompressionContext;
030import org.apache.hadoop.hbase.regionserver.wal.FailedLogCloseException;
031import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
032import org.apache.hadoop.hbase.regionserver.wal.WALCoprocessorHost;
033import org.apache.hadoop.hbase.regionserver.wal.WALSyncTimeoutIOException;
034import org.apache.hadoop.hbase.replication.regionserver.WALFileLengthProvider;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.apache.yetus.audience.InterfaceStability;
037
038/**
039 * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
040 * APIs for WAL users (such as RegionServer) to use the WAL (do append, sync, etc). Note that some
041 * internals, such as log rolling and performance evaluation tools, will use WAL.equals to determine
042 * if they have already seen a given WAL.
043 */
044@InterfaceAudience.Private
045@InterfaceStability.Evolving
046public interface WAL extends Closeable, WALFileLengthProvider {
047
048  /**
049   * Registers WALActionsListener
050   */
051  void registerWALActionsListener(final WALActionsListener listener);
052
053  /**
054   * Unregisters WALActionsListener
055   */
056  boolean unregisterWALActionsListener(final WALActionsListener listener);
057
058  /**
059   * Roll the log writer. That is, start writing log messages to a new file.
060   * <p/>
061   * The implementation is synchronized in order to make sure there's one rollWriter running at any
062   * given time.
063   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
064   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
065   *         {@link RegionInfo#getEncodedName()}
066   */
067  Map<byte[], List<byte[]>> rollWriter() throws FailedLogCloseException, IOException;
068
069  /**
070   * Roll the log writer. That is, start writing log messages to a new file.
071   * <p/>
072   * The implementation is synchronized in order to make sure there's one rollWriter running at any
073   * given time. If true, force creation of a new writer even if no entries have been written to the
074   * current writer
075   * @return If lots of logs, flush the stores of returned regions so next time through we can clean
076   *         logs. Returns null if nothing to flush. Names are actual region names as returned by
077   *         {@link RegionInfo#getEncodedName()}
078   */
079  Map<byte[], List<byte[]>> rollWriter(boolean force) throws IOException;
080
081  /**
082   * Stop accepting new writes. If we have unsynced writes still in buffer, sync them. Extant edits
083   * are left in place in backing storage to be replayed later.
084   */
085  void shutdown() throws IOException;
086
087  /**
088   * Caller no longer needs any edits from this WAL. Implementers are free to reclaim underlying
089   * resources after this call; i.e. filesystem based WALs can archive or delete files.
090   */
091  @Override
092  void close() throws IOException;
093
094  /**
095   * Append a set of data edits to the WAL. 'Data' here means that the content in the edits will
096   * also have transitioned through the memstore.
097   * <p/>
098   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
099   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
100   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
101   * @param info  the regioninfo associated with append
102   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
103   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
104   *              sequence id that is after all currently appended edits.
105   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
106   *         in it.
107   * @see #appendMarker(RegionInfo, WALKeyImpl, WALEdit)
108   */
109  long appendData(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
110
111  /**
112   * Append an operational 'meta' event marker edit to the WAL. A marker meta edit could be a
113   * FlushDescriptor, a compaction marker, or a region event marker; e.g. region open or region
114   * close. The difference between a 'marker' append and a 'data' append as in
115   * {@link #appendData(RegionInfo, WALKeyImpl, WALEdit)}is that a marker will not have transitioned
116   * through the memstore.
117   * <p/>
118   * The WAL is not flushed/sync'd after this transaction completes BUT on return this edit must
119   * have its region edit/sequence id assigned else it messes up our unification of mvcc and
120   * sequenceid. On return <code>key</code> will have the region edit/sequence id filled in.
121   * @param info  the regioninfo associated with append
122   * @param key   Modified by this call; we add to it this edits region edit/sequence id.
123   * @param edits Edits to append. MAY CONTAIN NO EDITS for case where we want to get an edit
124   *              sequence id that is after all currently appended edits.
125   * @return Returns a 'transaction id' and <code>key</code> will have the region edit/sequence id
126   *         in it.
127   * @see #appendData(RegionInfo, WALKeyImpl, WALEdit)
128   */
129  long appendMarker(RegionInfo info, WALKeyImpl key, WALEdit edits) throws IOException;
130
131  /**
132   * updates the seuence number of a specific store. depending on the flag: replaces current seq
133   * number if the given seq id is bigger, or even if it is lower than existing one
134   */
135  void updateStore(byte[] encodedRegionName, byte[] familyName, Long sequenceid,
136    boolean onlyIfGreater);
137
138  /**
139   * Sync what we have in the WAL.
140   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
141   */
142  void sync() throws IOException;
143
144  /**
145   * Sync the WAL if the txId was not already sync'd.
146   * @param txid Transaction id to sync to.
147   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
148   */
149  void sync(long txid) throws IOException;
150
151  /**
152   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
153   *                  vs hsync.
154   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
155   */
156  default void sync(boolean forceSync) throws IOException {
157    sync();
158  }
159
160  /**
161   * @param txid      Transaction id to sync to.
162   * @param forceSync Flag to force sync rather than flushing to the buffer. Example - Hadoop hflush
163   *                  vs hsync.
164   * @throws when timeout, it would throw {@link WALSyncTimeoutIOException}.
165   */
166  default void sync(long txid, boolean forceSync) throws IOException {
167    sync(txid);
168  }
169
170  /**
171   * WAL keeps track of the sequence numbers that are as yet not flushed im memstores in order to be
172   * able to do accounting to figure which WALs can be let go. This method tells WAL that some
173   * region is about to flush. The flush can be the whole region or for a column family of the
174   * region only.
175   * <p>
176   * Currently, it is expected that the update lock is held for the region; i.e. no concurrent
177   * appends while we set up cache flush.
178   * @param families Families to flush. May be a subset of all families in the region.
179   * @return Returns {@link HConstants#NO_SEQNUM} if we are flushing the whole region OR if we are
180   *         flushing a subset of all families but there are no edits in those families not being
181   *         flushed; in other words, this is effectively same as a flush of all of the region
182   *         though we were passed a subset of regions. Otherwise, it returns the sequence id of the
183   *         oldest/lowest outstanding edit.
184   * @see #completeCacheFlush(byte[], long)
185   * @see #abortCacheFlush(byte[])
186   */
187  Long startCacheFlush(final byte[] encodedRegionName, Set<byte[]> families);
188
189  Long startCacheFlush(final byte[] encodedRegionName, Map<byte[], Long> familyToSeq);
190
191  /**
192   * Complete the cache flush.
193   * @param encodedRegionName Encoded region name.
194   * @param maxFlushedSeqId   The maxFlushedSeqId for this flush. There is no edit in memory that is
195   *                          less that this sequence id.
196   * @see #startCacheFlush(byte[], Set)
197   * @see #abortCacheFlush(byte[])
198   */
199  void completeCacheFlush(final byte[] encodedRegionName, long maxFlushedSeqId);
200
201  /**
202   * Abort a cache flush. Call if the flush fails. Note that the only recovery for an aborted flush
203   * currently is a restart of the regionserver so the snapshot content dropped by the failure gets
204   * restored to the memstore.
205   * @param encodedRegionName Encoded region name.
206   */
207  void abortCacheFlush(byte[] encodedRegionName);
208
209  /** Returns Coprocessor host. */
210  WALCoprocessorHost getCoprocessorHost();
211
212  /**
213   * Gets the earliest unflushed sequence id in the memstore for the region.
214   * @param encodedRegionName The region to get the number for.
215   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
216   * @deprecated Since version 1.2.0. Removing because not used and exposes subtle internal
217   *             workings. Use {@link #getEarliestMemStoreSeqNum(byte[], byte[])}
218   */
219  @Deprecated
220  long getEarliestMemStoreSeqNum(byte[] encodedRegionName);
221
222  /**
223   * Gets the earliest unflushed sequence id in the memstore for the store.
224   * @param encodedRegionName The region to get the number for.
225   * @param familyName        The family to get the number for.
226   * @return The earliest/lowest/oldest sequence id if present, HConstants.NO_SEQNUM if absent.
227   */
228  long getEarliestMemStoreSeqNum(byte[] encodedRegionName, byte[] familyName);
229
230  /**
231   * Human readable identifying information about the state of this WAL. Implementors are encouraged
232   * to include information appropriate for debugging. Consumers are advised not to rely on the
233   * details of the returned String; it does not have a defined structure.
234   */
235  @Override
236  String toString();
237
238  /**
239   * Utility class that lets us keep track of the edit with it's key.
240   */
241  class Entry {
242    private final WALEdit edit;
243    private final WALKeyImpl key;
244
245    public Entry() {
246      this(new WALKeyImpl(), new WALEdit());
247    }
248
249    /**
250     * Constructor for both params
251     * @param edit log's edit
252     * @param key  log's key
253     */
254    public Entry(WALKeyImpl key, WALEdit edit) {
255      this.key = key;
256      this.edit = edit;
257    }
258
259    /**
260     * Gets the edit
261     */
262    public WALEdit getEdit() {
263      return edit;
264    }
265
266    /**
267     * Gets the key
268     */
269    public WALKeyImpl getKey() {
270      return key;
271    }
272
273    /**
274     * Set compression context for this entry. Compression context
275     * @deprecated deparcated since hbase 2.1.0
276     */
277    @Deprecated
278    public void setCompressionContext(CompressionContext compressionContext) {
279      key.setCompressionContext(compressionContext);
280    }
281
282    @Override
283    public String toString() {
284      return this.key + "=" + this.edit;
285    }
286  }
287
288  /**
289   * Split a WAL filename to get a start time. WALs usually have the time we start writing to them
290   * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it
291   * is a WAL for the meta table. For example, WALs might look like this
292   * <code>10.20.20.171%3A60020.1277499063250</code> where <code>1277499063250</code> is the
293   * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a synchronous replication
294   * WAL which adds a '.syncrep' suffix. Check for these. File also may have no timestamp on it. For
295   * example the recovered.edits files are WALs but are named in ascending order. Here is an
296   * example: 0000000000000016310. Allow for this.
297   * @param name Name of the WAL file.
298   * @return Timestamp or -1.
299   */
300  public static long getTimestamp(String name) {
301    String[] splits = name.split("\\.");
302    if (splits.length <= 1) {
303      return -1;
304    }
305    String timestamp = splits[splits.length - 1];
306    if (!isNumeric(timestamp)) {
307      // Its a '.meta' or a '.syncrep' suffix.
308      timestamp = splits[splits.length - 2];
309      if (!isNumeric(timestamp)) {
310        return -1;
311      }
312    }
313    return Long.parseLong(timestamp);
314  }
315}