/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupObserver;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.master.BackupLogCleaner;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.procedure.ProcedureManagerHost;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Handles backup requests: creates backup info records in the backup system table to keep track
 * of backup sessions, and dispatches backup requests.
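 * <p>
 * A minimal usage sketch (illustrative only; assumes backup is enabled via
 * {@code hbase.backup.enable} and that {@code conn} and {@code conf} are an open connection and
 * its configuration):
 *
 * <pre>
 * try (BackupManager manager = new BackupManager(conn, conf)) {
 *   manager.initialize();
 *   BackupInfo info = manager.createBackupInfo("backup_12345", BackupType.FULL, tables,
 *     "hdfs://backup/root", 3, 100L, false);
 *   manager.startBackupSession();
 *   // ... run the backup, persisting progress via updateBackupInfo(info) ...
 *   manager.finishBackupSession();
 * }
 * </pre>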
 */
@InterfaceAudience.Private
public class BackupManager implements Closeable {
  // Timeout, in seconds, to acquire the backup exclusive-operation lock
  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
    "hbase.backup.exclusive.op.timeout.seconds";
  // Default timeout: one hour, in seconds
  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
  private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);

  protected Configuration conf = null;
  protected BackupInfo backupInfo = null;
  protected BackupSystemTable systemTable;
  protected final Connection conn;

  /**
   * Backup manager constructor.
   * @param conn connection
   * @param conf configuration
   * @throws IOException exception
   */
  public BackupManager(Connection conn, Configuration conf) throws IOException {
    if (
      !conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
        BackupRestoreConstants.BACKUP_ENABLE_DEFAULT)
    ) {
      throw new BackupException("HBase backup is not enabled. Check your "
        + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting.");
    }
    this.conf = conf;
    this.conn = conn;
    this.systemTable = new BackupSystemTable(conn);
  }

  /**
   * Returns backup info
   */
  protected BackupInfo getBackupInfo() {
    return backupInfo;
  }

  /**
   * Modifies the master's configuration in order to inject backup-related features (tests only).
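   * <p>
   * A minimal sketch of a test-side call (illustrative only):
   *
   * <pre>
   * Configuration conf = HBaseConfiguration.create();
   * conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
   * BackupManager.decorateMasterConfiguration(conf);
   * // conf now lists BackupLogCleaner, LogRollMasterProcedureManager, BackupHFileCleaner and
   * // BackupObserver in the corresponding plugin/coprocessor keys
   * </pre>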
   * @param conf configuration
   */
  public static void decorateMasterConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }
    // Add WAL archive cleaner plug-in
    String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
    String cleanerClass = BackupLogCleaner.class.getCanonicalName();
    if (plugins == null) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, cleanerClass);
    } else if (!plugins.contains(cleanerClass)) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
    }

    String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY);
    String masterProcedureClass = LogRollMasterProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass);
    } else if (!classes.contains(masterProcedureClass)) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY,
        classes + "," + masterProcedureClass);
    }

    plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
      (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName());

    String observerClass = BackupObserver.class.getName();
    String masterCoProc = conf.get(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
      (masterCoProc == null ? "" : masterCoProc + ",") + observerClass);

    if (LOG.isDebugEnabled()) {
      LOG.debug(
        "Added log cleaner: {}. Added master procedure manager: {}."
          + " Added hfile cleaner: {}. Added master observer: {}",
        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName(), observerClass);
    }
  }

  /**
   * Modifies the region server configuration in order to inject backup-related features (tests
   * only).
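   * <p>
   * Analogous to {@link #decorateMasterConfiguration(Configuration)}; an illustrative check of
   * the result:
   *
   * <pre>
   * BackupManager.decorateRegionServerConfiguration(conf);
   * // conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY) now includes BackupObserver
   * </pre>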
   * @param conf configuration
   */
  public static void decorateRegionServerConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }

    String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY);
    String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass);
    } else if (!classes.contains(regionProcedureClass)) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY,
        classes + "," + regionProcedureClass);
    }
    String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
    String regionObserverClass = BackupObserver.class.getName();
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      (coproc == null ? "" : coproc + ",") + regionObserverClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added region procedure manager: {}. Added region observer: {}",
        regionProcedureClass, regionObserverClass);
    }
  }

  public static boolean isBackupEnabled(Configuration conf) {
    return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT);
  }

  /**
   * Get configuration
   */
  Configuration getConf() {
    return conf;
  }

  /**
   * Stop all the work of backup.
   */
  @Override
  public void close() {
    if (systemTable != null) {
      try {
        systemTable.close();
      } catch (Exception e) {
        LOG.error(e.toString(), e);
      }
    }
  }

  /**
   * Creates a backup info based on the input backup request.
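   * <p>
   * Illustrative call (the backup id and root dir are hypothetical):
   *
   * <pre>
   * BackupInfo info = manager.createBackupInfo("backup_12345", BackupType.FULL,
   *   Arrays.asList(TableName.valueOf("t1")), "hdfs://backup/root", 3, 100L, false);
   * </pre>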
   * @param backupId         backup id
   * @param type             type
   * @param tableList        table list
   * @param targetRootDir    root dir
   * @param workers          number of parallel workers
   * @param bandwidth        bandwidth per worker in MB per sec
   * @param noChecksumVerify true to skip checksum verification
   * @return the new backup info
   * @throws BackupException exception
   */
  public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
    String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify)
    throws BackupException {
    if (targetRootDir == null) {
      throw new BackupException("Wrong backup request parameter: target backup root directory");
    }

    if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) {
      // A null or empty table list for a full backup means back up all tables, so fill the list
      // with all user tables from meta. If no table is available, throw an exception.
      List<TableDescriptor> htds = null;
      try (Admin admin = conn.getAdmin()) {
        htds = admin.listTableDescriptors();
      } catch (Exception e) {
        throw new BackupException(e);
      }

      if (htds == null) {
        throw new BackupException("No table exists for full backup of all tables.");
      } else {
        tableList = new ArrayList<>();
        for (TableDescriptor hTableDescriptor : htds) {
          TableName tn = hTableDescriptor.getTableName();
          if (tn.equals(BackupSystemTable.getTableName(conf))) {
            // skip backup system table
            continue;
          }
          tableList.add(tn);
        }

        LOG.info("Full backup of all tables available in the cluster: {}", tableList);
      }
    }

    // there are one or more tables in the table list
    backupInfo =
      new BackupInfo(backupId, type, tableList.toArray(new TableName[0]), targetRootDir);
    backupInfo.setBandwidth(bandwidth);
    backupInfo.setWorkers(workers);
    backupInfo.setNoChecksumVerify(noChecksumVerify);
    return backupInfo;
  }

  /**
   * Checks whether there is any ongoing backup. Currently we rely only on the status recorded in
   * the backup system table. We need to consider handling orphan records in the future;
   * otherwise, all incoming requests will fail.
   * @return the ongoing backup id if an ongoing backup exists, otherwise null
   * @throws IOException exception
   */
  private String getOngoingBackupId() throws IOException {
    ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING);
    if (sessions.isEmpty()) {
      return null;
    }
    return sessions.get(0).getBackupId();
  }

  /**
   * Start the backup manager service.
   * @throws IOException exception
   */
  public void initialize() throws IOException {
    String ongoingBackupId = this.getOngoingBackupId();
    if (ongoingBackupId != null) {
      LOG.info("There is an ongoing backup {}."
        + " Cannot launch a new backup until no ongoing backup remains.", ongoingBackupId);
      throw new BackupException("There is an ongoing backup session.");
    }
  }

  public void setBackupInfo(BackupInfo backupInfo) {
    this.backupInfo = backupInfo;
  }

  /*
   * backup system table operations
   */

  /**
   * Updates status (state) of a backup session in a persistent store
   * @param context context
   * @throws IOException exception
   */
  public void updateBackupInfo(BackupInfo context) throws IOException {
    systemTable.updateBackupInfo(context);
  }

  /**
   * Starts a new backup session, waiting until the backup exclusive-operation lock is acquired.
   * @throws IOException if the lock cannot be acquired within the configured timeout
   */
  public void startBackupSession() throws IOException {
    long startTime = EnvironmentEdgeManager.currentTime();
    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
    long lastWarningOutputTime = 0;
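    // Retry once per second until the exclusive-operation marker is acquired or the timeout
    // expires, warning about the wait at most once per minute.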
    while (EnvironmentEdgeManager.currentTime() - startTime < timeout) {
      try {
        systemTable.startBackupExclusiveOperation();
        return;
      } catch (IOException e) {
        if (e instanceof ExclusiveOperationException) {
          // sleep, then repeat
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e1) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
          }
          if (
            lastWarningOutputTime == 0
              || (EnvironmentEdgeManager.currentTime() - lastWarningOutputTime) > 60000
          ) {
            lastWarningOutputTime = EnvironmentEdgeManager.currentTime();
            LOG.warn("Waiting to acquire backup exclusive lock for {}s",
              (lastWarningOutputTime - startTime) / 1000);
          }
        } else {
          throw e;
        }
      }
    }
    throw new IOException(
      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
  }

  /**
   * Finishes active backup session
   * @throws IOException if no active session
   */
  public void finishBackupSession() throws IOException {
    systemTable.finishBackupExclusiveOperation();
  }

  /**
   * Reads the start code (timestamp) of the last successful backup. Returns null if no start code
   * is stored in the backup system table or the stored value has length 0; both cases indicate
   * that no backup has completed successfully so far.
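   * <p>
   * Illustrative null handling on the caller side (an assumption about caller code, not
   * prescribed by this class):
   *
   * <pre>
   * String startCode = manager.readBackupStartCode();
   * long since = startCode == null ? 0L : Long.parseLong(startCode);
   * </pre>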
   * @return the timestamp of the last successful backup, or null if none
   * @throws IOException exception
   */
  public String readBackupStartCode() throws IOException {
    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
  }

  /**
   * Write the start code (timestamp) to backup system table. If null is passed in, a zero-length
   * value is written.
   * @param startCode start code
   * @throws IOException exception
   */
  public void writeBackupStartCode(Long startCode) throws IOException {
    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
  }

  /**
   * Get the RS log information after the last log roll from backup system table.
   * @return RS log info
   * @throws IOException exception
   */
  public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException {
    return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
  }

  public List<BulkLoad> readBulkloadRows(List<TableName> tableList) throws IOException {
    return systemTable.readBulkloadRows(tableList);
  }

  public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
    systemTable.deleteBulkLoadedRows(rows);
  }

  /**
   * Get all completed backup information (in descending order by time)
   * @return list of completed backup info records
   * @throws IOException exception
   */
  public List<BackupInfo> getBackupHistory() throws IOException {
    return systemTable.getBackupHistory();
  }

  public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException {
    return systemTable.getBackupHistory(completed);
  }

  /**
   * Write the current timestamps for each regionserver to backup system table after a successful
   * full or incremental backup. Each table may have a different set of log timestamps. The saved
   * timestamp is that of the last log file already backed up.
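   * <p>
   * Illustrative shape of the timestamp map (server name and timestamps are hypothetical):
   *
   * <pre>{@code
   * Map<String, Long> newTimestamps = new HashMap<>();
   * newTimestamps.put("rs1.example.com,16020,1700000000000", 1700003600000L);
   * manager.writeRegionServerLogTimestamp(tables, newTimestamps);
   * }</pre>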
   * @param tables        tables
   * @param newTimestamps map from region server name to the last backed-up WAL timestamp
   * @throws IOException exception
   */
  public void writeRegionServerLogTimestamp(Set<TableName> tables, Map<String, Long> newTimestamps)
    throws IOException {
    systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir());
  }

  /**
   * Read the timestamp for each region server log after the last successful backup. Each table
   * has its own set of timestamps.
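   * <p>
   * Illustrative lookup (table and server names are hypothetical):
   *
   * <pre>{@code
   * Map<TableName, Map<String, Long>> tsMap = manager.readLogTimestampMap();
   * Long previousTs = tsMap.get(TableName.valueOf("t1"))
   *   .get("rs1.example.com,16020,1700000000000");
   * }</pre>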
   * @return the timestamp for each region server, keyed as tableName ->
   *         (regionServerName -> previousTimestamp)
   * @throws IOException exception
   */
  public Map<TableName, Map<String, Long>> readLogTimestampMap() throws IOException {
    return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir());
  }

  /**
   * Return the current tables covered by incremental backup.
   * @return set of tableNames
   * @throws IOException exception
   */
  public Set<TableName> getIncrementalBackupTableSet() throws IOException {
    return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir());
  }

  /**
   * Adds set of tables to overall incremental backup table set
   * @param tables tables
   * @throws IOException exception
   */
  public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException {
    systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir());
  }

  public Connection getConnection() {
    return conn;
  }
}