/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupObserver;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.impl.BackupManifest.BackupImage;
import org.apache.hadoop.hbase.backup.master.BackupLogCleaner;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.procedure.ProcedureManagerHost;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Handles backup requests: creates backup info records in the backup system table to keep track
 * of backup sessions, and dispatches backup requests.
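 * <p>
 * A minimal usage sketch (assumes backup is enabled via {@code hbase.backup.enable=true}; the
 * table list, root directory, worker count and bandwidth below are illustrative values, and
 * error handling is omitted):
 *
 * <pre>{@code
 * try (Connection conn = ConnectionFactory.createConnection(conf);
 *      BackupManager manager = new BackupManager(conn, conf)) {
 *   manager.initialize(); // fails if another backup session is still running
 *   BackupInfo info = manager.createBackupInfo("backup_1", BackupType.FULL, tables,
 *     "hdfs://host:8020/backup", 3, 100L);
 *   manager.startBackupSession();
 *   // ... run the backup, persisting progress via updateBackupInfo(info) ...
 *   manager.finishBackupSession();
 * }
 * }</pre>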
 */
@InterfaceAudience.Private
public class BackupManager implements Closeable {
  // In seconds
  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
    "hbase.backup.exclusive.op.timeout.seconds";
  // In seconds
  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
  private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);

  protected Configuration conf = null;
  protected BackupInfo backupInfo = null;
  protected BackupSystemTable systemTable;
  protected final Connection conn;

  /**
   * Backup manager constructor.
   * @param conn connection
   * @param conf configuration
   * @throws IOException if HBase backup is not enabled
   */
  public BackupManager(Connection conn, Configuration conf) throws IOException {
    if (
      !conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
        BackupRestoreConstants.BACKUP_ENABLE_DEFAULT)
    ) {
      throw new BackupException("HBase backup is not enabled. Check your "
        + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting.");
    }
    this.conf = conf;
    this.conn = conn;
    this.systemTable = new BackupSystemTable(conn);
  }

  /**
   * Returns backup info
   */
  protected BackupInfo getBackupInfo() {
    return backupInfo;
  }

  /**
   * Modifies the master's configuration to inject the backup-related features (tests only).
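   * <p>
   * A minimal sketch of how a test might apply it before starting a mini cluster (the keys named
   * in the comment are the ones this method appends to):
   *
   * <pre>{@code
   * Configuration conf = HBaseConfiguration.create();
   * conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
   * BackupManager.decorateMasterConfiguration(conf);
   * // conf now lists BackupLogCleaner in hbase.master.logcleaner.plugins,
   * // LogRollMasterProcedureManager in hbase.procedure.master.classes and
   * // BackupHFileCleaner in hbase.master.hfilecleaner.plugins
   * }</pre>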
   * @param conf configuration
   */
  public static void decorateMasterConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }
    // Add WAL archive cleaner plug-in
    String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
    String cleanerClass = BackupLogCleaner.class.getCanonicalName();
    if (!plugins.contains(cleanerClass)) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
    }

    String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY);
    String masterProcedureClass = LogRollMasterProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass);
    } else if (!classes.contains(masterProcedureClass)) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY,
        classes + "," + masterProcedureClass);
    }

    plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
      (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName());
    if (LOG.isDebugEnabled()) {
      LOG.debug(
        "Added log cleaner: {}. Added master procedure manager: {}. Added hfile cleaner: {}",
        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName());
    }
  }

  /**
   * Modifies the region server's configuration to inject the backup-related features (tests
   * only).
   * @param conf configuration
   */
  public static void decorateRegionServerConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }

    String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY);
    String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass);
    } else if (!classes.contains(regionProcedureClass)) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY,
        classes + "," + regionProcedureClass);
    }
    String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
    String regionObserverClass = BackupObserver.class.getName();
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      (coproc == null ? "" : coproc + ",") + regionObserverClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added region procedure manager: {}. Added region observer: {}",
        regionProcedureClass, regionObserverClass);
    }
  }

  public static boolean isBackupEnabled(Configuration conf) {
    return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT);
  }

  /**
   * Returns the configuration.
   */
  Configuration getConf() {
    return conf;
  }

  /**
   * Stops the backup manager and releases any held resources.
   */
  @Override
  public void close() {
    if (systemTable != null) {
      try {
        systemTable.close();
      } catch (Exception e) {
        LOG.error(e.toString(), e);
      }
    }
  }

  /**
   * Creates a backup info based on the input backup request. For a full backup, a null or empty
   * table list means "back up all user tables".
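   * <p>
   * An illustrative call (the id, path and numbers are example values only):
   *
   * <pre>{@code
   * // Passing a null table list with BackupType.FULL backs up all user tables
   * BackupInfo info = manager.createBackupInfo("backup_1700000000000", BackupType.FULL, null,
   *   "hdfs://host:8020/backup", 3, 100L);
   * }</pre>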
   * @param backupId      backup id
   * @param type          type
   * @param tableList     table list
   * @param targetRootDir root dir
   * @param workers       number of parallel workers
   * @param bandwidth     bandwidth per worker in MB per second
   * @return the backup info for the new backup session
   * @throws BackupException exception
   */
  public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
    String targetRootDir, int workers, long bandwidth) throws BackupException {
    if (targetRootDir == null) {
      throw new BackupException("Wrong backup request parameter: target backup root directory");
    }

    if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) {
      // A null or empty table list for a full backup means "back up all tables", so fill the
      // table list with all user tables from meta. If no table is available, reject the request.
      List<TableDescriptor> htds = null;
      try (Admin admin = conn.getAdmin()) {
        htds = admin.listTableDescriptors();
      } catch (Exception e) {
        throw new BackupException(e);
      }

      if (htds == null) {
        throw new BackupException("No table exists for full backup of all tables.");
      } else {
        tableList = new ArrayList<>();
        for (TableDescriptor hTableDescriptor : htds) {
          TableName tn = hTableDescriptor.getTableName();
          if (tn.equals(BackupSystemTable.getTableName(conf))) {
            // skip the backup system table
            continue;
          }
          tableList.add(tn);
        }

        LOG.info("Full backup of all tables available in the cluster: {}", tableList);
      }
    }

    // there is at least one table in the table list
    backupInfo = new BackupInfo(backupId, type, tableList.toArray(new TableName[tableList.size()]),
      targetRootDir);
    backupInfo.setBandwidth(bandwidth);
    backupInfo.setWorkers(workers);
    return backupInfo;
  }

  /**
   * Checks if there is any ongoing backup. Currently, we only rely on the state recorded in the
   * backup system table. We need to consider handling orphan records in the future; otherwise,
   * all incoming requests will fail.
   * @return the ongoing backup id if an ongoing backup exists, otherwise null
   * @throws IOException exception
   */
  private String getOngoingBackupId() throws IOException {
    ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING);
    if (sessions.size() == 0) {
      return null;
    }
    return sessions.get(0).getBackupId();
  }

  /**
   * Starts the backup manager service.
   * @throws IOException exception
   */
  public void initialize() throws IOException {
    String ongoingBackupId = this.getOngoingBackupId();
    if (ongoingBackupId != null) {
      LOG.info("There is an ongoing backup {}. "
        + "Cannot launch a new backup until the ongoing one completes.", ongoingBackupId);
      throw new BackupException("There is an ongoing backup session.");
    }
  }

  public void setBackupInfo(BackupInfo backupInfo) {
    this.backupInfo = backupInfo;
  }

  /**
   * Get direct ancestors of the current backup.
   * @param backupInfo The backup info for the current backup
   * @return The ancestors for the current backup
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo) throws IOException {
    LOG.debug("Getting the direct ancestors of the current backup {}", backupInfo.getBackupId());

    ArrayList<BackupImage> ancestors = new ArrayList<>();

    // a full backup does not have ancestors
    if (backupInfo.getType() == BackupType.FULL) {
      LOG.debug("Current backup is a full backup, no direct ancestor for it.");
      return ancestors;
    }

    // get the entire backup history in descending order
    ArrayList<BackupInfo> allHistoryList = getBackupHistory(true);
    for (BackupInfo backup : allHistoryList) {

      BackupImage.Builder builder = BackupImage.newBuilder();

      BackupImage image = builder.withBackupId(backup.getBackupId()).withType(backup.getType())
        .withRootDir(backup.getBackupRootDir()).withTableList(backup.getTableNames())
        .withStartTime(backup.getStartTs()).withCompleteTime(backup.getCompleteTs()).build();

      // Only direct ancestors of a backup are required, not the entire backup history for this
      // table; otherwise we would end up verifying all of the previous backups, which is
      // unnecessary, and backup paths need not be valid beyond the lifetime of a backup.
      //
      // RootDir is a way of grouping a single backup lineage: one full backup plus the
      // incremental backups taken on top of it.
      if (!image.getRootDir().equals(backupInfo.getBackupRootDir())) {
        continue;
      }

      // add the full backup image as an ancestor until the last incremental backup
      if (backup.getType().equals(BackupType.FULL)) {
        // check the backup image coverage: if the previous image is covered by newer ones,
        // there is no need to add it
        if (!BackupManifest.canCoverImage(ancestors, image)) {
          ancestors.add(image);
        }
      } else {
        // Found the last incremental backup. If the previously added full backup ancestor images
        // can cover it, then this incremental ancestor is not a dependency of the current
        // incremental backup; that is, this is the backup scope boundary of the current table
        // set. Otherwise, this incremental backup ancestor is a dependency of the ongoing
        // incremental backup.
        if (BackupManifest.canCoverImage(ancestors, image)) {
          LOG.debug("Met the backup boundary of the current table set:");
          for (BackupImage image1 : ancestors) {
            LOG.debug("  BackupID={}, BackupDir={}", image1.getBackupId(), image1.getRootDir());
          }
        } else {
          Path logBackupPath =
            HBackupFileSystem.getBackupPath(backup.getBackupRootDir(), backup.getBackupId());
          LOG.debug("Current backup has an incremental backup ancestor, "
            + "touching its image manifest in {} to construct the dependency.", logBackupPath);
          BackupManifest lastIncrImgManifest = new BackupManifest(conf, logBackupPath);
          BackupImage lastIncrImage = lastIncrImgManifest.getBackupImage();
          ancestors.add(lastIncrImage);

          LOG.debug("Last dependent incremental backup image: {BackupID={}, BackupDir={}}",
            lastIncrImage.getBackupId(), lastIncrImage.getRootDir());
        }
      }
    }
    LOG.debug("Got {} ancestors for the current backup.", ancestors.size());
    return ancestors;
  }

  /**
   * Get the direct ancestors of this backup for one table involved.
   * @param backupInfo backup info
   * @param table      table
   * @return backupImages on the dependency list
   * @throws IOException exception
   */
  public ArrayList<BackupImage> getAncestors(BackupInfo backupInfo, TableName table)
    throws IOException {
    ArrayList<BackupImage> ancestors = getAncestors(backupInfo);
    ArrayList<BackupImage> tableAncestors = new ArrayList<>();
    for (BackupImage image : ancestors) {
      if (image.hasTable(table)) {
        tableAncestors.add(image);
        if (image.getType() == BackupType.FULL) {
          break;
        }
      }
    }
    return tableAncestors;
  }

  /*
   * backup system table operations
   */

  /**
   * Updates status (state) of a backup session in a persistent store
   * @param context context
   * @throws IOException exception
   */
  public void updateBackupInfo(BackupInfo context) throws IOException {
    systemTable.updateBackupInfo(context);
  }

  /**
   * Starts a new backup session by acquiring the exclusive-operation lock in the backup system
   * table, retrying until the configured timeout elapses.
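   * <p>
   * The time to wait for the lock is configurable; a sketch (7200 is an illustrative value):
   *
   * <pre>{@code
   * // Wait up to two hours for the exclusive lock instead of the default 3600 seconds
   * conf.setInt(BackupManager.BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY, 7200);
   * }</pre>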
   * @throws IOException if an active session already exists
   */
  public void startBackupSession() throws IOException {
    long startTime = EnvironmentEdgeManager.currentTime();
    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
    long lastWarningOutputTime = 0;
    while (EnvironmentEdgeManager.currentTime() - startTime < timeout) {
      try {
        systemTable.startBackupExclusiveOperation();
        return;
      } catch (IOException e) {
        if (e instanceof ExclusiveOperationException) {
          // sleep, then repeat
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e1) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
          }
          if (
            lastWarningOutputTime == 0
              || (EnvironmentEdgeManager.currentTime() - lastWarningOutputTime) > 60000
          ) {
            lastWarningOutputTime = EnvironmentEdgeManager.currentTime();
            LOG.warn("Waiting to acquire backup exclusive lock for {}s",
              (lastWarningOutputTime - startTime) / 1000);
          }
        } else {
          throw e;
        }
      }
    }
    throw new IOException(
      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
  }

  /**
   * Finishes the active backup session.
   * @throws IOException if there is no active session
   */
  public void finishBackupSession() throws IOException {
    systemTable.finishBackupExclusiveOperation();
  }

  /**
   * Reads the start code (timestamp) of the last successful backup. Returns null if no start code
   * is stored in the backup system table or the stored value has length 0; both cases indicate
   * that no backup has completed successfully so far.
   * @return the timestamp of the last successful backup
   * @throws IOException exception
   */
  public String readBackupStartCode() throws IOException {
    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
  }

  /**
   * Writes the start code (timestamp) to the backup system table. If null is passed in, a
   * zero-length value is written.
   * @param startCode start code
   * @throws IOException exception
   */
  public void writeBackupStartCode(Long startCode) throws IOException {
    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
  }

  /**
   * Get the RS log information after the last log roll from backup system table.
   * @return RS log info
   * @throws IOException exception
   */
  public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException {
    return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
  }

  /**
   * Reads the bulk load records for the given tables from the backup system table, together with
   * the row keys of the scanned records so they can be deleted once processed.
   * @param tableList tables to read bulk load records for
   * @return a pair of (bulk load records keyed by table) and (row keys of the scanned records)
   * @throws IOException exception
   */
  public Pair<Map<TableName, Map<String, Map<String, List<Pair<String, Boolean>>>>>, List<byte[]>>
    readBulkloadRows(List<TableName> tableList) throws IOException {
    return systemTable.readBulkloadRows(tableList);
  }

  /**
   * Deletes the given bulk load records from the backup system table.
   * @param rows row keys as returned by {@link #readBulkloadRows(List)}
   * @throws IOException exception
   */
  public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
    systemTable.deleteBulkLoadedRows(rows);
  }

  /**
   * Get all completed backup information (in descending order by time).
   * @return history of BackupInfo records
   * @throws IOException exception
   */
  public List<BackupInfo> getBackupHistory() throws IOException {
    return systemTable.getBackupHistory();
  }

  /**
   * Get the backup history.
   * @param completed if true, return only completed backups; otherwise return the full history
   * @throws IOException exception
   */
  public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException {
    return systemTable.getBackupHistory(completed);
  }

  /**
   * Write the current timestamps for each regionserver to backup system table after a successful
   * full or incremental backup. Each table may have a different set of log timestamps. The saved
   * timestamp is of the last log file that was backed up already.
   * @param tables        tables
   * @param newTimestamps timestamps keyed by regionserver
   * @throws IOException exception
   */
  public void writeRegionServerLogTimestamp(Set<TableName> tables, Map<String, Long> newTimestamps)
    throws IOException {
    systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir());
  }

  /**
   * Read the timestamp of each region server log after the last successful backup. Each table has
   * its own set of timestamps.
   * @return map from table name to a map of region server to the previous timestamp
   * @throws IOException exception
   */
  public Map<TableName, Map<String, Long>> readLogTimestampMap() throws IOException {
    return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir());
  }

  /**
   * Return the current tables covered by incremental backup.
   * @return set of tableNames
   * @throws IOException exception
   */
  public Set<TableName> getIncrementalBackupTableSet() throws IOException {
    return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir());
  }

  /**
   * Adds set of tables to overall incremental backup table set.
   * @param tables tables
   * @throws IOException exception
   */
  public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException {
    systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir());
  }

  public Connection getConnection() {
    return conn;
  }
}