/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.impl;

import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupHFileCleaner;
import org.apache.hadoop.hbase.backup.BackupInfo;
import org.apache.hadoop.hbase.backup.BackupInfo.BackupState;
import org.apache.hadoop.hbase.backup.BackupObserver;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.master.BackupLogCleaner;
import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager;
import org.apache.hadoop.hbase.backup.regionserver.LogRollRegionServerProcedureManager;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.procedure.ProcedureManagerHost;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Handles backup requests, creates backup info records in the backup system table to keep track
 * of backup sessions, and dispatches backup requests.
 */
@InterfaceAudience.Private
public class BackupManager implements Closeable {
  // in seconds
  public final static String BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY =
    "hbase.backup.exclusive.op.timeout.seconds";
  // in seconds
  private final static int DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT = 3600;
  private static final Logger LOG = LoggerFactory.getLogger(BackupManager.class);

  protected Configuration conf = null;
  protected BackupInfo backupInfo = null;
  protected BackupSystemTable systemTable;
  protected final Connection conn;

  /**
   * Backup manager constructor.
   * @param conn connection
   * @param conf configuration
   * @throws IOException if backup is not enabled or the backup system table cannot be accessed
   */
  public BackupManager(Connection conn, Configuration conf) throws IOException {
    if (
      !conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
        BackupRestoreConstants.BACKUP_ENABLE_DEFAULT)
    ) {
      throw new BackupException("HBase backup is not enabled. Check your "
        + BackupRestoreConstants.BACKUP_ENABLE_KEY + " setting.");
    }
    this.conf = conf;
    this.conn = conn;
    this.systemTable = new BackupSystemTable(conn);
  }
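  // A minimal usage sketch of the constructor above, assuming backup is enabled in the
  // configuration and a standard client connection (HBaseConfiguration and ConnectionFactory
  // are the stock HBase client entry points); not part of the API surface:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
  //   try (Connection conn = ConnectionFactory.createConnection(conf);
  //       BackupManager manager = new BackupManager(conn, conf)) {
  //     // use the manager, e.g. manager.getBackupHistory()
  //   }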
  /**
   * Returns backup info
   */
  protected BackupInfo getBackupInfo() {
    return backupInfo;
  }

  /**
   * This method modifies the master's configuration in order to inject backup-related features
   * (TESTs only)
   * @param conf configuration
   */
  public static void decorateMasterConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }
    // Add WAL archive cleaner plug-in
    String plugins = conf.get(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS);
    String cleanerClass = BackupLogCleaner.class.getCanonicalName();
    if (!plugins.contains(cleanerClass)) {
      conf.set(HConstants.HBASE_MASTER_LOGCLEANER_PLUGINS, plugins + "," + cleanerClass);
    }

    String classes = conf.get(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY);
    String masterProcedureClass = LogRollMasterProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY, masterProcedureClass);
    } else if (!classes.contains(masterProcedureClass)) {
      conf.set(ProcedureManagerHost.MASTER_PROCEDURE_CONF_KEY,
        classes + "," + masterProcedureClass);
    }

    plugins = conf.get(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS);
    conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
      (plugins == null ? "" : plugins + ",") + BackupHFileCleaner.class.getName());

    String observerClass = BackupObserver.class.getName();
    String masterCoProc = conf.get(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY);
    conf.set(CoprocessorHost.MASTER_COPROCESSOR_CONF_KEY,
      (masterCoProc == null ? "" : masterCoProc + ",") + observerClass);

    if (LOG.isDebugEnabled()) {
      LOG.debug(
        "Added log cleaner: {}. Added master procedure manager: {}."
          + " Added HFile cleaner: {}. Added master observer: {}",
        cleanerClass, masterProcedureClass, BackupHFileCleaner.class.getName(), observerClass);
    }
  }
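  // A minimal sketch of the effect of decorateMasterConfiguration, assuming backup is enabled
  // and none of the backup plug-ins are registered yet; the resulting values are illustrative:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY, true);
  //   BackupManager.decorateMasterConfiguration(conf);
  //   // hbase.master.logcleaner.plugins now ends with ...BackupLogCleaner
  //   // hbase.procedure.master.classes now contains ...LogRollMasterProcedureManager
  //   // hbase.master.hfilecleaner.plugins now ends with ...BackupHFileCleaner
  //   // hbase.coprocessor.master.classes now ends with ...BackupObserver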
  /**
   * This method modifies the Region Server configuration in order to inject backup-related
   * features (TESTs only)
   * @param conf configuration
   */
  public static void decorateRegionServerConfiguration(Configuration conf) {
    if (!isBackupEnabled(conf)) {
      return;
    }

    String classes = conf.get(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY);
    String regionProcedureClass = LogRollRegionServerProcedureManager.class.getName();
    if (classes == null) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY, regionProcedureClass);
    } else if (!classes.contains(regionProcedureClass)) {
      conf.set(ProcedureManagerHost.REGIONSERVER_PROCEDURE_CONF_KEY,
        classes + "," + regionProcedureClass);
    }
    String coproc = conf.get(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY);
    String regionObserverClass = BackupObserver.class.getName();
    conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      (coproc == null ? "" : coproc + ",") + regionObserverClass);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Added region procedure manager: {}. Added region observer: {}",
        regionProcedureClass, regionObserverClass);
    }
  }

  public static boolean isBackupEnabled(Configuration conf) {
    return conf.getBoolean(BackupRestoreConstants.BACKUP_ENABLE_KEY,
      BackupRestoreConstants.BACKUP_ENABLE_DEFAULT);
  }

  /**
   * Get configuration
   */
  Configuration getConf() {
    return conf;
  }

  /**
   * Stop all the work of backup.
   */
  @Override
  public void close() {
    if (systemTable != null) {
      try {
        systemTable.close();
      } catch (Exception e) {
        LOG.error(e.toString(), e);
      }
    }
  }

  /**
   * Creates a backup info based on the input backup request.
   * @param backupId backup id
   * @param type type
   * @param tableList table list
   * @param targetRootDir root dir
   * @param workers number of parallel workers
   * @param bandwidth bandwidth per worker in MB per sec
   * @param noChecksumVerify true to skip checksum verification
   * @throws BackupException exception
   */
  public BackupInfo createBackupInfo(String backupId, BackupType type, List<TableName> tableList,
    String targetRootDir, int workers, long bandwidth, boolean noChecksumVerify)
    throws BackupException {
    if (targetRootDir == null) {
      throw new BackupException("Wrong backup request parameter: target backup root directory");
    }

    if (type == BackupType.FULL && (tableList == null || tableList.isEmpty())) {
      // A null or empty table list for a full backup means back up all tables: fill the table
      // list with all user tables from meta. If no table is available, throw an exception.
      List<TableDescriptor> htds = null;
      try (Admin admin = conn.getAdmin()) {
        htds = admin.listTableDescriptors();
      } catch (Exception e) {
        throw new BackupException(e);
      }

      if (htds == null) {
        throw new BackupException("No table exists for full backup of all tables.");
      } else {
        tableList = new ArrayList<>();
        for (TableDescriptor hTableDescriptor : htds) {
          TableName tn = hTableDescriptor.getTableName();
          if (tn.equals(BackupSystemTable.getTableName(conf))) {
            // skip backup system table
            continue;
          }
          tableList.add(hTableDescriptor.getTableName());
        }

        LOG.info("Full backup of all tables available in the cluster: {}", tableList);
      }
    }

    // there is at least one table in the table list
    backupInfo = new BackupInfo(backupId, type, tableList.toArray(new TableName[tableList.size()]),
      targetRootDir);
    backupInfo.setBandwidth(bandwidth);
    backupInfo.setWorkers(workers);
    backupInfo.setNoChecksumVerify(noChecksumVerify);
    return backupInfo;
  }
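  // A minimal sketch of creating a backup info record for a full backup of all tables,
  // assuming an already constructed manager; the backup id, root directory, and tuning
  // values are illustrative:
  //
  //   BackupInfo info = manager.createBackupInfo("backup_" + System.currentTimeMillis(),
  //     BackupType.FULL, null /* null table list = all tables */, "hdfs://namenode/backup",
  //     3 /* workers */, 100 /* MB/s per worker */, false /* verify checksums */);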
  /**
   * Check if there is an ongoing backup. Currently, we only rely on the status recorded in the
   * backup system table. We need to consider handling orphan records in the future; otherwise,
   * all subsequent requests will fail.
   * @return the ongoing backup id if an ongoing backup exists, otherwise null
   * @throws IOException exception
   */
  private String getOngoingBackupId() throws IOException {
    ArrayList<BackupInfo> sessions = systemTable.getBackupInfos(BackupState.RUNNING);
    if (sessions.size() == 0) {
      return null;
    }
    return sessions.get(0).getBackupId();
  }

  /**
   * Start the backup manager service.
   * @throws IOException exception
   */
  public void initialize() throws IOException {
    String ongoingBackupId = this.getOngoingBackupId();
    if (ongoingBackupId != null) {
      LOG.info("There is an ongoing backup {}."
        + " Cannot launch a new backup until no ongoing backup remains.", ongoingBackupId);
      throw new BackupException("There is an ongoing backup session.");
    }
  }

  public void setBackupInfo(BackupInfo backupInfo) {
    this.backupInfo = backupInfo;
  }

  /*
   * backup system table operations
   */

  /**
   * Updates status (state) of a backup session in a persistent store
   * @param context context
   * @throws IOException exception
   */
  public void updateBackupInfo(BackupInfo context) throws IOException {
    systemTable.updateBackupInfo(context);
  }

  /**
   * Starts a new backup session
   * @throws IOException if an active session already exists
   */
  public void startBackupSession() throws IOException {
    long startTime = EnvironmentEdgeManager.currentTime();
    long timeout = conf.getInt(BACKUP_EXCLUSIVE_OPERATION_TIMEOUT_SECONDS_KEY,
      DEFAULT_BACKUP_EXCLUSIVE_OPERATION_TIMEOUT) * 1000L;
    long lastWarningOutputTime = 0;
    while (EnvironmentEdgeManager.currentTime() - startTime < timeout) {
      try {
        systemTable.startBackupExclusiveOperation();
        return;
      } catch (IOException e) {
        if (e instanceof ExclusiveOperationException) {
          // sleep, then repeat
          try {
            Thread.sleep(1000);
          } catch (InterruptedException e1) {
            // Restore the interrupted status
            Thread.currentThread().interrupt();
          }
          if (
            lastWarningOutputTime == 0
              || (EnvironmentEdgeManager.currentTime() - lastWarningOutputTime) > 60000
          ) {
            lastWarningOutputTime = EnvironmentEdgeManager.currentTime();
            LOG.warn("Waiting to acquire backup exclusive lock for {}s",
              (lastWarningOutputTime - startTime) / 1000);
          }
        } else {
          throw e;
        }
      }
    }
    throw new IOException(
      "Failed to acquire backup system table exclusive lock after " + timeout / 1000 + "s");
  }

  /**
   * Finishes the active backup session
   * @throws IOException if there is no active session
   */
  public void finishBackupSession() throws IOException {
    systemTable.finishBackupExclusiveOperation();
  }
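  // A minimal sketch of the exclusive-session lifecycle above, assuming a constructed manager:
  // startBackupSession() retries for up to hbase.backup.exclusive.op.timeout.seconds before
  // failing, so pairing it with finishBackupSession() in a finally block releases the lock even
  // when the backup work fails:
  //
  //   manager.startBackupSession();
  //   try {
  //     // run the backup work here
  //   } finally {
  //     manager.finishBackupSession();
  //   }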
  /**
   * Read the start code (timestamp) of the last successful backup. Returns null if there is no
   * start code stored in the backup system table or the stored value has length 0. Both cases
   * indicate that no successful backup has completed so far.
   * @return the timestamp of the last successful backup, or null
   * @throws IOException exception
   */
  public String readBackupStartCode() throws IOException {
    return systemTable.readBackupStartCode(backupInfo.getBackupRootDir());
  }

  /**
   * Write the start code (timestamp) to the backup system table. If null is passed in, a
   * zero-length value is written.
   * @param startCode start code
   * @throws IOException exception
   */
  public void writeBackupStartCode(Long startCode) throws IOException {
    systemTable.writeBackupStartCode(startCode, backupInfo.getBackupRootDir());
  }

  /**
   * Get the RS log information after the last log roll from the backup system table.
   * @return RS log info
   * @throws IOException exception
   */
  public HashMap<String, Long> readRegionServerLastLogRollResult() throws IOException {
    return systemTable.readRegionServerLastLogRollResult(backupInfo.getBackupRootDir());
  }

  public List<BulkLoad> readBulkloadRows(List<TableName> tableList) throws IOException {
    return systemTable.readBulkloadRows(tableList);
  }

  public void deleteBulkLoadedRows(List<byte[]> rows) throws IOException {
    systemTable.deleteBulkLoadedRows(rows);
  }

  /**
   * Get all completed backup information (in descending order by time)
   * @return history of backup info records
   * @throws IOException exception
   */
  public List<BackupInfo> getBackupHistory() throws IOException {
    return systemTable.getBackupHistory();
  }

  public ArrayList<BackupInfo> getBackupHistory(boolean completed) throws IOException {
    return systemTable.getBackupHistory(completed);
  }

  /**
   * Write the current timestamps for each regionserver to the backup system table after a
   * successful full or incremental backup. Each table may have a different set of log
   * timestamps. The saved timestamp is that of the last log file that was already backed up.
   * @param tables tables
   * @param newTimestamps new timestamps keyed by region server
   * @throws IOException exception
   */
  public void writeRegionServerLogTimestamp(Set<TableName> tables, Map<String, Long> newTimestamps)
    throws IOException {
    systemTable.writeRegionServerLogTimestamp(tables, newTimestamps, backupInfo.getBackupRootDir());
  }

  /**
   * Read the timestamp for each region server log after the last successful backup. Each table
   * has its own set of timestamps.
   * @return the timestamp for each region server; key: tableName, value: map of RegionServer to
   *         PreviousTimeStamp
   * @throws IOException exception
   */
  public Map<TableName, Map<String, Long>> readLogTimestampMap() throws IOException {
    return systemTable.readLogTimestampMap(backupInfo.getBackupRootDir());
  }

  /**
   * Return the current tables covered by incremental backup.
   * @return set of tableNames
   * @throws IOException exception
   */
  public Set<TableName> getIncrementalBackupTableSet() throws IOException {
    return systemTable.getIncrementalBackupTableSet(backupInfo.getBackupRootDir());
  }

  /**
   * Adds a set of tables to the overall incremental backup table set
   * @param tables tables
   * @throws IOException exception
   */
  public void addIncrementalBackupTableSet(Set<TableName> tables) throws IOException {
    systemTable.addIncrementalBackupTableSet(tables, backupInfo.getBackupRootDir());
  }

  public Connection getConnection() {
    return conn;
  }
}
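// A minimal sketch of the incremental-backup timestamp bookkeeping, assuming a constructed
// manager with an active backupInfo and a Set<TableName> named tables; the server name and
// timestamp values are illustrative:
//
//   Map<TableName, Map<String, Long>> previous = manager.readLogTimestampMap();
//   // ... after backing up WALs, record the new per-region-server high-water marks:
//   Map<String, Long> newTimestamps = new HashMap<>();
//   newTimestamps.put("rs1.example.com,16020", 1700000000000L);
//   manager.writeRegionServerLogTimestamp(tables, newTimestamps);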