/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NamespaceNotFoundException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreJob;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A collection for methods used by multiple classes to restore HBase tables.
 */
@InterfaceAudience.Private
public class RestoreTool {
  public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class);

  /** Maximum time (ms) to wait for a created/truncated table to become available. */
  private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000;

  /** Directory names inside a region dir that are not column families and must be skipped. */
  private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };

  protected Configuration conf;
  protected Path backupRootPath;
  protected Path restoreRootDir;
  protected String backupId;
  protected FileSystem fs;

  // store table name and snapshot dir mapping
  private final HashMap<TableName, Path> snapshotMap = new HashMap<>();

  public RestoreTool(Configuration conf, final Path backupRootPath, final Path restoreRootDir,
    final String backupId) throws IOException {
    this.conf = conf;
    this.backupRootPath = backupRootPath;
    this.backupId = backupId;
    this.fs = backupRootPath.getFileSystem(conf);
    this.restoreRootDir = restoreRootDir;
  }

  /**
   * return value represent path for:
   * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn"
   * @param tableName table name
   * @return path to table archive, or {@code null} if the archive directory does not exist (an
   *         empty table has no archive)
   * @throws IOException exception
   */
  Path getTableArchivePath(TableName tableName) throws IOException {
    Path baseDir =
      new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
        HConstants.HFILE_ARCHIVE_DIRECTORY);
    Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
    Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
    Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
    if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
      LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exists");
      tableArchivePath = null; // empty table has no archive
    }
    return tableArchivePath;
  }

  /**
   * Gets region list
   * @param tableName table name
   * @return RegionList region list; empty if the table has no archived data
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(TableName tableName) throws IOException {
    Path tableArchivePath = getTableArchivePath(tableName);
    ArrayList<Path> regionDirList = new ArrayList<>();
    // BUG FIX: getTableArchivePath() returns null for an empty table; passing null to
    // FileSystem.listStatus would throw NPE. Return an empty region list instead.
    if (tableArchivePath == null) {
      return regionDirList;
    }
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // here child refer to each region(Name)
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  /**
   * Applies a table schema modification and blocks until the table is available again.
   * @param conn HBase connection
   * @param desc the new table descriptor to apply
   * @throws IOException if the modification fails, the wait is interrupted, or the table does not
   *           become available within the timeout (600 checks at 100ms each)
   */
  void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      admin.modifyTable(desc);
      int attempt = 0;
      int maxAttempts = 600;
      while (!admin.isTableAvailable(desc.getTableName())) {
        Thread.sleep(100);
        // BUG FIX: attempt was previously incremented twice per iteration ("attempt++;" followed
        // by "if (attempt++ > maxAttempts)"), halving the intended timeout. Increment once.
        attempt++;
        if (attempt > maxAttempts) {
          throw new IOException("Timeout expired " + (maxAttempts * 100) + "ms");
        }
      }
    } catch (InterruptedException ie) {
      // Restore the interrupt status so callers up the stack can observe it.
      Thread.currentThread().interrupt();
      throw new IOException(ie);
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * During incremental backup operation. Call WalPlayer to replay WAL in backup image Currently
   * tableNames and newTablesNames only contain single table, will be expanded to multiple tables in
   * the future
   * @param conn HBase connection
   * @param tableBackupPath backup path
   * @param logDirs : incremental backup folders, which contains WAL
   * @param tableNames : source tableNames(table names were backuped)
   * @param newTableNames : target tableNames(table names to be restored to)
   * @param incrBackupId incremental backup Id
   * @param keepOriginalSplits whether the original region splits from the full backup should be
   *          kept
   * @throws IOException exception
   */
  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
    TableName[] tableNames, TableName[] newTableNames, String incrBackupId,
    boolean keepOriginalSplits) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      if (tableNames.length != newTableNames.length) {
        throw new IOException("Number of source tables and target tables does not match!");
      }
      FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

      // for incremental backup image, expect the table already created either by user or previous
      // full backup. Here, check that all new tables exists
      for (TableName tableName : newTableNames) {
        if (!admin.tableExists(tableName)) {
          throw new IOException("HBase table " + tableName
            + " does not exist. Create the table first, e.g. by restoring a full backup.");
        }
      }
      // adjust table schema: add families present in the backup image but missing from the target,
      // and remove families that exist on the target but not in the backup image
      for (int i = 0; i < tableNames.length; i++) {
        TableName tableName = tableNames[i];
        TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
        if (tableDescriptor == null) {
          throw new IOException("Can't find " + tableName + "'s descriptor.");
        }
        LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);

        TableName newTableName = newTableNames[i];
        TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
        List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
        List<ColumnFamilyDescriptor> existingFamilies =
          Arrays.asList(newTableDescriptor.getColumnFamilies());
        TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
        boolean schemaChangeNeeded = false;
        for (ColumnFamilyDescriptor family : families) {
          if (!existingFamilies.contains(family)) {
            builder.setColumnFamily(family);
            schemaChangeNeeded = true;
          }
        }
        for (ColumnFamilyDescriptor family : existingFamilies) {
          if (!families.contains(family)) {
            builder.removeColumnFamily(family.getName());
            schemaChangeNeeded = true;
          }
        }
        if (schemaChangeNeeded) {
          // BUG FIX: previously this logged the pre-modification descriptor; log the descriptor
          // that was actually applied.
          TableDescriptor modifiedDescriptor = builder.build();
          modifyTableSync(conn, modifiedDescriptor);
          LOG.info("Changed " + newTableName + " to: " + modifiedDescriptor);
        }
      }
      configureForRestoreJob(keepOriginalSplits);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);

      restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false);
    }
  }

  /**
   * Restores a single table from a full backup image.
   * @param conn HBase connection
   * @param tableBackupPath backup path
   * @param tableName source table name (the table that was backed up)
   * @param newTableName target table name (the table to restore into); may equal tableName
   * @param truncateIfExists truncate the target table if it already exists
   * @param isKeepOriginalSplits whether the original region splits should be kept
   * @param lastIncrBackupId the id of the last incremental backup, or null for full-only restore
   * @throws IOException exception
   */
  public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName,
    TableName newTableName, boolean truncateIfExists, boolean isKeepOriginalSplits,
    String lastIncrBackupId) throws IOException {
    createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists,
      isKeepOriginalSplits, lastIncrBackupId);
  }

  /**
   * Returns value represent path for path to backup table snapshot directory:
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot"
   * @param backupRootPath backup root path
   * @param tableName table name
   * @param backupId backup Id
   * @return path for snapshot
   */
  Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
      HConstants.SNAPSHOT_DIR_NAME);
  }

  /**
   * Returns value represent path for:
   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
   * @param tableName table name
   * @return path to table info; null if no completed snapshot directory is found
   * @throws IOException exception
   */
  Path getTableInfoPath(TableName tableName) throws IOException {
    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
    Path tableInfoPath = null;

    // can't build the path directly as the timestamp values are different
    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
    for (FileStatus snapshot : snapshots) {
      tableInfoPath = snapshot.getPath();
      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
      if (tableInfoPath.getName().endsWith("data.manifest")) {
        break;
      }
    }
    return tableInfoPath;
  }

  /**
   * Get table descriptor
   * @param tableName is the table backed up
   * @return {@link TableDescriptor} saved in backup image of the table
   * @throws FileNotFoundException if the snapshot manifest describes a different table
   * @throws IOException on any other failure reading the snapshot
   */
  TableDescriptor getTableDesc(TableName tableName) throws IOException {
    Path tableInfoPath = this.getTableInfoPath(tableName);
    SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
    TableDescriptor tableDescriptor = manifest.getTableDescriptor();
    if (!tableDescriptor.getTableName().equals(tableName)) {
      LOG.error("couldn't find Table Desc for table: " + tableName + " under tableInfoPath: "
        + tableInfoPath.toString());
      LOG.error(
        "tableDescriptor.getNameAsString() = " + tableDescriptor.getTableName().getNameAsString());
      throw new FileNotFoundException("couldn't find Table Desc for table: " + tableName
        + " under tableInfoPath: " + tableInfoPath.toString());
    }
    return tableDescriptor;
  }

  /**
   * Reads a table descriptor from the backup image of the given incremental backup.
   * @param fileSys filesystem the backup lives on
   * @param tableName table whose descriptor is wanted
   * @param lastIncrBackupId incremental backup id; when null, no descriptor is available here
   * @return the descriptor, or null if {@code lastIncrBackupId} is null
   */
  private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName,
    String lastIncrBackupId) throws IOException {
    if (lastIncrBackupId != null) {
      String target =
        BackupUtils.getTableBackupDir(backupRootPath.toString(), lastIncrBackupId, tableName);
      return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target));
    }
    return null;
  }

  /**
   * Creates the target table (pre-split using the backup's region boundaries when possible) and
   * runs the restore job over the archived HFiles.
   * @param conn HBase connection
   * @param tableName source table name
   * @param newTableName target table name; defaults to {@code tableName} when null
   * @param tableBackupPath backup path
   * @param truncateIfExists truncate the target table if it already exists
   * @param isKeepOriginalSplits whether original region splits should be kept
   * @param lastIncrBackupId last incremental backup id, or null
   * @throws IOException exception
   */
  private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName,
    Path tableBackupPath, boolean truncateIfExists, boolean isKeepOriginalSplits,
    String lastIncrBackupId) throws IOException {
    if (newTableName == null) {
      newTableName = tableName;
    }
    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

    // get table descriptor first
    TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
    if (tableDescriptor != null) {
      LOG.debug("Retrieved descriptor: " + tableDescriptor + " thru " + lastIncrBackupId);
    }

    if (tableDescriptor == null) {
      Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
      if (fileSys.exists(tableSnapshotPath)) {
        // snapshot path exist means the backup path is in HDFS
        // check whether snapshot dir already recorded for target table
        if (snapshotMap.get(tableName) != null) {
          SnapshotDescription desc =
            SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
          SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
          tableDescriptor = manifest.getTableDescriptor();
        } else {
          tableDescriptor = getTableDesc(tableName);
          snapshotMap.put(tableName, getTableInfoPath(tableName));
        }
        if (tableDescriptor == null) {
          LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
        }
      } else {
        throw new IOException(
          "Table snapshot directory: " + tableSnapshotPath + " does not exist.");
      }
    }

    Path tableArchivePath = getTableArchivePath(tableName);
    if (tableArchivePath == null) {
      if (tableDescriptor != null) {
        // find table descriptor but no archive dir means the table is empty, create table and exit
        if (LOG.isDebugEnabled()) {
          LOG.debug("find table descriptor but no archive dir for table " + tableName
            + ", will only create table");
        }
        tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
        checkAndCreateTable(conn, newTableName, null, tableDescriptor, truncateIfExists);
        return;
      } else {
        // BUG FIX: the original message was a garbled concatenation
        // ("because directory '" + " tableArchivePath is null.").
        throw new IllegalStateException(
          "Cannot restore hbase table because tableArchivePath is null.");
      }
    }

    if (tableDescriptor == null) {
      tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build();
    } else {
      tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
    }

    // record all region dirs:
    // load all files in dir
    try {
      ArrayList<Path> regionPathList = getRegionList(tableName);

      // should only try to create the table with all region informations, so we could pre-split
      // the regions in fine grain
      checkAndCreateTable(conn, newTableName, regionPathList, tableDescriptor, truncateIfExists);
      configureForRestoreJob(isKeepOriginalSplits);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
      Path[] paths = new Path[regionPathList.size()];
      regionPathList.toArray(paths);
      restoreService.run(paths, new TableName[] { tableName }, restoreRootDir,
        new TableName[] { newTableName }, true);

    } catch (Exception e) {
      LOG.error(e.toString(), e);
      throw new IllegalStateException("Cannot restore hbase table", e);
    }
  }

  /**
   * Gets region list
   * @param tableArchivePath table archive path
   * @return RegionList region list
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException {
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // here child refer to each region(Name)
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  /**
   * Calculate region boundaries and add all the column families to the table descriptor
   * @param regionDirList region dir list
   * @return a set of keys to store the boundaries
   */
  byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // Build a set of keys to store the boundaries
    // calculate region boundaries and add all the column families to the table descriptor
    for (Path regionDir : regionDirList) {
      LOG.debug("Parsing region dir: " + regionDir);
      Path hfofDir = regionDir;

      if (!fs.exists(hfofDir)) {
        LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
      }

      FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
      if (familyDirStatuses == null) {
        throw new IOException("No families found in " + hfofDir);
      }

      for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDirectory()) {
          LOG.warn("Skipping non-directory " + stat.getPath());
          continue;
        }
        boolean isIgnore = false;
        String pathName = stat.getPath().getName();
        for (String ignore : ignoreDirs) {
          if (pathName.contains(ignore)) {
            LOG.warn("Skipping non-family directory" + pathName);
            isIgnore = true;
            break;
          }
        }
        if (isIgnore) {
          continue;
        }
        Path familyDir = stat.getPath();
        LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
          continue;
        }

        // start to parse hfile inside one family dir
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
          if (
            hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
              || StoreFileInfo.isReference(hfile.getName())
              || HFileLink.isHFileLink(hfile.getName())
          ) {
            continue;
          }
          // try-with-resources guarantees the reader is closed on every path, including the
          // "0 entries" continue (the original manual try/finally is equivalent but noisier).
          try (HFile.Reader reader = HFile.createReader(fs, hfile, conf)) {
            if (reader.getEntries() == 0) {
              LOG.debug("Skipping hfile with 0 entries: " + hfile);
              continue;
            }
            byte[] first = reader.getFirstRowKey().get();
            byte[] last = reader.getLastRowKey().get();
            LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
              + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

            // To eventually infer start key-end key boundaries
            Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0;
            map.put(first, value + 1);
            value = map.containsKey(last) ? (Integer) map.get(last) : 0;
            map.put(last, value - 1);
          }
        }
      }
    }
    return BulkLoadHFilesTool.inferBoundaries(map);
  }

  /**
   * Prepare the table for bulkload, most codes copied from {@code createTable} method in
   * {@code BulkLoadHFilesTool}.
   * @param conn connection
   * @param targetTableName target table name
   * @param regionDirList region directory list
   * @param htd table descriptor
   * @param truncateIfExists truncates table if exists
   * @throws IOException exception
   */
  private void checkAndCreateTable(Connection conn, TableName targetTableName,
    ArrayList<Path> regionDirList, TableDescriptor htd, boolean truncateIfExists)
    throws IOException {
    try (Admin admin = conn.getAdmin()) {
      boolean createNew = false;
      if (admin.tableExists(targetTableName)) {
        if (truncateIfExists) {
          LOG.info(
            "Truncating existing target table '" + targetTableName + "', preserving region splits");
          admin.disableTable(targetTableName);
          admin.truncateTable(targetTableName, true);
        } else {
          LOG.info("Using existing target table '" + targetTableName + "'");
        }
      } else {
        createNew = true;
      }
      if (createNew) {
        LOG.info("Creating target table '" + targetTableName + "'");
        byte[][] keys = null;
        try {
          if (regionDirList == null || regionDirList.size() == 0) {
            admin.createTable(htd);
          } else {
            keys = generateBoundaryKeys(regionDirList);
            if (keys.length > 0) {
              // create table using table descriptor and region boundaries
              admin.createTable(htd, keys);
            } else {
              admin.createTable(htd);
            }
          }
        } catch (NamespaceNotFoundException e) {
          LOG.warn("There was no namespace and the same will be created");
          String namespaceAsString = targetTableName.getNamespaceAsString();
          LOG.info("Creating target namespace '" + namespaceAsString + "'");
          admin.createNamespace(NamespaceDescriptor.create(namespaceAsString).build());
          // retry the create now that the namespace exists
          if (null == keys) {
            admin.createTable(htd);
          } else {
            admin.createTable(htd, keys);
          }
        }

      }
      long startTime = EnvironmentEdgeManager.currentTime();
      while (!admin.isTableAvailable(targetTableName)) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
        }
        if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) {
          throw new IOException("Time out " + TABLE_AVAILABILITY_WAIT_TIME + "ms expired, table "
            + targetTableName + " is still not available");
        }
      }
    }
  }

  /**
   * Pushes restore-job settings into the configuration consumed by the RestoreJob implementation.
   * @param keepOriginalSplits whether the original region splits should be kept
   */
  private void configureForRestoreJob(boolean keepOriginalSplits) {
    conf.setBoolean(RestoreJob.KEEP_ORIGINAL_SPLITS_KEY, keepOriginalSplits);
    conf.set(RestoreJob.BACKUP_ROOT_PATH_KEY, backupRootPath.toString());
  }
}