001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.backup.util; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.HashMap; 025import java.util.List; 026import java.util.TreeMap; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileStatus; 029import org.apache.hadoop.fs.FileSystem; 030import org.apache.hadoop.fs.FileUtil; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HConstants; 033import org.apache.hadoop.hbase.NamespaceDescriptor; 034import org.apache.hadoop.hbase.NamespaceNotFoundException; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.backup.BackupRestoreFactory; 037import org.apache.hadoop.hbase.backup.HBackupFileSystem; 038import org.apache.hadoop.hbase.backup.RestoreJob; 039import org.apache.hadoop.hbase.client.Admin; 040import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 041import org.apache.hadoop.hbase.client.Connection; 042import org.apache.hadoop.hbase.client.TableDescriptor; 043import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 044import org.apache.hadoop.hbase.io.HFileLink; 045import org.apache.hadoop.hbase.io.hfile.HFile; 046import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 047import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 048import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 049import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool; 050import org.apache.hadoop.hbase.util.Bytes; 051import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 052import org.apache.hadoop.hbase.util.FSTableDescriptors; 053import org.apache.yetus.audience.InterfaceAudience; 054import org.slf4j.Logger; 055import org.slf4j.LoggerFactory; 056 057import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 058 059/** 060 * A collection for methods used by multiple classes to restore HBase tables. 061 */ 062@InterfaceAudience.Private 063public class RestoreTool { 064 public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class); 065 private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000; 066 067 private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR }; 068 protected Configuration conf; 069 protected Path backupRootPath; 070 protected Path restoreRootDir; 071 protected String backupId; 072 protected FileSystem fs; 073 074 // store table name and snapshot dir mapping 075 private final HashMap<TableName, Path> snapshotMap = new HashMap<>(); 076 077 public RestoreTool(Configuration conf, final Path backupRootPath, final Path restoreRootDir, 078 final String backupId) throws IOException { 079 this.conf = conf; 080 this.backupRootPath = backupRootPath; 081 this.backupId = backupId; 082 this.fs = backupRootPath.getFileSystem(conf); 083 this.restoreRootDir = restoreRootDir; 084 } 085 086 /** 087 * return value represent path for: 088 * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn" 089 * @param tableName table name 090 * @return path to table archive 091 * @throws IOException exception 092 */ 093 Path getTableArchivePath(TableName tableName) throws IOException { 094 Path baseDir = 095 new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId), 096 HConstants.HFILE_ARCHIVE_DIRECTORY); 097 Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR); 098 Path archivePath = new Path(dataDir, tableName.getNamespaceAsString()); 099 Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString()); 100 if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) { 101 LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exists"); 102 tableArchivePath = null; // empty table has no archive 103 } 104 return tableArchivePath; 105 } 106 107 /** 108 * Gets region list 109 * @param tableName table name 110 * @return RegionList region list 111 * @throws IOException exception 112 */ 113 ArrayList<Path> getRegionList(TableName tableName) throws IOException { 114 Path tableArchivePath = getTableArchivePath(tableName); 115 ArrayList<Path> regionDirList = new ArrayList<>(); 116 FileStatus[] children = fs.listStatus(tableArchivePath); 117 for (FileStatus childStatus : children) { 118 // here child refer to each region(Name) 119 Path child = childStatus.getPath(); 120 regionDirList.add(child); 121 } 122 return regionDirList; 123 } 124 125 void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException { 126 try (Admin admin = conn.getAdmin()) { 127 admin.modifyTable(desc); 128 int attempt = 0; 129 int maxAttempts = 600; 130 while (!admin.isTableAvailable(desc.getTableName())) { 131 Thread.sleep(100); 132 attempt++; 133 if (attempt++ > maxAttempts) { 134 throw new IOException("Timeout expired " + (maxAttempts * 100) + "ms"); 135 } 136 } 137 } catch (Exception e) { 138 throw new IOException(e); 139 } 140 } 141 142 /** 143 * During incremental backup operation. Call WalPlayer to replay WAL in backup image Currently 144 * tableNames and newTablesNames only contain single table, will be expanded to multiple tables in 145 * the future 146 * @param conn HBase connection 147 * @param tableBackupPath backup path 148 * @param logDirs : incremental backup folders, which contains WAL 149 * @param tableNames : source tableNames(table names were backuped) 150 * @param newTableNames : target tableNames(table names to be restored to) 151 * @param incrBackupId incremental backup Id 152 * @throws IOException exception 153 */ 154 public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs, 155 TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException { 156 try (Admin admin = conn.getAdmin()) { 157 if (tableNames.length != newTableNames.length) { 158 throw new IOException("Number of source tables and target tables does not match!"); 159 } 160 FileSystem fileSys = tableBackupPath.getFileSystem(this.conf); 161 162 // for incremental backup image, expect the table already created either by user or previous 163 // full backup. Here, check that all new tables exists 164 for (TableName tableName : newTableNames) { 165 if (!admin.tableExists(tableName)) { 166 throw new IOException("HBase table " + tableName 167 + " does not exist. Create the table first, e.g. by restoring a full backup."); 168 } 169 } 170 // adjust table schema 171 for (int i = 0; i < tableNames.length; i++) { 172 TableName tableName = tableNames[i]; 173 TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId); 174 if (tableDescriptor == null) { 175 throw new IOException("Can't find " + tableName + "'s descriptor."); 176 } 177 LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId); 178 179 TableName newTableName = newTableNames[i]; 180 TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName); 181 List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies()); 182 List<ColumnFamilyDescriptor> existingFamilies = 183 Arrays.asList(newTableDescriptor.getColumnFamilies()); 184 TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor); 185 boolean schemaChangeNeeded = false; 186 for (ColumnFamilyDescriptor family : families) { 187 if (!existingFamilies.contains(family)) { 188 builder.setColumnFamily(family); 189 schemaChangeNeeded = true; 190 } 191 } 192 for (ColumnFamilyDescriptor family : existingFamilies) { 193 if (!families.contains(family)) { 194 builder.removeColumnFamily(family.getName()); 195 schemaChangeNeeded = true; 196 } 197 } 198 if (schemaChangeNeeded) { 199 modifyTableSync(conn, builder.build()); 200 LOG.info("Changed " + newTableDescriptor.getTableName() + " to: " + newTableDescriptor); 201 } 202 } 203 RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf); 204 205 restoreService.run(logDirs, tableNames, restoreRootDir, newTableNames, false); 206 } 207 } 208 209 public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName, 210 TableName newTableName, boolean truncateIfExists, String lastIncrBackupId) throws IOException { 211 createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists, 212 lastIncrBackupId); 213 } 214 215 /** 216 * Returns value represent path for path to backup table snapshot directory: 217 * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot" 218 * @param backupRootPath backup root path 219 * @param tableName table name 220 * @param backupId backup Id 221 * @return path for snapshot 222 */ 223 Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) { 224 return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId), 225 HConstants.SNAPSHOT_DIR_NAME); 226 } 227 228 /** 229 * Returns value represent path for: 230 * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/ 231 * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and 232 * 0.98) this path contains .snapshotinfo, .data.manifest (trunk) 233 * @param tableName table name 234 * @return path to table info 235 * @throws IOException exception 236 */ 237 Path getTableInfoPath(TableName tableName) throws IOException { 238 Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId); 239 Path tableInfoPath = null; 240 241 // can't build the path directly as the timestamp values are different 242 FileStatus[] snapshots = fs.listStatus(tableSnapShotPath, 243 new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs)); 244 for (FileStatus snapshot : snapshots) { 245 tableInfoPath = snapshot.getPath(); 246 // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest"; 247 if (tableInfoPath.getName().endsWith("data.manifest")) { 248 break; 249 } 250 } 251 return tableInfoPath; 252 } 253 254 /** 255 * Get table descriptor 256 * @param tableName is the table backed up 257 * @return {@link TableDescriptor} saved in backup image of the table 258 */ 259 TableDescriptor getTableDesc(TableName tableName) throws IOException { 260 Path tableInfoPath = this.getTableInfoPath(tableName); 261 SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath); 262 SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc); 263 TableDescriptor tableDescriptor = manifest.getTableDescriptor(); 264 if (!tableDescriptor.getTableName().equals(tableName)) { 265 LOG.error("couldn't find Table Desc for table: " + tableName + " under tableInfoPath: " 266 + tableInfoPath.toString()); 267 LOG.error( 268 "tableDescriptor.getNameAsString() = " + tableDescriptor.getTableName().getNameAsString()); 269 throw new FileNotFoundException("couldn't find Table Desc for table: " + tableName 270 + " under tableInfoPath: " + tableInfoPath.toString()); 271 } 272 return tableDescriptor; 273 } 274 275 private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName, 276 String lastIncrBackupId) throws IOException { 277 if (lastIncrBackupId != null) { 278 String target = 279 BackupUtils.getTableBackupDir(backupRootPath.toString(), lastIncrBackupId, tableName); 280 return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target)); 281 } 282 return null; 283 } 284 285 private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName, 286 Path tableBackupPath, boolean truncateIfExists, String lastIncrBackupId) throws IOException { 287 if (newTableName == null) { 288 newTableName = tableName; 289 } 290 FileSystem fileSys = tableBackupPath.getFileSystem(this.conf); 291 292 // get table descriptor first 293 TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId); 294 if (tableDescriptor != null) { 295 LOG.debug("Retrieved descriptor: " + tableDescriptor + " thru " + lastIncrBackupId); 296 } 297 298 if (tableDescriptor == null) { 299 Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId); 300 if (fileSys.exists(tableSnapshotPath)) { 301 // snapshot path exist means the backup path is in HDFS 302 // check whether snapshot dir already recorded for target table 303 if (snapshotMap.get(tableName) != null) { 304 SnapshotDescription desc = 305 SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath); 306 SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc); 307 tableDescriptor = manifest.getTableDescriptor(); 308 } else { 309 tableDescriptor = getTableDesc(tableName); 310 snapshotMap.put(tableName, getTableInfoPath(tableName)); 311 } 312 if (tableDescriptor == null) { 313 LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost"); 314 } 315 } else { 316 throw new IOException( 317 "Table snapshot directory: " + tableSnapshotPath + " does not exist."); 318 } 319 } 320 321 Path tableArchivePath = getTableArchivePath(tableName); 322 if (tableArchivePath == null) { 323 if (tableDescriptor != null) { 324 // find table descriptor but no archive dir means the table is empty, create table and exit 325 if (LOG.isDebugEnabled()) { 326 LOG.debug("find table descriptor but no archive dir for table " + tableName 327 + ", will only create table"); 328 } 329 tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor); 330 checkAndCreateTable(conn, newTableName, null, tableDescriptor, truncateIfExists); 331 return; 332 } else { 333 throw new IllegalStateException( 334 "Cannot restore hbase table because directory '" + " tableArchivePath is null."); 335 } 336 } 337 338 if (tableDescriptor == null) { 339 tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build(); 340 } else { 341 tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor); 342 } 343 344 // record all region dirs: 345 // load all files in dir 346 try { 347 ArrayList<Path> regionPathList = getRegionList(tableName); 348 349 // should only try to create the table with all region informations, so we could pre-split 350 // the regions in fine grain 351 checkAndCreateTable(conn, newTableName, regionPathList, tableDescriptor, truncateIfExists); 352 RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf); 353 Path[] paths = new Path[regionPathList.size()]; 354 regionPathList.toArray(paths); 355 restoreService.run(paths, new TableName[] { tableName }, restoreRootDir, 356 new TableName[] { newTableName }, true); 357 358 } catch (Exception e) { 359 LOG.error(e.toString(), e); 360 throw new IllegalStateException("Cannot restore hbase table", e); 361 } 362 } 363 364 /** 365 * Gets region list 366 * @param tableArchivePath table archive path 367 * @return RegionList region list 368 * @throws IOException exception 369 */ 370 ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException { 371 ArrayList<Path> regionDirList = new ArrayList<>(); 372 FileStatus[] children = fs.listStatus(tableArchivePath); 373 for (FileStatus childStatus : children) { 374 // here child refer to each region(Name) 375 Path child = childStatus.getPath(); 376 regionDirList.add(child); 377 } 378 return regionDirList; 379 } 380 381 /** 382 * Calculate region boundaries and add all the column families to the table descriptor 383 * @param regionDirList region dir list 384 * @return a set of keys to store the boundaries 385 */ 386 byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException { 387 TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR); 388 // Build a set of keys to store the boundaries 389 // calculate region boundaries and add all the column families to the table descriptor 390 for (Path regionDir : regionDirList) { 391 LOG.debug("Parsing region dir: " + regionDir); 392 Path hfofDir = regionDir; 393 394 if (!fs.exists(hfofDir)) { 395 LOG.warn("HFileOutputFormat dir " + hfofDir + " not found"); 396 } 397 398 FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); 399 if (familyDirStatuses == null) { 400 throw new IOException("No families found in " + hfofDir); 401 } 402 403 for (FileStatus stat : familyDirStatuses) { 404 if (!stat.isDirectory()) { 405 LOG.warn("Skipping non-directory " + stat.getPath()); 406 continue; 407 } 408 boolean isIgnore = false; 409 String pathName = stat.getPath().getName(); 410 for (String ignore : ignoreDirs) { 411 if (pathName.contains(ignore)) { 412 LOG.warn("Skipping non-family directory" + pathName); 413 isIgnore = true; 414 break; 415 } 416 } 417 if (isIgnore) { 418 continue; 419 } 420 Path familyDir = stat.getPath(); 421 LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]"); 422 // Skip _logs, etc 423 if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) { 424 continue; 425 } 426 427 // start to parse hfile inside one family dir 428 Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); 429 for (Path hfile : hfiles) { 430 if ( 431 hfile.getName().startsWith("_") || hfile.getName().startsWith(".") 432 || StoreFileInfo.isReference(hfile.getName()) 433 || HFileLink.isHFileLink(hfile.getName()) 434 ) { 435 continue; 436 } 437 HFile.Reader reader = HFile.createReader(fs, hfile, conf); 438 final byte[] first, last; 439 try { 440 if (reader.getEntries() == 0) { 441 LOG.debug("Skipping hfile with 0 entries: " + hfile); 442 continue; 443 } 444 first = reader.getFirstRowKey().get(); 445 last = reader.getLastRowKey().get(); 446 LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first=" 447 + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last)); 448 449 // To eventually infer start key-end key boundaries 450 Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0; 451 map.put(first, value + 1); 452 value = map.containsKey(last) ? (Integer) map.get(last) : 0; 453 map.put(last, value - 1); 454 } finally { 455 reader.close(); 456 } 457 } 458 } 459 } 460 return BulkLoadHFilesTool.inferBoundaries(map); 461 } 462 463 /** 464 * Prepare the table for bulkload, most codes copied from {@code createTable} method in 465 * {@code BulkLoadHFilesTool}. 466 * @param conn connection 467 * @param targetTableName target table name 468 * @param regionDirList region directory list 469 * @param htd table descriptor 470 * @param truncateIfExists truncates table if exists 471 * @throws IOException exception 472 */ 473 private void checkAndCreateTable(Connection conn, TableName targetTableName, 474 ArrayList<Path> regionDirList, TableDescriptor htd, boolean truncateIfExists) 475 throws IOException { 476 try (Admin admin = conn.getAdmin()) { 477 boolean createNew = false; 478 if (admin.tableExists(targetTableName)) { 479 if (truncateIfExists) { 480 LOG.info( 481 "Truncating exising target table '" + targetTableName + "', preserving region splits"); 482 admin.disableTable(targetTableName); 483 admin.truncateTable(targetTableName, true); 484 } else { 485 LOG.info("Using exising target table '" + targetTableName + "'"); 486 } 487 } else { 488 createNew = true; 489 } 490 if (createNew) { 491 LOG.info("Creating target table '" + targetTableName + "'"); 492 byte[][] keys = null; 493 try { 494 if (regionDirList == null || regionDirList.size() == 0) { 495 admin.createTable(htd); 496 } else { 497 keys = generateBoundaryKeys(regionDirList); 498 if (keys.length > 0) { 499 // create table using table descriptor and region boundaries 500 admin.createTable(htd, keys); 501 } else { 502 admin.createTable(htd); 503 } 504 } 505 } catch (NamespaceNotFoundException e) { 506 LOG.warn("There was no namespace and the same will be created"); 507 String namespaceAsString = targetTableName.getNamespaceAsString(); 508 LOG.info("Creating target namespace '" + namespaceAsString + "'"); 509 admin.createNamespace(NamespaceDescriptor.create(namespaceAsString).build()); 510 if (null == keys) { 511 admin.createTable(htd); 512 } else { 513 admin.createTable(htd, keys); 514 } 515 } 516 517 } 518 long startTime = EnvironmentEdgeManager.currentTime(); 519 while (!admin.isTableAvailable(targetTableName)) { 520 try { 521 Thread.sleep(100); 522 } catch (InterruptedException ie) { 523 Thread.currentThread().interrupt(); 524 } 525 if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) { 526 throw new IOException("Time out " + TABLE_AVAILABILITY_WAIT_TIME + "ms expired, table " 527 + targetTableName + " is still not available"); 528 } 529 } 530 } 531 } 532}