/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.snapshot;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.SnapshotDescription;
import org.apache.hadoop.hbase.client.SnapshotType;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
import org.apache.hadoop.hbase.regionserver.ConstantSizeRegionSplitPolicy;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;

/**
 * Test creating/using/deleting snapshots from the client.
 * <p>
 * This is an end-to-end test for the snapshot utility.
 * <p>
 * TODO: This is essentially a clone of TestSnapshotFromClient. It is worth refactoring because
 * there will be a few more flavors of snapshots that need to run these tests.
 */
@Category({ RegionServerTests.class, LargeTests.class })
public class TestFlushSnapshotFromClient {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestFlushSnapshotFromClient.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestFlushSnapshotFromClient.class);

  /** Orders regions by their start key so that neighbouring indexes are adjacent regions. */
  private static final Comparator<RegionInfo> BY_START_KEY =
    (r1, r2) -> Bytes.compareTo(r1.getStartKey(), r2.getStartKey());

  /** Highest region index touched by {@link #mergeTwoRegionPairs()}, plus one. */
  private static final int MIN_REGIONS_FOR_MERGE = 6;

  protected static final HBaseTestingUtil UTIL = new HBaseTestingUtil();
  protected static final int NUM_RS = 2;
  protected static final byte[] TEST_FAM = Bytes.toBytes("fam");
  protected static final TableName TABLE_NAME = TableName.valueOf("test");
  protected final int DEFAULT_NUM_ROWS = 100;
  protected Admin admin = null;

  /**
   * Applies {@link #setupConf(Configuration)} and starts a mini cluster with {@link #NUM_RS}
   * region servers, shared by every test in the class.
   */
  @BeforeClass
  public static void setupCluster() throws Exception {
    setupConf(UTIL.getConfiguration());
    UTIL.startMiniCluster(NUM_RS);
  }

  /**
   * Tunes the given configuration for the snapshot tests.
   * @param conf the configuration to modify in place
   */
  protected static void setupConf(Configuration conf) {
    // disable the ui
    conf.setInt("hbase.regionserver.info.port", -1);
    // change the flush size to a small amount, regulating number of store files
    conf.setInt("hbase.hregion.memstore.flush.size", 25000);
    // so make sure we get a compaction when doing a load, but keep around some
    // files in the store
    conf.setInt("hbase.hstore.compaction.min", 10);
    conf.setInt("hbase.hstore.compactionThreshold", 10);
    // block writes if we get to 12 store files
    conf.setInt("hbase.hstore.blockingStoreFiles", 12);
    // Enable snapshot
    conf.setBoolean(SnapshotManager.HBASE_SNAPSHOT_ENABLED, true);
    conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
      ConstantSizeRegionSplitPolicy.class.getName());
  }

  /** Creates the test table and obtains the {@link Admin} used by the tests. */
  @Before
  public void setup() throws Exception {
    createTable();
    this.admin = UTIL.getConnection().getAdmin();
  }

  /** Creates {@link #TABLE_NAME} with the {@link #TEST_FAM} family. */
  protected void createTable() throws Exception {
    SnapshotTestingUtils.createTable(UTIL, TABLE_NAME, TEST_FAM);
  }

  /**
   * Deletes the test table, all snapshots and the archive directory, and closes the
   * {@link Admin}.
   */
  @After
  public void tearDown() throws Exception {
    try {
      UTIL.deleteTable(TABLE_NAME);
      SnapshotTestingUtils.deleteAllSnapshots(this.admin);
    } finally {
      // Release the Admin even when the table/snapshot cleanup above fails.
      if (this.admin != null) {
        this.admin.close();
      }
    }
    SnapshotTestingUtils.deleteArchiveDirectory(UTIL);
  }

  /** Shuts the mini cluster down; a failure to do so is logged rather than rethrown. */
  @AfterClass
  public static void cleanupTest() throws Exception {
    try {
      UTIL.shutdownMiniCluster();
    } catch (Exception e) {
      LOG.warn("failure shutting down cluster", e);
    }
  }

  /**
   * Test simple flush snapshotting a table that is online
   */
  @Test
  public void testFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    admin.snapshot(snapshotString, TABLE_NAME, SnapshotType.FLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  /**
   * Test snapshotting a table that is online without flushing
   */
  @Test
  public void testSkipFlushTableSnapshot() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table; close the Table handle as soon as the load is done
    try (Table table = UTIL.getConnection().getTable(TABLE_NAME)) {
      UTIL.loadTable(table, TEST_FAM);
    }
    UTIL.flush(TABLE_NAME);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotName = "skipFlushTableSnapshot";
    admin.snapshot(snapshotName, TABLE_NAME, SnapshotType.SKIPFLUSH);
    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshotName, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);

    // the snapshot must be gone once it has been deleted
    admin.deleteSnapshot(snapshotName);
    SnapshotTestingUtils.assertNoSnapshots(admin);
  }

  /**
   * Test simple flush snapshotting a table that is online, triggered through
   * {@link Admin#execProcedure} instead of {@link Admin#snapshot}.
   */
  @Test
  public void testFlushTableSnapshotWithProcedure() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);

    // put some stuff in the table
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    LOG.debug("FS state before snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // take a snapshot of the enabled table
    String snapshotString = "offlineTableSnapshot";
    byte[] snapshot = Bytes.toBytes(snapshotString);
    Map<String, String> props = new HashMap<>();
    props.put("table", TABLE_NAME.getNameAsString());
    admin.execProcedure(SnapshotManager.ONLINE_SNAPSHOT_CONTROLLER_DESCRIPTION, snapshotString,
      props);

    LOG.debug("Snapshot completed.");

    // make sure we have the snapshot
    List<SnapshotDescription> snapshots =
      SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot, TABLE_NAME);

    // make sure its a valid snapshot
    LOG.debug("FS state after snapshot:");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    SnapshotTestingUtils.confirmSnapshotValid(UTIL,
      ProtobufUtil.createHBaseProtosSnapshotDesc(snapshots.get(0)), TABLE_NAME, TEST_FAM);
  }

  /**
   * Snapshotting a table that does not exist must fail with a {@link SnapshotCreationException}.
   */
  @Test
  public void testSnapshotFailsOnNonExistantTable() throws Exception {
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    TableName tableName = TableName.valueOf("_not_a_table");

    // make sure the table doesn't exist
    boolean tableExists;
    do {
      try {
        admin.getDescriptor(tableName);
        tableExists = true;
        LOG.error("Table:{} already exists, checking a new name", tableName);
        // Extend the name with '_': unlike '!', it is a legal table-name character, so
        // TableName.valueOf accepts the new candidate instead of throwing.
        tableName = TableName.valueOf(tableName.getNameAsString() + "_");
      } catch (TableNotFoundException e) {
        tableExists = false;
      }
    } while (tableExists);

    // snapshot the non-existant table
    try {
      admin.snapshot("fail", tableName, SnapshotType.FLUSH);
      fail("Snapshot succeeded even though there is not table.");
    } catch (SnapshotCreationException e) {
      LOG.info("Correctly failed to snapshot a non-existant table:{}", e.getMessage());
    }
  }

  /**
   * Helper method for testing async snapshot operations. Just waits for the given snapshot to
   * complete on the server by repeatedly checking the master, sleeping 200 ms between checks.
   * @param master       the master running the snapshot
   * @param snapshot     the snapshot to check
   * @param timeoutNanos the overall time, in nanoseconds, to keep polling before giving up
   * @throws TimeoutException if the snapshot is not reported done within {@code timeoutNanos}
   */
  private static void waitForSnapshotToComplete(HMaster master,
    SnapshotProtos.SnapshotDescription snapshot, long timeoutNanos) throws Exception {
    final IsSnapshotDoneRequest request =
      IsSnapshotDoneRequest.newBuilder().setSnapshot(snapshot).build();
    long start = System.nanoTime();
    while (System.nanoTime() - start < timeoutNanos) {
      try {
        IsSnapshotDoneResponse done = master.getMasterRpcServices().isSnapshotDone(null, request);
        if (done.getDone()) {
          return;
        }
      } catch (ServiceException e) {
        // ignore UnknownSnapshotException, this is possible as for AsyncAdmin, the method will
        // return immediately after sending out the request, no matter whether the master has
        // processed the request or not.
        if (!(e.getCause() instanceof UnknownSnapshotException)) {
          throw e;
        }
      }

      Thread.sleep(200);
    }
    throw new TimeoutException("Timeout waiting for snapshot " + snapshot + " to complete");
  }

  /** Takes a flush snapshot asynchronously and polls the master until it completes. */
  @Test
  public void testAsyncFlushSnapshot() throws Exception {
    SnapshotProtos.SnapshotDescription snapshot = SnapshotProtos.SnapshotDescription.newBuilder()
      .setName("asyncSnapshot").setTable(TABLE_NAME.getNameAsString())
      .setType(SnapshotProtos.SnapshotDescription.Type.FLUSH).build();

    // take the snapshot async
    admin.snapshotAsync(new SnapshotDescription("asyncSnapshot", TABLE_NAME, SnapshotType.FLUSH));

    // constantly loop, looking for the snapshot to complete
    HMaster master = UTIL.getMiniHBaseCluster().getMaster();
    waitForSnapshotToComplete(master, snapshot, TimeUnit.MINUTES.toNanos(1));
    LOG.info(" === Async Snapshot Completed ===");
    UTIL.getHBaseCluster().getMaster().getMasterFileSystem().logFileSystemState(LOG);

    // make sure we get the snapshot
    SnapshotTestingUtils.assertOneSnapshotThatMatches(admin, snapshot);
  }

  /**
   * A snapshot taken before regions are merged must still be cloneable, with the full row count,
   * after the merge.
   */
  @Test
  public void testSnapshotStateAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Take a snapshot
    String snapshotBeforeMergeName = "snapshotBeforeMerge";
    admin.snapshot(snapshotBeforeMergeName, TABLE_NAME, SnapshotType.FLUSH);

    // Clone the table
    TableName cloneBeforeMergeName = TableName.valueOf("cloneBeforeMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneBeforeMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneBeforeMergeName);

    // Merge two pairs of regions and verify the region count dropped by two
    mergeTwoRegionPairs();

    // Clone the table
    TableName cloneAfterMergeName = TableName.valueOf("cloneAfterMerge");
    admin.cloneSnapshot(snapshotBeforeMergeName, cloneAfterMergeName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneAfterMergeName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneBeforeMergeName, numRows);
    verifyRowCount(UTIL, cloneAfterMergeName, numRows);

    // clean up the cloned tables; tearDown() only removes TABLE_NAME
    UTIL.deleteTable(cloneAfterMergeName);
    UTIL.deleteTable(cloneBeforeMergeName);
  }

  /**
   * A snapshot taken after regions are merged must be cloneable with the full row count.
   */
  @Test
  public void testTakeSnapshotAfterMerge() throws Exception {
    int numRows = DEFAULT_NUM_ROWS;
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, numRows, TEST_FAM);

    // Merge two pairs of regions and verify the region count dropped by two
    mergeTwoRegionPairs();

    // Take a snapshot
    String snapshotName = "snapshotAfterMerge";
    SnapshotTestingUtils.snapshot(admin, snapshotName, TABLE_NAME, SnapshotType.FLUSH, 3);

    // Clone the table
    TableName cloneName = TableName.valueOf("cloneMerge");
    admin.cloneSnapshot(snapshotName, cloneName);
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, cloneName);

    verifyRowCount(UTIL, TABLE_NAME, numRows);
    verifyRowCount(UTIL, cloneName, numRows);

    // clean up the cloned table; tearDown() only removes TABLE_NAME
    UTIL.deleteTable(cloneName);
  }

  /**
   * Basic end-to-end test of simple-flush-based snapshots
   */
  @Test
  public void testFlushCreateListDestroy() throws Exception {
    LOG.debug("------- Starting Snapshot test -------------");
    // make sure we don't fail on listing snapshots
    SnapshotTestingUtils.assertNoSnapshots(admin);
    // load the table so we have some data
    SnapshotTestingUtils.loadData(UTIL, TABLE_NAME, DEFAULT_NUM_ROWS, TEST_FAM);

    String snapshotName = "flushSnapshotCreateListDestroy";
    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
    SnapshotTestingUtils.createSnapshotAndValidate(admin, TABLE_NAME, Bytes.toString(TEST_FAM),
      snapshotName, rootDir, fs, true);
  }

  /**
   * Merges the regions of {@link #TABLE_NAME} at start-key-sorted indexes (1, 2) and (4, 5),
   * waits for the merges and asserts that the table ends up with two regions fewer.
   */
  private void mergeTwoRegionPairs() throws IOException, InterruptedException {
    List<RegionInfo> regions = admin.getRegions(TABLE_NAME);
    if (regions.size() < MIN_REGIONS_FOR_MERGE) {
      // Fail with a readable message instead of an IndexOutOfBoundsException below.
      fail("Expected at least " + MIN_REGIONS_FOR_MERGE + " regions in " + TABLE_NAME
        + " but found " + regions.size());
    }
    Collections.sort(regions, BY_START_KEY);

    // Each of the two merges below replaces two regions with one.
    int numRegionsAfterMerge = regions.size() - 2;
    admin.mergeRegionsAsync(regions.get(1).getEncodedNameAsBytes(),
      regions.get(2).getEncodedNameAsBytes(), true);
    admin.mergeRegionsAsync(regions.get(4).getEncodedNameAsBytes(),
      regions.get(5).getEncodedNameAsBytes(), true);

    waitRegionsAfterMerge(numRegionsAfterMerge);
    assertEquals(numRegionsAfterMerge, admin.getRegions(TABLE_NAME).size());
  }

  /**
   * Polls until {@link #TABLE_NAME} has the expected number of regions or 15 seconds have
   * passed, then waits for the table to be online. A timeout is not reported here; the caller's
   * assertion on the region count catches it.
   * @param numRegionsAfterMerge the region count expected once the merges have completed
   */
  private void waitRegionsAfterMerge(final long numRegionsAfterMerge)
    throws IOException, InterruptedException {
    long startTime = EnvironmentEdgeManager.currentTime();
    while (admin.getRegions(TABLE_NAME).size() != numRegionsAfterMerge) {
      // This may be flaky... if after 15sec the merge is not complete give up
      // it will fail in the assertEquals(numRegionsAfterMerge).
      if ((EnvironmentEdgeManager.currentTime() - startTime) > 15000) {
        break;
      }
      Thread.sleep(100);
    }
    SnapshotTestingUtils.waitForTableToBeOnline(UTIL, TABLE_NAME);
  }

  /**
   * Verifies the row count of a table; delegates to
   * {@link SnapshotTestingUtils#verifyRowCount}.
   * @param util         the testing utility for the cluster holding the table
   * @param tableName    the table to check
   * @param expectedRows the number of rows the table is expected to contain
   */
  protected void verifyRowCount(final HBaseTestingUtil util, final TableName tableName,
    long expectedRows) throws IOException {
    SnapshotTestingUtils.verifyRowCount(util, tableName, expectedRows);
  }

  /**
   * Counts the rows of a table; delegates to {@link HBaseTestingUtil#countRows}.
   * @param table    the table to count
   * @param families the column families passed through to the count
   * @return the row count reported by the testing utility
   */
  protected int countRows(final Table table, final byte[]... families) throws IOException {
    return UTIL.countRows(table, families);
  }
}