001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.IOException; 021import java.util.List; 022import java.util.Map; 023import org.apache.hadoop.fs.FileSystem; 024import org.apache.hadoop.fs.Path; 025import org.apache.hadoop.hbase.TableName; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.client.RegionReplicaUtil; 028import org.apache.hadoop.hbase.client.TableDescriptor; 029import org.apache.hadoop.hbase.master.MasterServices; 030import org.apache.hadoop.hbase.mob.MobUtils; 031import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 032import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; 033import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 034import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 035import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 036import org.apache.hadoop.hbase.util.CommonFSUtils; 037import org.apache.yetus.audience.InterfaceAudience; 038import org.apache.yetus.audience.InterfaceStability; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 043import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 044import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest; 045 046/** 047 * General snapshot verification on the master. 048 * <p> 049 * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't attempt 050 * to verify that the files are exact copies (that would be paramount to taking the snapshot 051 * again!), but instead just attempts to ensure that the files match the expected files and are the 052 * same length. 053 * <p> 054 * Taking an online snapshots can race against other operations and this is an last line of defense. 055 * For example, if meta changes between when snapshots are taken not all regions of a table may be 056 * present. This can be caused by a region split (daughters present on this scan, but snapshot took 057 * parent), or move (snapshots only checks lists of region servers, a move could have caused a 058 * region to be skipped or done twice). 059 * <p> 060 * Current snapshot files checked: 061 * <ol> 062 * <li>SnapshotDescription is readable</li> 063 * <li>Table info is readable</li> 064 * <li>Regions</li> 065 * </ol> 066 * <ul> 067 * <li>Matching regions in the snapshot as currently in the table</li> 068 * <li>{@link RegionInfo} matches the current and stored regions</li> 069 * <li>All referenced hfiles have valid names</li> 070 * <li>All the hfiles are present (either in .archive directory in the region)</li> 071 * <li>All recovered.edits files are present (by name) and have the correct file size</li> 072 * </ul> 073 */ 074@InterfaceAudience.Private 075@InterfaceStability.Unstable 076public final class MasterSnapshotVerifier { 077 private static final Logger LOG = LoggerFactory.getLogger(MasterSnapshotVerifier.class); 078 079 private SnapshotDescription snapshot; 080 private FileSystem workingDirFs; 081 private TableName tableName; 082 private MasterServices services; 083 084 /** 085 * @param services services for the master 086 * @param snapshot snapshot to check 087 * @param workingDirFs the file system containing the temporary snapshot information 088 */ 089 public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, 090 FileSystem workingDirFs) { 091 this.workingDirFs = workingDirFs; 092 this.services = services; 093 this.snapshot = snapshot; 094 this.tableName = TableName.valueOf(snapshot.getTable()); 095 } 096 097 /** 098 * Verify that the snapshot in the directory is a valid snapshot 099 * @param snapshotDir snapshot directory to check 100 * @throws CorruptedSnapshotException if the snapshot is invalid 101 * @throws IOException if there is an unexpected connection issue to the filesystem 102 */ 103 public void verifySnapshot(Path snapshotDir, boolean verifyRegions) 104 throws CorruptedSnapshotException, IOException { 105 SnapshotManifest manifest = 106 SnapshotManifest.open(services.getConfiguration(), workingDirFs, snapshotDir, snapshot); 107 // verify snapshot info matches 108 verifySnapshotDescription(snapshotDir); 109 110 // check that tableinfo is a valid table description 111 verifyTableInfo(manifest); 112 113 // check that each region is valid 114 verifyRegions(manifest, verifyRegions); 115 } 116 117 /** 118 * Check that the snapshot description written in the filesystem matches the current snapshot 119 * @param snapshotDir snapshot directory to check 120 */ 121 private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException { 122 SnapshotDescription found = 123 SnapshotDescriptionUtils.readSnapshotInfo(workingDirFs, snapshotDir); 124 if (!this.snapshot.equals(found)) { 125 throw new CorruptedSnapshotException( 126 "Snapshot read (" + found + ") doesn't equal snapshot we ran (" + snapshot + ").", 127 ProtobufUtil.createSnapshotDesc(snapshot)); 128 } 129 } 130 131 /** 132 * Check that the table descriptor for the snapshot is a valid table descriptor 133 * @param manifest snapshot manifest to inspect 134 */ 135 private void verifyTableInfo(final SnapshotManifest manifest) throws IOException { 136 TableDescriptor htd = manifest.getTableDescriptor(); 137 if (htd == null) { 138 throw new CorruptedSnapshotException("Missing Table Descriptor", 139 ProtobufUtil.createSnapshotDesc(snapshot)); 140 } 141 142 if (!htd.getTableName().getNameAsString().equals(snapshot.getTable())) { 143 throw new CorruptedSnapshotException("Invalid Table Descriptor. Expected " 144 + snapshot.getTable() + " name, got " + htd.getTableName().getNameAsString(), 145 ProtobufUtil.createSnapshotDesc(snapshot)); 146 } 147 } 148 149 /** 150 * Check that all the regions in the snapshot are valid, and accounted for. 151 * @param manifest snapshot manifest to inspect 152 * @throws IOException if we can't reach hbase:meta or read the files from the FS 153 */ 154 private void verifyRegions(SnapshotManifest manifest, boolean verifyRegions) throws IOException { 155 List<RegionInfo> regions = services.getAssignmentManager().getTableRegions(tableName, false); 156 // Remove the non-default regions 157 RegionReplicaUtil.removeNonDefaultRegions(regions); 158 159 Map<String, SnapshotRegionManifest> regionManifests = manifest.getRegionManifestsMap(); 160 if (regionManifests == null) { 161 String msg = "Snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " looks empty"; 162 LOG.error(msg); 163 throw new CorruptedSnapshotException(msg); 164 } 165 166 String errorMsg = ""; 167 boolean hasMobStore = false; 168 // the mob region is a dummy region, it's not a real region in HBase. 169 // the mob region has a special name, it could be found by the region name. 170 if (regionManifests.get(MobUtils.getMobRegionInfo(tableName).getEncodedName()) != null) { 171 hasMobStore = true; 172 } 173 int realRegionCount = hasMobStore ? regionManifests.size() - 1 : regionManifests.size(); 174 if (realRegionCount != regions.size()) { 175 errorMsg = 176 "Regions moved during the snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot) 177 + "'. expected=" + regions.size() + " snapshotted=" + realRegionCount + "."; 178 LOG.error(errorMsg); 179 } 180 181 // Verify RegionInfo 182 if (verifyRegions) { 183 for (RegionInfo region : regions) { 184 SnapshotRegionManifest regionManifest = regionManifests.get(region.getEncodedName()); 185 if (regionManifest == null) { 186 // could happen due to a move or split race. 187 String mesg = " No snapshot region directory found for region:" + region; 188 if (errorMsg.isEmpty()) { 189 errorMsg = mesg; 190 } 191 LOG.error(mesg); 192 continue; 193 } 194 195 verifyRegionInfo(region, regionManifest); 196 } 197 if (!errorMsg.isEmpty()) { 198 throw new CorruptedSnapshotException(errorMsg); 199 } 200 201 // Verify Snapshot HFiles 202 // Requires the root directory file system as HFiles are stored in the root directory 203 SnapshotReferenceUtil.verifySnapshot(services.getConfiguration(), 204 CommonFSUtils.getRootDirFileSystem(services.getConfiguration()), manifest); 205 } 206 } 207 208 /** 209 * Verify that the regionInfo is valid 210 * @param region the region to check 211 * @param manifest snapshot manifest to inspect 212 */ 213 private void verifyRegionInfo(final RegionInfo region, final SnapshotRegionManifest manifest) 214 throws IOException { 215 RegionInfo manifestRegionInfo = ProtobufUtil.toRegionInfo(manifest.getRegionInfo()); 216 if (RegionInfo.COMPARATOR.compare(region, manifestRegionInfo) != 0) { 217 String msg = 218 "Manifest region info " + manifestRegionInfo + "doesn't match expected region:" + region; 219 throw new CorruptedSnapshotException(msg, ProtobufUtil.createSnapshotDesc(snapshot)); 220 } 221 } 222}