/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;

/**
 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
 * snapshot to a temporary empty directory; this places only the snapshot reference files in that
 * directory. Actual data files are not copied.
 * <p>
 * This also allows one to run the scan from an online or offline hbase cluster. The snapshot files
 * can be exported to a pure-hdfs cluster using the org.apache.hadoop.hbase.snapshot.ExportSnapshot
 * tool, and this scanner can be used to run the scan directly over the snapshot files. The
 * snapshot should not be deleted while there are open scanners reading from snapshot files.
 * <p>
 * An internal RegionScanner is used to execute the {@link Scan} obtained from the user for each
 * region in the snapshot.
 * <p>
 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
 * snapshot files and data files. HBase also enforces security because all the requests are handled
 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
 * files directly from the file system, the user who is running the MR job must have sufficient
 * permissions to access snapshot and reference files. This means that to run mapreduce over
 * snapshot files, the job has to be run as the HBase user or the user must have group or other
 * privileges in the filesystem (see HBASE-8369). Note that granting other users access to read
 * from snapshot/data files will completely circumvent the access control enforced by HBase. See
 * org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
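 * <p>
 * A minimal usage sketch (illustrative only: the snapshot name and restore path below are made-up
 * placeholders, the restore directory must be writable by the current user and must not be under
 * the HBase root directory, and the surrounding code is assumed to handle the IOException thrown
 * by the constructor and by next()):
 *
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * Path restoreDir = new Path("/tmp/snapshot-restore");
 * Scan scan = new Scan();
 * // ResultScanner is Closeable, so try-with-resources cleans up the restore directory on close()
 * try (TableSnapshotScanner scanner =
 *   new TableSnapshotScanner(conf, restoreDir, "my_snapshot", scan)) {
 *   for (Result result = scanner.next(); result != null; result = scanner.next()) {
 *     // process each Result read from the snapshot's store files
 *   }
 * }
 * </pre>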
 */
@InterfaceAudience.Private
public class TableSnapshotScanner extends AbstractClientScanner {

  private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);

  private Configuration conf;
  private String snapshotName;
  private FileSystem fs;
  private Path rootDir;
  private Path restoreDir;
  private Scan scan;
  private ArrayList<RegionInfo> regions;
  private TableDescriptor htd;
  private final boolean snapshotAlreadyRestored;

  private ClientSideRegionScanner currentRegionScanner = null;
  private int currentRegion = -1;

  private int numOfCompleteRows = 0;

  /**
   * Creates a TableSnapshotScanner.
   * @param conf         the configuration
   * @param restoreDir   a temporary directory to copy the snapshot files into. Current user should
   *                     have write permissions to this directory, and this should not be a
   *                     subdirectory of rootDir. The scanner deletes the contents of the directory
   *                     once the scanner is closed.
   * @param snapshotName the name of the snapshot to read from
   * @param scan         a Scan representing scan parameters
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
    throws IOException {
    this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
  }

  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
    String snapshotName, Scan scan) throws IOException {
    this(conf, rootDir, restoreDir, snapshotName, scan, false);
  }

  /**
   * Creates a TableSnapshotScanner.
   * @param conf                    the configuration
   * @param rootDir                 root directory for HBase.
   * @param restoreDir              a temporary directory to copy the snapshot files into. Current
   *                                user should have write permissions to this directory, and this
   *                                should not be a subdirectory of rootDir. The scanner deletes the
   *                                contents of the directory once the scanner is closed.
   * @param snapshotName            the name of the snapshot to read from
   * @param scan                    a Scan representing scan parameters
   * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
    String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
    this.conf = conf;
    this.snapshotName = snapshotName;
    this.rootDir = rootDir;
    this.scan = scan;
    this.snapshotAlreadyRestored = snapshotAlreadyRestored;
    this.fs = rootDir.getFileSystem(conf);

    if (snapshotAlreadyRestored) {
      this.restoreDir = restoreDir;
      openWithoutRestoringSnapshot();
    } else {
      // restoreDir will be deleted in close(), use a unique sub directory
      this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
      openWithRestoringSnapshot();
    }

    initScanMetrics(scan);
  }

  private void openWithoutRestoringSnapshot() throws IOException {
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotProtos.SnapshotDescription snapshotDesc =
      SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
    if (regionManifests == null) {
      throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
    }

    regions = new ArrayList<>(regionManifests.size());
    regionManifests.stream().map(r -> HRegionInfo.convert(r.getRegionInfo()))
      .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
    htd = manifest.getTableDescriptor();
  }

  private boolean isValidRegion(RegionInfo hri) {
    // An offline split parent region should be excluded.
    if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
      return false;
    }
    return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
      hri.getEndKey());
  }

  private void openWithRestoringSnapshot() throws IOException {
    final RestoreSnapshotHelper.RestoreMetaChanges meta =
      RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
    final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();

    htd = meta.getTableDescriptor();
    regions = new ArrayList<>(restoredRegions.size());
    restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
  }

  @Override
  public Result next() throws IOException {
    Result result = null;
    while (true) {
      if (currentRegionScanner == null) {
        currentRegion++;
        if (currentRegion >= regions.size()) {
          return null;
        }

        RegionInfo hri = regions.get(currentRegion);
        currentRegionScanner =
          new ClientSideRegionScanner(conf, fs, restoreDir, htd, hri, scan, scanMetrics);
        if (this.scanMetrics != null) {
          this.scanMetrics.countOfRegions.incrementAndGet();
        }
      }

      try {
        result = currentRegionScanner.next();
        if (result != null) {
          if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
            result = null;
          }
          return result;
        }
      } finally {
        if (result == null) {
          currentRegionScanner.close();
          currentRegionScanner = null;
        }
      }
    }
  }

  private void cleanup() {
    try {
      if (fs.exists(this.restoreDir)) {
        if (!fs.delete(this.restoreDir, true)) {
          LOG.warn(
            "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir);
restoreDir: " + this.restoreDir); 215 } 216 } 217 } catch (IOException ex) { 218 LOG.warn( 219 "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex); 220 } 221 } 222 223 @Override 224 public void close() { 225 if (currentRegionScanner != null) { 226 currentRegionScanner.close(); 227 } 228 // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand. 229 if (!this.snapshotAlreadyRestored) { 230 cleanup(); 231 } 232 } 233 234 @Override 235 public boolean renewLease() { 236 throw new UnsupportedOperationException(); 237 } 238 239}