001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io; 019 020import java.io.IOException; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.fs.Path; 025import org.apache.hadoop.hbase.HConstants; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.client.RegionInfo; 028import org.apache.hadoop.hbase.client.RegionInfoBuilder; 029import org.apache.hadoop.hbase.mob.MobConstants; 030import org.apache.hadoop.hbase.regionserver.HRegion; 031import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 032import org.apache.hadoop.hbase.util.CommonFSUtils; 033import org.apache.hadoop.hbase.util.HFileArchiveUtil; 034import org.apache.hadoop.hbase.util.Pair; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038 039/** 040 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile 041 * archive directory (/hbase/.archive) HFileLink allows to access the referenced hfile regardless of 042 * the location where it is. 043 * <p> 044 * Searches for hfiles in the following order and locations: 045 * <ul> 046 * <li>/hbase/table/region/cf/hfile</li> 047 * <li>/hbase/.archive/table/region/cf/hfile</li> 048 * </ul> 049 * The link checks first in the original path if it is not present it fallbacks to the archived 050 * path. 051 */ 052@InterfaceAudience.Private 053@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS", 054 justification = "To be fixed but warning suppressed for now") 055public class HFileLink extends FileLink { 056 private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); 057 058 /** 059 * A non-capture group, for HFileLink, so that this can be embedded. The HFileLink describe a link 060 * to an hfile in a different table/region and the name is in the form: table=region-hfile. 061 * <p> 062 * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid 063 * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for 064 * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and 065 * the bulk loaded (_SeqId_[0-9]+_) hfiles. 066 * <p> 067 * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and 068 * '4567' is region name and 'abcd' is filename. 069 */ 070 public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s", 071 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 072 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); 073 074 /** Define the HFile Link name parser in the form of: table=region-hfile */ 075 public static final Pattern LINK_NAME_PATTERN = 076 Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", 077 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 078 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); 079 080 /** 081 * The pattern should be used for hfile and reference links that can be found in 082 * /hbase/table/region/family/ 083 */ 084 private static final Pattern REF_OR_HFILE_LINK_PATTERN = 085 Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX, 086 TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); 087 088 private final Path archivePath; 089 private final Path originPath; 090 private final Path mobPath; 091 private final Path tempPath; 092 093 /** 094 * Dead simple hfile link constructor 095 */ 096 public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, 097 final Path archivePath) { 098 this.tempPath = tempPath; 099 this.originPath = originPath; 100 this.mobPath = mobPath; 101 this.archivePath = archivePath; 102 setLocations(originPath, archivePath, tempPath, mobPath); 103 } 104 105 /** 106 * @param conf {@link Configuration} from which to extract specific archive locations 107 * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) 108 * @throws IOException on unexpected error. 109 */ 110 public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) 111 throws IOException { 112 return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf), 113 HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); 114 } 115 116 /** 117 * @param rootDir Path to the root directory where hbase files are stored 118 * @param archiveDir Path to the hbase archive directory 119 * @param hFileLinkPattern The path of the HFile Link. 120 */ 121 public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir, 122 final Path hFileLinkPattern) { 123 Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); 124 Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); 125 Path originPath = new Path(rootDir, hfilePath); 126 Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); 127 Path archivePath = new Path(archiveDir, hfilePath); 128 return new HFileLink(originPath, tempPath, mobPath, archivePath); 129 } 130 131 /** 132 * Create an HFileLink relative path for the table/region/family/hfile location 133 * @param table Table name 134 * @param region Region Name 135 * @param family Family Name 136 * @param hfile HFile Name 137 * @return the relative Path to open the specified table/region/family/hfile link 138 */ 139 public static Path createPath(final TableName table, final String region, final String family, 140 final String hfile) { 141 if (HFileLink.isHFileLink(hfile)) { 142 return new Path(family, hfile); 143 } 144 return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); 145 } 146 147 /** 148 * Create an HFileLink instance from table/region/family/hfile location 149 * @param conf {@link Configuration} from which to extract specific archive locations 150 * @param table Table name 151 * @param region Region Name 152 * @param family Family Name 153 * @param hfile HFile Name 154 * @return Link to the file with the specified table/region/family/hfile location 155 * @throws IOException on unexpected error. 156 */ 157 public static HFileLink build(final Configuration conf, final TableName table, 158 final String region, final String family, final String hfile) throws IOException { 159 return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); 160 } 161 162 /** Returns the origin path of the hfile. */ 163 public Path getOriginPath() { 164 return this.originPath; 165 } 166 167 /** Returns the path of the archived hfile. */ 168 public Path getArchivePath() { 169 return this.archivePath; 170 } 171 172 /** Returns the path of the mob hfiles. */ 173 public Path getMobPath() { 174 return this.mobPath; 175 } 176 177 /** 178 * @param path Path to check. 179 * @return True if the path is a HFileLink. 180 */ 181 public static boolean isHFileLink(final Path path) { 182 return isHFileLink(path.getName()); 183 } 184 185 /** 186 * @param fileName File name to check. 187 * @return True if the path is a HFileLink. 188 */ 189 public static boolean isHFileLink(String fileName) { 190 // The LINK_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail 191 // on a simple heuristic first. The regex contains a literal "=", so if that character 192 // isn't in the fileName, then the regex cannot match. 193 if (!fileName.contains("=")) { 194 return false; 195 } 196 197 Matcher m = LINK_NAME_PATTERN.matcher(fileName); 198 if (!m.matches()) { 199 return false; 200 } 201 return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; 202 } 203 204 /** 205 * Convert a HFileLink path to a table relative path. e.g. the link: 206 * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd 207 * @param path HFileLink path 208 * @return Relative table path 209 * @throws IOException on unexpected error. 210 */ 211 private static Path getHFileLinkPatternRelativePath(final Path path) { 212 // table=region-hfile 213 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); 214 if (!m.matches()) { 215 throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); 216 } 217 218 // Convert the HFileLink name into a real table/region/cf/hfile path. 219 TableName tableName = TableName.valueOf(m.group(1), m.group(2)); 220 String regionName = m.group(3); 221 String hfileName = m.group(4); 222 String familyName = path.getParent().getName(); 223 Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName); 224 return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName))); 225 } 226 227 /** 228 * Get the HFile name of the referenced link 229 * @param fileName HFileLink file name 230 * @return the name of the referenced HFile 231 */ 232 public static String getReferencedHFileName(final String fileName) { 233 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 234 if (!m.matches()) { 235 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 236 } 237 return (m.group(4)); 238 } 239 240 /** 241 * Get the Region name of the referenced link 242 * @param fileName HFileLink file name 243 * @return the name of the referenced Region 244 */ 245 public static String getReferencedRegionName(final String fileName) { 246 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 247 if (!m.matches()) { 248 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 249 } 250 return (m.group(3)); 251 } 252 253 /** 254 * Get the Table name of the referenced link 255 * @param fileName HFileLink file name 256 * @return the name of the referenced Table 257 */ 258 public static TableName getReferencedTableName(final String fileName) { 259 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 260 if (!m.matches()) { 261 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 262 } 263 return (TableName.valueOf(m.group(1), m.group(2))); 264 } 265 266 /** 267 * Create a new HFileLink name 268 * @param hfileRegionInfo - Linked HFile Region Info 269 * @param hfileName - Linked HFile name 270 * @return file name of the HFile Link 271 */ 272 public static String createHFileLinkName(final RegionInfo hfileRegionInfo, 273 final String hfileName) { 274 return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(), 275 hfileName); 276 } 277 278 /** 279 * Create a new HFileLink name 280 * @param tableName - Linked HFile table name 281 * @param regionName - Linked HFile region name 282 * @param hfileName - Linked HFile name 283 * @return file name of the HFile Link 284 */ 285 public static String createHFileLinkName(final TableName tableName, final String regionName, 286 final String hfileName) { 287 String s = String.format("%s=%s-%s", 288 tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName); 289 return s; 290 } 291 292 /** 293 * Create the back reference name 294 */ 295 // package-private for testing 296 public static String createBackReferenceName(final String tableNameStr, final String regionName) { 297 298 return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); 299 } 300 301 /** 302 * Get the full path of the HFile referenced by the back reference 303 * @param rootDir root hbase directory 304 * @param linkRefPath Link Back Reference path 305 * @return full path of the referenced hfile 306 */ 307 public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { 308 Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName()); 309 TableName linkTableName = p.getFirst(); 310 String linkRegionName = p.getSecond(); 311 312 String hfileName = getBackReferenceFileName(linkRefPath.getParent()); 313 Path familyPath = linkRefPath.getParent().getParent(); 314 Path regionPath = familyPath.getParent(); 315 Path tablePath = regionPath.getParent(); 316 317 String linkName = 318 createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName); 319 Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName); 320 Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); 321 return new Path(new Path(regionDir, familyPath.getName()), linkName); 322 } 323 324 public static Pair<TableName, String> parseBackReferenceName(String name) { 325 int separatorIndex = name.indexOf('.'); 326 String linkRegionName = name.substring(0, separatorIndex); 327 String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM); 328 TableName linkTableName = TableName.valueOf(tableSubstr); 329 return new Pair<>(linkTableName, linkRegionName); 330 } 331 332 /** 333 * Get the full path of the HFile referenced by the back reference 334 * @param conf {@link Configuration} to read for the archive directory name 335 * @param linkRefPath Link Back Reference path 336 * @return full path of the referenced hfile 337 * @throws IOException on unexpected error. 338 */ 339 public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) 340 throws IOException { 341 return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath); 342 } 343 344}