001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io; 019 020import java.io.IOException; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.fs.FileSystem; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.TableName; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.client.RegionInfoBuilder; 030import org.apache.hadoop.hbase.mob.MobConstants; 031import org.apache.hadoop.hbase.regionserver.HRegion; 032import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 033import org.apache.hadoop.hbase.util.CommonFSUtils; 034import org.apache.hadoop.hbase.util.HFileArchiveUtil; 035import org.apache.hadoop.hbase.util.Pair; 036import org.apache.yetus.audience.InterfaceAudience; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile 042 * archive directory (/hbase/.archive) HFileLink allows to access the referenced hfile regardless of 043 * the location where it is. 044 * <p> 045 * Searches for hfiles in the following order and locations: 046 * <ul> 047 * <li>/hbase/table/region/cf/hfile</li> 048 * <li>/hbase/.archive/table/region/cf/hfile</li> 049 * </ul> 050 * The link checks first in the original path if it is not present it fallbacks to the archived 051 * path. 052 */ 053@InterfaceAudience.Private 054@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS", 055 justification = "To be fixed but warning suppressed for now") 056public class HFileLink extends FileLink { 057 private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); 058 059 /** 060 * A non-capture group, for HFileLink, so that this can be embedded. The HFileLink describe a link 061 * to an hfile in a different table/region and the name is in the form: table=region-hfile. 062 * <p> 063 * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid 064 * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for 065 * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and 066 * the bulk loaded (_SeqId_[0-9]+_) hfiles. 067 * <p> 068 * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and 069 * '4567' is region name and 'abcd' is filename. 070 */ 071 public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s", 072 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 073 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); 074 075 /** Define the HFile Link name parser in the form of: table=region-hfile */ 076 public static final Pattern LINK_NAME_PATTERN = 077 Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", 078 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 079 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); 080 081 /** 082 * The pattern should be used for hfile and reference links that can be found in 083 * /hbase/table/region/family/ 084 */ 085 private static final Pattern REF_OR_HFILE_LINK_PATTERN = 086 Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX, 087 TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); 088 089 private final Path archivePath; 090 private final Path originPath; 091 private final Path mobPath; 092 private final Path tempPath; 093 094 /** 095 * Dead simple hfile link constructor 096 */ 097 public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, 098 final Path archivePath) { 099 this.tempPath = tempPath; 100 this.originPath = originPath; 101 this.mobPath = mobPath; 102 this.archivePath = archivePath; 103 setLocations(originPath, archivePath, tempPath, mobPath); 104 } 105 106 /** 107 * @param conf {@link Configuration} from which to extract specific archive locations 108 * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) 109 * @throws IOException on unexpected error. 110 */ 111 public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) 112 throws IOException { 113 return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf), 114 HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); 115 } 116 117 /** 118 * @param rootDir Path to the root directory where hbase files are stored 119 * @param archiveDir Path to the hbase archive directory 120 * @param hFileLinkPattern The path of the HFile Link. 121 */ 122 public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir, 123 final Path hFileLinkPattern) { 124 Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); 125 Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); 126 Path originPath = new Path(rootDir, hfilePath); 127 Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); 128 Path archivePath = new Path(archiveDir, hfilePath); 129 return new HFileLink(originPath, tempPath, mobPath, archivePath); 130 } 131 132 /** 133 * Create an HFileLink relative path for the table/region/family/hfile location 134 * @param table Table name 135 * @param region Region Name 136 * @param family Family Name 137 * @param hfile HFile Name 138 * @return the relative Path to open the specified table/region/family/hfile link 139 */ 140 public static Path createPath(final TableName table, final String region, final String family, 141 final String hfile) { 142 if (HFileLink.isHFileLink(hfile)) { 143 return new Path(family, hfile); 144 } 145 return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); 146 } 147 148 /** 149 * Create an HFileLink instance from table/region/family/hfile location 150 * @param conf {@link Configuration} from which to extract specific archive locations 151 * @param table Table name 152 * @param region Region Name 153 * @param family Family Name 154 * @param hfile HFile Name 155 * @return Link to the file with the specified table/region/family/hfile location 156 * @throws IOException on unexpected error. 157 */ 158 public static HFileLink build(final Configuration conf, final TableName table, 159 final String region, final String family, final String hfile) throws IOException { 160 return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); 161 } 162 163 /** Returns the origin path of the hfile. */ 164 public Path getOriginPath() { 165 return this.originPath; 166 } 167 168 /** Returns the path of the archived hfile. */ 169 public Path getArchivePath() { 170 return this.archivePath; 171 } 172 173 /** Returns the path of the mob hfiles. */ 174 public Path getMobPath() { 175 return this.mobPath; 176 } 177 178 /** 179 * @param path Path to check. 180 * @return True if the path is a HFileLink. 181 */ 182 public static boolean isHFileLink(final Path path) { 183 return isHFileLink(path.getName()); 184 } 185 186 /** 187 * @param fileName File name to check. 188 * @return True if the path is a HFileLink. 189 */ 190 public static boolean isHFileLink(String fileName) { 191 // The LINK_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail 192 // on a simple heuristic first. The regex contains a literal "=", so if that character 193 // isn't in the fileName, then the regex cannot match. 194 if (!fileName.contains("=")) { 195 return false; 196 } 197 198 Matcher m = LINK_NAME_PATTERN.matcher(fileName); 199 if (!m.matches()) { 200 return false; 201 } 202 return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; 203 } 204 205 /** 206 * Convert a HFileLink path to a table relative path. e.g. the link: 207 * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd 208 * @param path HFileLink path 209 * @return Relative table path 210 * @throws IOException on unexpected error. 211 */ 212 private static Path getHFileLinkPatternRelativePath(final Path path) { 213 // table=region-hfile 214 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); 215 if (!m.matches()) { 216 throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); 217 } 218 219 // Convert the HFileLink name into a real table/region/cf/hfile path. 220 TableName tableName = TableName.valueOf(m.group(1), m.group(2)); 221 String regionName = m.group(3); 222 String hfileName = m.group(4); 223 String familyName = path.getParent().getName(); 224 Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName); 225 return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName))); 226 } 227 228 /** 229 * Get the HFile name of the referenced link 230 * @param fileName HFileLink file name 231 * @return the name of the referenced HFile 232 */ 233 public static String getReferencedHFileName(final String fileName) { 234 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 235 if (!m.matches()) { 236 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 237 } 238 return (m.group(4)); 239 } 240 241 /** 242 * Get the Region name of the referenced link 243 * @param fileName HFileLink file name 244 * @return the name of the referenced Region 245 */ 246 public static String getReferencedRegionName(final String fileName) { 247 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 248 if (!m.matches()) { 249 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 250 } 251 return (m.group(3)); 252 } 253 254 /** 255 * Get the Table name of the referenced link 256 * @param fileName HFileLink file name 257 * @return the name of the referenced Table 258 */ 259 public static TableName getReferencedTableName(final String fileName) { 260 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 261 if (!m.matches()) { 262 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 263 } 264 return (TableName.valueOf(m.group(1), m.group(2))); 265 } 266 267 /** 268 * Create a new HFileLink name 269 * @param hfileRegionInfo - Linked HFile Region Info 270 * @param hfileName - Linked HFile name 271 * @return file name of the HFile Link 272 */ 273 public static String createHFileLinkName(final RegionInfo hfileRegionInfo, 274 final String hfileName) { 275 return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(), 276 hfileName); 277 } 278 279 /** 280 * Create a new HFileLink name 281 * @param tableName - Linked HFile table name 282 * @param regionName - Linked HFile region name 283 * @param hfileName - Linked HFile name 284 * @return file name of the HFile Link 285 */ 286 public static String createHFileLinkName(final TableName tableName, final String regionName, 287 final String hfileName) { 288 String s = String.format("%s=%s-%s", 289 tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName); 290 return s; 291 } 292 293 /** 294 * Create a new HFileLink 295 * <p> 296 * It also adds a back-reference to the hfile back-reference directory to simplify the 297 * reference-count and the cleaning process. 298 * @param conf {@link Configuration} to read for the archive directory name 299 * @param fs {@link FileSystem} on which to write the HFileLink 300 * @param dstFamilyPath - Destination path (table/region/cf/) 301 * @param hfileRegionInfo - Linked HFile Region Info 302 * @param hfileName - Linked HFile name 303 * @return the file link name. 304 * @throws IOException on file or parent directory creation failure. 305 */ 306 public static String create(final Configuration conf, final FileSystem fs, 307 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName) 308 throws IOException { 309 return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true); 310 } 311 312 /** 313 * Create a new HFileLink 314 * <p> 315 * It also adds a back-reference to the hfile back-reference directory to simplify the 316 * reference-count and the cleaning process. 317 * @param conf {@link Configuration} to read for the archive directory name 318 * @param fs {@link FileSystem} on which to write the HFileLink 319 * @param dstFamilyPath - Destination path (table/region/cf/) 320 * @param hfileRegionInfo - Linked HFile Region Info 321 * @param hfileName - Linked HFile name 322 * @param createBackRef - Whether back reference should be created. Defaults to true. 323 * @return the file link name. 324 * @throws IOException on file or parent directory creation failure. 325 */ 326 public static String create(final Configuration conf, final FileSystem fs, 327 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName, 328 final boolean createBackRef) throws IOException { 329 TableName linkedTable = hfileRegionInfo.getTable(); 330 String linkedRegion = hfileRegionInfo.getEncodedName(); 331 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef); 332 } 333 334 /** 335 * Create a new HFileLink 336 * <p> 337 * It also adds a back-reference to the hfile back-reference directory to simplify the 338 * reference-count and the cleaning process. 339 * @param conf {@link Configuration} to read for the archive directory name 340 * @param fs {@link FileSystem} on which to write the HFileLink 341 * @param dstFamilyPath - Destination path (table/region/cf/) 342 * @param linkedTable - Linked Table Name 343 * @param linkedRegion - Linked Region Name 344 * @param hfileName - Linked HFile name 345 * @return the file link name. 346 * @throws IOException on file or parent directory creation failure. 347 */ 348 public static String create(final Configuration conf, final FileSystem fs, 349 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 350 final String hfileName) throws IOException { 351 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true); 352 } 353 354 /** 355 * Create a new HFileLink. In the event of link creation failure, this method throws an 356 * IOException, so that the calling upper laying can decide on how to proceed with this. 357 * <p> 358 * It also adds a back-reference to the hfile back-reference directory to simplify the 359 * reference-count and the cleaning process. 360 * @param conf {@link Configuration} to read for the archive directory name 361 * @param fs {@link FileSystem} on which to write the HFileLink 362 * @param dstFamilyPath - Destination path (table/region/cf/) 363 * @param linkedTable - Linked Table Name 364 * @param linkedRegion - Linked Region Name 365 * @param hfileName - Linked HFile name 366 * @param createBackRef - Whether back reference should be created. Defaults to true. 367 * @return the file link name. 368 * @throws IOException on file or parent directory creation failure. 369 */ 370 public static String create(final Configuration conf, final FileSystem fs, 371 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 372 final String hfileName, final boolean createBackRef) throws IOException { 373 String familyName = dstFamilyPath.getName(); 374 String regionName = dstFamilyPath.getParent().getName(); 375 String tableName = 376 CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()).getNameAsString(); 377 378 return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable, 379 linkedRegion, hfileName, createBackRef); 380 } 381 382 /** 383 * Create a new HFileLink 384 * <p> 385 * It also adds a back-reference to the hfile back-reference directory to simplify the 386 * reference-count and the cleaning process. 387 * @param conf {@link Configuration} to read for the archive directory name 388 * @param fs {@link FileSystem} on which to write the HFileLink 389 * @param dstFamilyPath - Destination path (table/region/cf/) 390 * @param dstTableName - Destination table name 391 * @param dstRegionName - Destination region name 392 * @param linkedTable - Linked Table Name 393 * @param linkedRegion - Linked Region Name 394 * @param hfileName - Linked HFile name 395 * @param createBackRef - Whether back reference should be created. Defaults to true. 396 * @return the file link name. 397 * @throws IOException on file or parent directory creation failure 398 */ 399 public static String create(final Configuration conf, final FileSystem fs, 400 final Path dstFamilyPath, final String familyName, final String dstTableName, 401 final String dstRegionName, final TableName linkedTable, final String linkedRegion, 402 final String hfileName, final boolean createBackRef) throws IOException { 403 String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); 404 String refName = createBackReferenceName(dstTableName, dstRegionName); 405 406 // Make sure the destination directory exists 407 fs.mkdirs(dstFamilyPath); 408 409 // Make sure the FileLink reference directory exists 410 Path archiveStoreDir = 411 HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName); 412 Path backRefPath = null; 413 if (createBackRef) { 414 Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); 415 fs.mkdirs(backRefssDir); 416 417 // Create the reference for the link 418 backRefPath = new Path(backRefssDir, refName); 419 fs.createNewFile(backRefPath); 420 } 421 try { 422 // Create the link 423 if (fs.createNewFile(new Path(dstFamilyPath, name))) { 424 return name; 425 } 426 } catch (IOException e) { 427 LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); 428 // Revert the reference if the link creation failed 429 if (createBackRef) { 430 fs.delete(backRefPath, false); 431 } 432 throw e; 433 } 434 throw new IOException( 435 "File link=" + name + " already exists under " + dstFamilyPath + " folder."); 436 } 437 438 /** 439 * Create a new HFileLink starting from a hfileLink name 440 * <p> 441 * It also adds a back-reference to the hfile back-reference directory to simplify the 442 * reference-count and the cleaning process. 443 * @param conf {@link Configuration} to read for the archive directory name 444 * @param fs {@link FileSystem} on which to write the HFileLink 445 * @param dstFamilyPath - Destination path (table/region/cf/) 446 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 447 * @param createBackRef - Whether back reference should be created. Defaults to true. 448 * @return the file link name. 449 * @throws IOException on file or parent directory creation failure. 450 */ 451 public static String createFromHFileLink(final Configuration conf, final FileSystem fs, 452 final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef) 453 throws IOException { 454 Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); 455 if (!m.matches()) { 456 throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); 457 } 458 return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3), 459 m.group(4), createBackRef); 460 } 461 462 /** 463 * Create the back reference name 464 */ 465 // package-private for testing 466 public static String createBackReferenceName(final String tableNameStr, final String regionName) { 467 468 return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); 469 } 470 471 /** 472 * Get the full path of the HFile referenced by the back reference 473 * @param rootDir root hbase directory 474 * @param linkRefPath Link Back Reference path 475 * @return full path of the referenced hfile 476 */ 477 public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { 478 Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName()); 479 TableName linkTableName = p.getFirst(); 480 String linkRegionName = p.getSecond(); 481 482 String hfileName = getBackReferenceFileName(linkRefPath.getParent()); 483 Path familyPath = linkRefPath.getParent().getParent(); 484 Path regionPath = familyPath.getParent(); 485 Path tablePath = regionPath.getParent(); 486 487 String linkName = 488 createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName); 489 Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName); 490 Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); 491 return new Path(new Path(regionDir, familyPath.getName()), linkName); 492 } 493 494 public static Pair<TableName, String> parseBackReferenceName(String name) { 495 int separatorIndex = name.indexOf('.'); 496 String linkRegionName = name.substring(0, separatorIndex); 497 String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM); 498 TableName linkTableName = TableName.valueOf(tableSubstr); 499 return new Pair<>(linkTableName, linkRegionName); 500 } 501 502 /** 503 * Get the full path of the HFile referenced by the back reference 504 * @param conf {@link Configuration} to read for the archive directory name 505 * @param linkRefPath Link Back Reference path 506 * @return full path of the referenced hfile 507 * @throws IOException on unexpected error. 508 */ 509 public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) 510 throws IOException { 511 return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath); 512 } 513 514}