001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io;
019
020import java.io.IOException;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.fs.FileSystem;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.RegionInfoBuilder;
030import org.apache.hadoop.hbase.mob.MobConstants;
031import org.apache.hadoop.hbase.regionserver.HRegion;
032import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
033import org.apache.hadoop.hbase.util.CommonFSUtils;
034import org.apache.hadoop.hbase.util.HFileArchiveUtil;
035import org.apache.hadoop.hbase.util.Pair;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile
 * archive directory (/hbase/.archive). HFileLink allows access to the referenced hfile regardless
 * of where it is currently located.
044 * <p>
045 * Searches for hfiles in the following order and locations:
046 * <ul>
047 * <li>/hbase/table/region/cf/hfile</li>
048 * <li>/hbase/.archive/table/region/cf/hfile</li>
049 * </ul>
 * The link checks the original path first; if the file is not present there, it falls back to the
 * archived path.
052 */
053@InterfaceAudience.Private
054@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS",
055    justification = "To be fixed but warning suppressed for now")
056public class HFileLink extends FileLink {
  /** Class logger; used to report failures while creating a link file. */
  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);
058
  /**
   * Regular expression for an HFileLink file name, meant to be embedded in larger expressions. An
   * HFileLink describes a link to an hfile in a different table/region and its name has the form
   * {@code [namespace=]table=region-hfile}, where the {@code namespace=} prefix is optional.
   * <p>
   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
   * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for
   * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and
   * the bulk loaded (_SeqId_[0-9]+_) hfiles.
   * <p>
   * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and
   * '4567' is region name and 'abcd' is filename.
   * <p>
   * NOTE(review): the format string below only wraps the optional namespace prefix in a
   * non-capturing group; whether the expression as a whole introduces capturing groups depends on
   * the component regexes defined in TableName, RegionInfoBuilder and StoreFileInfo - confirm
   * there before embedding.
   */
  public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s",
    TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
    RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
074
  /**
   * Anchored parser for an HFileLink name of the form {@code [namespace=]table=region-hfile}.
   * <p>
   * The code in this class reads the capturing groups as: 1 = namespace (optional, so it may be
   * absent), 2 = table qualifier, 3 = region name, 4 = hfile name. This numbering presumes the
   * component regexes contribute no capturing groups of their own - verify against their
   * definitions.
   */
  public static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
080
  /**
   * The pattern should be used for hfile and reference links that can be found in
   * /hbase/table/region/family/
   * <p>
   * It has the same {@code [namespace=]table=region-} prefix as {@link #LINK_NAME_PATTERN}, but
   * the trailing part (group 4) is {@code .+}, i.e. any non-empty remainder is accepted as the
   * file name rather than only names matching the hfile name regex.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX,
      TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));
088
  // The four candidate locations of the referenced hfile. All of them are assigned once in the
  // constructor and are also registered with the parent class through setLocations(...).

  /** Candidate location of the hfile under the archive directory. */
  private final Path archivePath;
  /** Candidate location of the hfile under the root (table) directory. */
  private final Path originPath;
  /** Candidate location of the hfile under the mob directory. */
  private final Path mobPath;
  /** Candidate location of the hfile under the temp directory. */
  private final Path tempPath;
093
094  /**
095   * Dead simple hfile link constructor
096   */
097  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
098    final Path archivePath) {
099    this.tempPath = tempPath;
100    this.originPath = originPath;
101    this.mobPath = mobPath;
102    this.archivePath = archivePath;
103    setLocations(originPath, archivePath, tempPath, mobPath);
104  }
105
106  /**
107   * @param conf             {@link Configuration} from which to extract specific archive locations
108   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
109   * @throws IOException on unexpected error.
110   */
111  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
112    throws IOException {
113    return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf),
114      HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
115  }
116
117  /**
118   * @param rootDir          Path to the root directory where hbase files are stored
119   * @param archiveDir       Path to the hbase archive directory
120   * @param hFileLinkPattern The path of the HFile Link.
121   */
122  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir,
123    final Path hFileLinkPattern) {
124    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
125    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
126    Path originPath = new Path(rootDir, hfilePath);
127    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
128    Path archivePath = new Path(archiveDir, hfilePath);
129    return new HFileLink(originPath, tempPath, mobPath, archivePath);
130  }
131
132  /**
133   * Create an HFileLink relative path for the table/region/family/hfile location
134   * @param table  Table name
135   * @param region Region Name
136   * @param family Family Name
137   * @param hfile  HFile Name
138   * @return the relative Path to open the specified table/region/family/hfile link
139   */
140  public static Path createPath(final TableName table, final String region, final String family,
141    final String hfile) {
142    if (HFileLink.isHFileLink(hfile)) {
143      return new Path(family, hfile);
144    }
145    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
146  }
147
148  /**
149   * Create an HFileLink instance from table/region/family/hfile location
150   * @param conf   {@link Configuration} from which to extract specific archive locations
151   * @param table  Table name
152   * @param region Region Name
153   * @param family Family Name
154   * @param hfile  HFile Name
155   * @return Link to the file with the specified table/region/family/hfile location
156   * @throws IOException on unexpected error.
157   */
158  public static HFileLink build(final Configuration conf, final TableName table,
159    final String region, final String family, final String hfile) throws IOException {
160    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
161  }
162
163  /** Returns the origin path of the hfile. */
164  public Path getOriginPath() {
165    return this.originPath;
166  }
167
168  /** Returns the path of the archived hfile. */
169  public Path getArchivePath() {
170    return this.archivePath;
171  }
172
173  /** Returns the path of the mob hfiles. */
174  public Path getMobPath() {
175    return this.mobPath;
176  }
177
178  /**
179   * @param path Path to check.
180   * @return True if the path is a HFileLink.
181   */
182  public static boolean isHFileLink(final Path path) {
183    return isHFileLink(path.getName());
184  }
185
186  /**
187   * @param fileName File name to check.
188   * @return True if the path is a HFileLink.
189   */
190  public static boolean isHFileLink(String fileName) {
191    // The LINK_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail
192    // on a simple heuristic first. The regex contains a literal "=", so if that character
193    // isn't in the fileName, then the regex cannot match.
194    if (!fileName.contains("=")) {
195      return false;
196    }
197
198    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
199    if (!m.matches()) {
200      return false;
201    }
202    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
203  }
204
205  /**
206   * Convert a HFileLink path to a table relative path. e.g. the link:
207   * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd
208   * @param path HFileLink path
209   * @return Relative table path
210   * @throws IOException on unexpected error.
211   */
212  private static Path getHFileLinkPatternRelativePath(final Path path) {
213    // table=region-hfile
214    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
215    if (!m.matches()) {
216      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
217    }
218
219    // Convert the HFileLink name into a real table/region/cf/hfile path.
220    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
221    String regionName = m.group(3);
222    String hfileName = m.group(4);
223    String familyName = path.getParent().getName();
224    Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName);
225    return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName)));
226  }
227
228  /**
229   * Get the HFile name of the referenced link
230   * @param fileName HFileLink file name
231   * @return the name of the referenced HFile
232   */
233  public static String getReferencedHFileName(final String fileName) {
234    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
235    if (!m.matches()) {
236      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
237    }
238    return (m.group(4));
239  }
240
241  /**
242   * Get the Region name of the referenced link
243   * @param fileName HFileLink file name
244   * @return the name of the referenced Region
245   */
246  public static String getReferencedRegionName(final String fileName) {
247    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
248    if (!m.matches()) {
249      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
250    }
251    return (m.group(3));
252  }
253
254  /**
255   * Get the Table name of the referenced link
256   * @param fileName HFileLink file name
257   * @return the name of the referenced Table
258   */
259  public static TableName getReferencedTableName(final String fileName) {
260    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
261    if (!m.matches()) {
262      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
263    }
264    return (TableName.valueOf(m.group(1), m.group(2)));
265  }
266
267  /**
268   * Create a new HFileLink name
269   * @param hfileRegionInfo - Linked HFile Region Info
270   * @param hfileName       - Linked HFile name
271   * @return file name of the HFile Link
272   */
273  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
274    final String hfileName) {
275    return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(),
276      hfileName);
277  }
278
279  /**
280   * Create a new HFileLink name
281   * @param tableName  - Linked HFile table name
282   * @param regionName - Linked HFile region name
283   * @param hfileName  - Linked HFile name
284   * @return file name of the HFile Link
285   */
286  public static String createHFileLinkName(final TableName tableName, final String regionName,
287    final String hfileName) {
288    String s = String.format("%s=%s-%s",
289      tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName);
290    return s;
291  }
292
293  /**
294   * Create a new HFileLink
295   * <p>
296   * It also adds a back-reference to the hfile back-reference directory to simplify the
297   * reference-count and the cleaning process.
298   * @param conf            {@link Configuration} to read for the archive directory name
299   * @param fs              {@link FileSystem} on which to write the HFileLink
300   * @param dstFamilyPath   - Destination path (table/region/cf/)
301   * @param hfileRegionInfo - Linked HFile Region Info
302   * @param hfileName       - Linked HFile name
303   * @return the file link name.
304   * @throws IOException on file or parent directory creation failure.
305   */
306  public static String create(final Configuration conf, final FileSystem fs,
307    final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName)
308    throws IOException {
309    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
310  }
311
312  /**
313   * Create a new HFileLink
314   * <p>
315   * It also adds a back-reference to the hfile back-reference directory to simplify the
316   * reference-count and the cleaning process.
317   * @param conf            {@link Configuration} to read for the archive directory name
318   * @param fs              {@link FileSystem} on which to write the HFileLink
319   * @param dstFamilyPath   - Destination path (table/region/cf/)
320   * @param hfileRegionInfo - Linked HFile Region Info
321   * @param hfileName       - Linked HFile name
322   * @param createBackRef   - Whether back reference should be created. Defaults to true.
323   * @return the file link name.
324   * @throws IOException on file or parent directory creation failure.
325   */
326  public static String create(final Configuration conf, final FileSystem fs,
327    final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName,
328    final boolean createBackRef) throws IOException {
329    TableName linkedTable = hfileRegionInfo.getTable();
330    String linkedRegion = hfileRegionInfo.getEncodedName();
331    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
332  }
333
334  /**
335   * Create a new HFileLink
336   * <p>
337   * It also adds a back-reference to the hfile back-reference directory to simplify the
338   * reference-count and the cleaning process.
339   * @param conf          {@link Configuration} to read for the archive directory name
340   * @param fs            {@link FileSystem} on which to write the HFileLink
341   * @param dstFamilyPath - Destination path (table/region/cf/)
342   * @param linkedTable   - Linked Table Name
343   * @param linkedRegion  - Linked Region Name
344   * @param hfileName     - Linked HFile name
345   * @return the file link name.
346   * @throws IOException on file or parent directory creation failure.
347   */
348  public static String create(final Configuration conf, final FileSystem fs,
349    final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
350    final String hfileName) throws IOException {
351    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
352  }
353
354  /**
355   * Create a new HFileLink. In the event of link creation failure, this method throws an
356   * IOException, so that the calling upper laying can decide on how to proceed with this.
357   * <p>
358   * It also adds a back-reference to the hfile back-reference directory to simplify the
359   * reference-count and the cleaning process.
360   * @param conf          {@link Configuration} to read for the archive directory name
361   * @param fs            {@link FileSystem} on which to write the HFileLink
362   * @param dstFamilyPath - Destination path (table/region/cf/)
363   * @param linkedTable   - Linked Table Name
364   * @param linkedRegion  - Linked Region Name
365   * @param hfileName     - Linked HFile name
366   * @param createBackRef - Whether back reference should be created. Defaults to true.
367   * @return the file link name.
368   * @throws IOException on file or parent directory creation failure.
369   */
370  public static String create(final Configuration conf, final FileSystem fs,
371    final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
372    final String hfileName, final boolean createBackRef) throws IOException {
373    String familyName = dstFamilyPath.getName();
374    String regionName = dstFamilyPath.getParent().getName();
375    String tableName =
376      CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()).getNameAsString();
377
378    return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable,
379      linkedRegion, hfileName, createBackRef);
380  }
381
382  /**
383   * Create a new HFileLink
384   * <p>
385   * It also adds a back-reference to the hfile back-reference directory to simplify the
386   * reference-count and the cleaning process.
387   * @param conf          {@link Configuration} to read for the archive directory name
388   * @param fs            {@link FileSystem} on which to write the HFileLink
389   * @param dstFamilyPath - Destination path (table/region/cf/)
390   * @param dstTableName  - Destination table name
391   * @param dstRegionName - Destination region name
392   * @param linkedTable   - Linked Table Name
393   * @param linkedRegion  - Linked Region Name
394   * @param hfileName     - Linked HFile name
395   * @param createBackRef - Whether back reference should be created. Defaults to true.
396   * @return the file link name.
397   * @throws IOException on file or parent directory creation failure
398   */
399  public static String create(final Configuration conf, final FileSystem fs,
400    final Path dstFamilyPath, final String familyName, final String dstTableName,
401    final String dstRegionName, final TableName linkedTable, final String linkedRegion,
402    final String hfileName, final boolean createBackRef) throws IOException {
403    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
404    String refName = createBackReferenceName(dstTableName, dstRegionName);
405
406    // Make sure the destination directory exists
407    fs.mkdirs(dstFamilyPath);
408
409    // Make sure the FileLink reference directory exists
410    Path archiveStoreDir =
411      HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName);
412    Path backRefPath = null;
413    if (createBackRef) {
414      Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
415      fs.mkdirs(backRefssDir);
416
417      // Create the reference for the link
418      backRefPath = new Path(backRefssDir, refName);
419      fs.createNewFile(backRefPath);
420    }
421    try {
422      // Create the link
423      if (fs.createNewFile(new Path(dstFamilyPath, name))) {
424        return name;
425      }
426    } catch (IOException e) {
427      LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
428      // Revert the reference if the link creation failed
429      if (createBackRef) {
430        fs.delete(backRefPath, false);
431      }
432      throw e;
433    }
434    throw new IOException(
435      "File link=" + name + " already exists under " + dstFamilyPath + " folder.");
436  }
437
438  /**
439   * Create a new HFileLink starting from a hfileLink name
440   * <p>
441   * It also adds a back-reference to the hfile back-reference directory to simplify the
442   * reference-count and the cleaning process.
443   * @param conf          {@link Configuration} to read for the archive directory name
444   * @param fs            {@link FileSystem} on which to write the HFileLink
445   * @param dstFamilyPath - Destination path (table/region/cf/)
446   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
447   * @param createBackRef - Whether back reference should be created. Defaults to true.
448   * @return the file link name.
449   * @throws IOException on file or parent directory creation failure.
450   */
451  public static String createFromHFileLink(final Configuration conf, final FileSystem fs,
452    final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
453    throws IOException {
454    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
455    if (!m.matches()) {
456      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
457    }
458    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3),
459      m.group(4), createBackRef);
460  }
461
462  /**
463   * Create the back reference name
464   */
465  // package-private for testing
466  public static String createBackReferenceName(final String tableNameStr, final String regionName) {
467
468    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
469  }
470
471  /**
472   * Get the full path of the HFile referenced by the back reference
473   * @param rootDir     root hbase directory
474   * @param linkRefPath Link Back Reference path
475   * @return full path of the referenced hfile
476   */
477  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
478    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
479    TableName linkTableName = p.getFirst();
480    String linkRegionName = p.getSecond();
481
482    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
483    Path familyPath = linkRefPath.getParent().getParent();
484    Path regionPath = familyPath.getParent();
485    Path tablePath = regionPath.getParent();
486
487    String linkName =
488      createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName);
489    Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName);
490    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
491    return new Path(new Path(regionDir, familyPath.getName()), linkName);
492  }
493
494  public static Pair<TableName, String> parseBackReferenceName(String name) {
495    int separatorIndex = name.indexOf('.');
496    String linkRegionName = name.substring(0, separatorIndex);
497    String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM);
498    TableName linkTableName = TableName.valueOf(tableSubstr);
499    return new Pair<>(linkTableName, linkRegionName);
500  }
501
502  /**
503   * Get the full path of the HFile referenced by the back reference
504   * @param conf        {@link Configuration} to read for the archive directory name
505   * @param linkRefPath Link Back Reference path
506   * @return full path of the referenced hfile
507   * @throws IOException on unexpected error.
508   */
509  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
510    throws IOException {
511    return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath);
512  }
513
514}