001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io;
019
020import java.io.IOException;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.fs.Path;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.RegionInfo;
028import org.apache.hadoop.hbase.client.RegionInfoBuilder;
029import org.apache.hadoop.hbase.mob.MobConstants;
030import org.apache.hadoop.hbase.regionserver.HRegion;
031import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
032import org.apache.hadoop.hbase.util.CommonFSUtils;
033import org.apache.hadoop.hbase.util.HFileArchiveUtil;
034import org.apache.hadoop.hbase.util.Pair;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039/**
040 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile
041 * archive directory (/hbase/.archive) HFileLink allows to access the referenced hfile regardless of
042 * the location where it is.
043 * <p>
044 * Searches for hfiles in the following order and locations:
045 * <ul>
046 * <li>/hbase/table/region/cf/hfile</li>
047 * <li>/hbase/.archive/table/region/cf/hfile</li>
048 * </ul>
049 * The link checks first in the original path if it is not present it fallbacks to the archived
050 * path.
051 */
052@InterfaceAudience.Private
053@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS",
054    justification = "To be fixed but warning suppressed for now")
055public class HFileLink extends FileLink {
056  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);
057
058  /**
059   * A non-capture group, for HFileLink, so that this can be embedded. The HFileLink describe a link
060   * to an hfile in a different table/region and the name is in the form: table=region-hfile.
061   * <p>
062   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
063   * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for
064   * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and
065   * the bulk loaded (_SeqId_[0-9]+_) hfiles.
066   * <p>
067   * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and
068   * '4567' is region name and 'abcd' is filename.
069   */
070  public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s",
071    TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
072    RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
073
074  /** Define the HFile Link name parser in the form of: table=region-hfile */
075  public static final Pattern LINK_NAME_PATTERN =
076    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
077      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
078      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
079
080  /**
081   * The pattern should be used for hfile and reference links that can be found in
082   * /hbase/table/region/family/
083   */
084  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
085    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX,
086      TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));
087
088  private final Path archivePath;
089  private final Path originPath;
090  private final Path mobPath;
091  private final Path tempPath;
092
093  /**
094   * Dead simple hfile link constructor
095   */
096  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
097    final Path archivePath) {
098    this.tempPath = tempPath;
099    this.originPath = originPath;
100    this.mobPath = mobPath;
101    this.archivePath = archivePath;
102    setLocations(originPath, archivePath, tempPath, mobPath);
103  }
104
105  /**
106   * @param conf             {@link Configuration} from which to extract specific archive locations
107   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
108   * @throws IOException on unexpected error.
109   */
110  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
111    throws IOException {
112    return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf),
113      HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
114  }
115
116  /**
117   * @param rootDir          Path to the root directory where hbase files are stored
118   * @param archiveDir       Path to the hbase archive directory
119   * @param hFileLinkPattern The path of the HFile Link.
120   */
121  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir,
122    final Path hFileLinkPattern) {
123    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
124    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
125    Path originPath = new Path(rootDir, hfilePath);
126    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
127    Path archivePath = new Path(archiveDir, hfilePath);
128    return new HFileLink(originPath, tempPath, mobPath, archivePath);
129  }
130
131  /**
132   * Create an HFileLink relative path for the table/region/family/hfile location
133   * @param table  Table name
134   * @param region Region Name
135   * @param family Family Name
136   * @param hfile  HFile Name
137   * @return the relative Path to open the specified table/region/family/hfile link
138   */
139  public static Path createPath(final TableName table, final String region, final String family,
140    final String hfile) {
141    if (HFileLink.isHFileLink(hfile)) {
142      return new Path(family, hfile);
143    }
144    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
145  }
146
147  /**
148   * Create an HFileLink instance from table/region/family/hfile location
149   * @param conf   {@link Configuration} from which to extract specific archive locations
150   * @param table  Table name
151   * @param region Region Name
152   * @param family Family Name
153   * @param hfile  HFile Name
154   * @return Link to the file with the specified table/region/family/hfile location
155   * @throws IOException on unexpected error.
156   */
157  public static HFileLink build(final Configuration conf, final TableName table,
158    final String region, final String family, final String hfile) throws IOException {
159    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
160  }
161
162  /** Returns the origin path of the hfile. */
163  public Path getOriginPath() {
164    return this.originPath;
165  }
166
167  /** Returns the path of the archived hfile. */
168  public Path getArchivePath() {
169    return this.archivePath;
170  }
171
172  /** Returns the path of the mob hfiles. */
173  public Path getMobPath() {
174    return this.mobPath;
175  }
176
177  /**
178   * @param path Path to check.
179   * @return True if the path is a HFileLink.
180   */
181  public static boolean isHFileLink(final Path path) {
182    return isHFileLink(path.getName());
183  }
184
185  /**
186   * @param fileName File name to check.
187   * @return True if the path is a HFileLink.
188   */
189  public static boolean isHFileLink(String fileName) {
190    // The LINK_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail
191    // on a simple heuristic first. The regex contains a literal "=", so if that character
192    // isn't in the fileName, then the regex cannot match.
193    if (!fileName.contains("=")) {
194      return false;
195    }
196
197    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
198    if (!m.matches()) {
199      return false;
200    }
201    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
202  }
203
204  /**
205   * Convert a HFileLink path to a table relative path. e.g. the link:
206   * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd
207   * @param path HFileLink path
208   * @return Relative table path
209   * @throws IOException on unexpected error.
210   */
211  private static Path getHFileLinkPatternRelativePath(final Path path) {
212    // table=region-hfile
213    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
214    if (!m.matches()) {
215      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
216    }
217
218    // Convert the HFileLink name into a real table/region/cf/hfile path.
219    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
220    String regionName = m.group(3);
221    String hfileName = m.group(4);
222    String familyName = path.getParent().getName();
223    Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName);
224    return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName)));
225  }
226
227  /**
228   * Get the HFile name of the referenced link
229   * @param fileName HFileLink file name
230   * @return the name of the referenced HFile
231   */
232  public static String getReferencedHFileName(final String fileName) {
233    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
234    if (!m.matches()) {
235      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
236    }
237    return (m.group(4));
238  }
239
240  /**
241   * Get the Region name of the referenced link
242   * @param fileName HFileLink file name
243   * @return the name of the referenced Region
244   */
245  public static String getReferencedRegionName(final String fileName) {
246    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
247    if (!m.matches()) {
248      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
249    }
250    return (m.group(3));
251  }
252
253  /**
254   * Get the Table name of the referenced link
255   * @param fileName HFileLink file name
256   * @return the name of the referenced Table
257   */
258  public static TableName getReferencedTableName(final String fileName) {
259    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
260    if (!m.matches()) {
261      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
262    }
263    return (TableName.valueOf(m.group(1), m.group(2)));
264  }
265
266  /**
267   * Create a new HFileLink name
268   * @param hfileRegionInfo - Linked HFile Region Info
269   * @param hfileName       - Linked HFile name
270   * @return file name of the HFile Link
271   */
272  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
273    final String hfileName) {
274    return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(),
275      hfileName);
276  }
277
278  /**
279   * Create a new HFileLink name
280   * @param tableName  - Linked HFile table name
281   * @param regionName - Linked HFile region name
282   * @param hfileName  - Linked HFile name
283   * @return file name of the HFile Link
284   */
285  public static String createHFileLinkName(final TableName tableName, final String regionName,
286    final String hfileName) {
287    String s = String.format("%s=%s-%s",
288      tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName);
289    return s;
290  }
291
292  /**
293   * Create the back reference name
294   */
295  // package-private for testing
296  public static String createBackReferenceName(final String tableNameStr, final String regionName) {
297
298    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
299  }
300
301  /**
302   * Get the full path of the HFile referenced by the back reference
303   * @param rootDir     root hbase directory
304   * @param linkRefPath Link Back Reference path
305   * @return full path of the referenced hfile
306   */
307  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
308    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
309    TableName linkTableName = p.getFirst();
310    String linkRegionName = p.getSecond();
311
312    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
313    Path familyPath = linkRefPath.getParent().getParent();
314    Path regionPath = familyPath.getParent();
315    Path tablePath = regionPath.getParent();
316
317    String linkName =
318      createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName);
319    Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName);
320    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
321    return new Path(new Path(regionDir, familyPath.getName()), linkName);
322  }
323
324  public static Pair<TableName, String> parseBackReferenceName(String name) {
325    int separatorIndex = name.indexOf('.');
326    String linkRegionName = name.substring(0, separatorIndex);
327    String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM);
328    TableName linkTableName = TableName.valueOf(tableSubstr);
329    return new Pair<>(linkTableName, linkRegionName);
330  }
331
332  /**
333   * Get the full path of the HFile referenced by the back reference
334   * @param conf        {@link Configuration} to read for the archive directory name
335   * @param linkRefPath Link Back Reference path
336   * @return full path of the referenced hfile
337   * @throws IOException on unexpected error.
338   */
339  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
340    throws IOException {
341    return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath);
342  }
343
344}