001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import static org.apache.hadoop.hbase.io.HFileLink.LINK_NAME_PATTERN;
021
022import edu.umd.cs.findbugs.annotations.Nullable;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.InterruptedIOException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.HashMap;
029import java.util.List;
030import java.util.Map;
031import java.util.Objects;
032import java.util.Optional;
033import java.util.UUID;
034import java.util.regex.Matcher;
035import org.apache.hadoop.conf.Configuration;
036import org.apache.hadoop.fs.FSDataInputStream;
037import org.apache.hadoop.fs.FSDataOutputStream;
038import org.apache.hadoop.fs.FileStatus;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.FileUtil;
041import org.apache.hadoop.fs.LocatedFileStatus;
042import org.apache.hadoop.fs.Path;
043import org.apache.hadoop.fs.permission.FsPermission;
044import org.apache.hadoop.hbase.Cell;
045import org.apache.hadoop.hbase.ExtendedCell;
046import org.apache.hadoop.hbase.HConstants;
047import org.apache.hadoop.hbase.PrivateCellUtil;
048import org.apache.hadoop.hbase.TableName;
049import org.apache.hadoop.hbase.backup.HFileArchiver;
050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
051import org.apache.hadoop.hbase.client.RegionInfo;
052import org.apache.hadoop.hbase.client.TableDescriptor;
053import org.apache.hadoop.hbase.fs.HFileSystem;
054import org.apache.hadoop.hbase.io.HFileLink;
055import org.apache.hadoop.hbase.io.Reference;
056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
057import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
058import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
059import org.apache.hadoop.hbase.util.Bytes;
060import org.apache.hadoop.hbase.util.CommonFSUtils;
061import org.apache.hadoop.hbase.util.FSUtils;
062import org.apache.hadoop.hbase.util.Pair;
063import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
064import org.apache.yetus.audience.InterfaceAudience;
065import org.slf4j.Logger;
066import org.slf4j.LoggerFactory;
067
068import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
069
070/**
071 * View to an on-disk Region. Provides the set of methods necessary to interact with the on-disk
072 * region data.
073 */
074@InterfaceAudience.Private
075public class HRegionFileSystem {
076  private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class);
077
078  /** Name of the region info file that resides just under the region directory. */
079  public final static String REGION_INFO_FILE = ".regioninfo";
080
081  /** Temporary subdirectory of the region directory used for merges. */
082  public static final String REGION_MERGES_DIR = ".merges";
083
084  /** Temporary subdirectory of the region directory used for splits. */
085  public static final String REGION_SPLITS_DIR = ".splits";
086
087  /** Temporary subdirectory of the region directory used for compaction output. */
088  static final String REGION_TEMP_DIR = ".tmp";
089
090  private final RegionInfo regionInfo;
091  // regionInfo for interacting with FS (getting encodedName, etc)
092  final RegionInfo regionInfoForFs;
093  final Configuration conf;
094  private final Path tableDir;
095  final FileSystem fs;
096  private final Path regionDir;
097
098  /**
099   * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
100   * client level.
101   */
102  private final int hdfsClientRetriesNumber;
103  private final int baseSleepBeforeRetries;
104  private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
105  private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
106
107  /**
108   * Create a view to the on-disk region
109   * @param conf       the {@link Configuration} to use
110   * @param fs         {@link FileSystem} that contains the region
111   * @param tableDir   {@link Path} to where the table is being stored
112   * @param regionInfo {@link RegionInfo} for region
113   */
114  HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
115    final RegionInfo regionInfo) {
116    this.fs = fs;
117    this.conf = conf;
118    this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null");
119    this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null");
120    this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
121    this.regionDir = FSUtils.getRegionDirFromTableDir(tableDir, regionInfo);
122    this.hdfsClientRetriesNumber =
123      conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
124    this.baseSleepBeforeRetries =
125      conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
126  }
127
128  /** Returns the underlying {@link FileSystem} */
129  public FileSystem getFileSystem() {
130    return this.fs;
131  }
132
133  /** Returns the {@link RegionInfo} that describe this on-disk region view */
134  public RegionInfo getRegionInfo() {
135    return this.regionInfo;
136  }
137
138  public RegionInfo getRegionInfoForFS() {
139    return this.regionInfoForFs;
140  }
141
142  /** Returns {@link Path} to the region's root directory. */
143  public Path getTableDir() {
144    return this.tableDir;
145  }
146
147  /** Returns {@link Path} to the region directory. */
148  public Path getRegionDir() {
149    return regionDir;
150  }
151
152  // ===========================================================================
153  // Temp Helpers
154  // ===========================================================================
155  /** Returns {@link Path} to the region's temp directory, used for file creations */
156  public Path getTempDir() {
157    return new Path(getRegionDir(), REGION_TEMP_DIR);
158  }
159
160  /**
161   * Clean up any temp detritus that may have been left around from previous operation attempts.
162   */
163  void cleanupTempDir() throws IOException {
164    deleteDir(getTempDir());
165  }
166
167  // ===========================================================================
168  // Store/StoreFile Helpers
169  // ===========================================================================
170  /**
171   * Returns the directory path of the specified family
172   * @param familyName Column Family Name
173   * @return {@link Path} to the directory of the specified family
174   */
175  public Path getStoreDir(final String familyName) {
176    return new Path(this.getRegionDir(), familyName);
177  }
178
179  /**
180   * @param tabledir {@link Path} to where the table is being stored
181   * @param hri      {@link RegionInfo} for the region.
182   * @param family   {@link ColumnFamilyDescriptor} describing the column family
183   * @return Path to family/Store home directory.
184   */
185  public static Path getStoreHomedir(final Path tabledir, final RegionInfo hri,
186    final byte[] family) {
187    return getStoreHomedir(tabledir, hri.getEncodedName(), family);
188  }
189
190  /**
191   * @param tabledir    {@link Path} to where the table is being stored
192   * @param encodedName Encoded region name.
193   * @param family      {@link ColumnFamilyDescriptor} describing the column family
194   * @return Path to family/Store home directory.
195   */
196  public static Path getStoreHomedir(final Path tabledir, final String encodedName,
197    final byte[] family) {
198    return new Path(tabledir, new Path(encodedName, Bytes.toString(family)));
199  }
200
201  /**
202   * Create the store directory for the specified family name
203   * @param familyName Column Family Name
204   * @return {@link Path} to the directory of the specified family
205   * @throws IOException if the directory creation fails.
206   */
207  Path createStoreDir(final String familyName) throws IOException {
208    Path storeDir = getStoreDir(familyName);
209    if (!fs.exists(storeDir) && !createDir(storeDir))
210      throw new IOException("Failed creating " + storeDir);
211    return storeDir;
212  }
213
214  /**
215   * Set the directory of CF to the specified storage policy. <br>
216   * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
217   * <i>"COLD"</i> <br>
218   * <br>
219   * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
220   * @param familyName The name of column family.
221   * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. See hadoop 2.6+
222   *                   org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD',
223   *                   'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
224   */
225  public void setStoragePolicy(String familyName, String policyName) {
226    CommonFSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName);
227  }
228
229  /**
230   * Set storage policy for a whole region. <br>
231   * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
232   * <i>"COLD"</i> <br>
233   * <br>
234   * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
235   * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. See hadoop 2.6+
236   *                   org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD',
237   *                   'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
238   */
239  public void setStoragePolicy(String policyName) {
240    CommonFSUtils.setStoragePolicy(this.fs, getRegionDir(), policyName);
241  }
242
243  /**
244   * Get the storage policy of the directory of CF.
245   * @param familyName The name of column family.
246   * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
247   *         thrown when trying to get policy
248   */
249  @Nullable
250  public String getStoragePolicyName(String familyName) {
251    if (this.fs instanceof HFileSystem) {
252      Path storeDir = getStoreDir(familyName);
253      return ((HFileSystem) this.fs).getStoragePolicyName(storeDir);
254    }
255
256    return null;
257  }
258
259  /**
260   * Returns the store files' LocatedFileStatus which available for the family. This methods
261   * performs the filtering based on the valid store files.
262   * @param familyName Column Family Name
263   * @return a list of store files' LocatedFileStatus for the specified family.
264   */
265  public static List<LocatedFileStatus> getStoreFilesLocatedStatus(final HRegionFileSystem regionfs,
266    final String familyName, final boolean validate) throws IOException {
267    Path familyDir = regionfs.getStoreDir(familyName);
268    List<LocatedFileStatus> locatedFileStatuses =
269      CommonFSUtils.listLocatedStatus(regionfs.getFileSystem(), familyDir);
270    if (locatedFileStatuses == null) {
271      if (LOG.isTraceEnabled()) {
272        LOG.trace("No StoreFiles for: " + familyDir);
273      }
274      return null;
275    }
276
277    List<LocatedFileStatus> validStoreFiles = Lists.newArrayList();
278    for (LocatedFileStatus status : locatedFileStatuses) {
279      if (validate && !StoreFileInfo.isValid(status)) {
280        // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to
281        // true, refer HBASE-23740
282        if (!HConstants.RECOVERED_HFILES_DIR.equals(status.getPath().getName())) {
283          LOG.warn("Invalid StoreFile: {}", status.getPath());
284        }
285      } else {
286        validStoreFiles.add(status);
287      }
288    }
289    return validStoreFiles;
290  }
291
292  /**
293   * Return Qualified Path of the specified family/file
294   * @param familyName Column Family Name
295   * @param fileName   File Name
296   * @return The qualified Path for the specified family/file
297   */
298  Path getStoreFilePath(final String familyName, final String fileName) {
299    Path familyDir = getStoreDir(familyName);
300    return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory());
301  }
302
303  /**
304   * Return the store file information of the specified family/file.
305   * @param familyName Column Family Name
306   * @param fileName   File Name
307   * @return The {@link StoreFileInfo} for the specified family/file
308   */
309  StoreFileInfo getStoreFileInfo(final String familyName, final String fileName,
310    final StoreFileTracker tracker) throws IOException {
311    Path familyDir = getStoreDir(familyName);
312    return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, regionInfoForFs,
313      familyName, new Path(familyDir, fileName), tracker);
314  }
315
316  /**
317   * Returns true if the specified family has reference files
318   * @param familyName Column Family Name
319   * @return true if family contains reference files
320   */
321  public boolean hasReferences(final String familyName) throws IOException {
322    Path storeDir = getStoreDir(familyName);
323    FileStatus[] files = CommonFSUtils.listStatus(fs, storeDir);
324    if (files != null) {
325      for (FileStatus stat : files) {
326        if (stat.isDirectory()) {
327          continue;
328        }
329        if (StoreFileInfo.isReference(stat.getPath())) {
330          LOG.trace("Reference {}", stat.getPath());
331          return true;
332        }
333      }
334    }
335    return false;
336  }
337
338  /** Returns the set of families present on disk n */
339  public Collection<String> getFamilies() throws IOException {
340    FileStatus[] fds =
341      CommonFSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
342    if (fds == null) return null;
343
344    ArrayList<String> families = new ArrayList<>(fds.length);
345    for (FileStatus status : fds) {
346      families.add(status.getPath().getName());
347    }
348
349    return families;
350  }
351
352  /**
353   * Remove the region family from disk, archiving the store files.
354   * @param familyName Column Family Name
355   * @throws IOException if an error occours during the archiving
356   */
357  public void deleteFamily(final String familyName) throws IOException {
358    // archive family store files
359    HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
360
361    // delete the family folder
362    Path familyDir = getStoreDir(familyName);
363    if (fs.exists(familyDir) && !deleteDir(familyDir))
364      throw new IOException("Could not delete family " + familyName + " from FileSystem for region "
365        + regionInfoForFs.getRegionNameAsString() + "(" + regionInfoForFs.getEncodedName() + ")");
366  }
367
368  /**
369   * Generate a unique file name, used by createTempName() and commitStoreFile()
370   * @param suffix extra information to append to the generated name
371   * @return Unique file name
372   */
373  private static String generateUniqueName(final String suffix) {
374    String name = UUID.randomUUID().toString().replaceAll("-", "");
375    if (suffix != null) name += suffix;
376    return name;
377  }
378
379  /**
380   * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file
381   * creation. <code>
382   * Path file = fs.createTempName();
383   * ...StoreFile.Writer(file)...
384   * fs.commitStoreFile("family", file);
385   * </code>
386   * @return Unique {@link Path} of the temporary file
387   */
388  public Path createTempName() {
389    return createTempName(null);
390  }
391
392  /**
393   * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file
394   * creation. <code>
395   * Path file = fs.createTempName();
396   * ...StoreFile.Writer(file)...
397   * fs.commitStoreFile("family", file);
398   * </code>
399   * @param suffix extra information to append to the generated name
400   * @return Unique {@link Path} of the temporary file
401   */
402  public Path createTempName(final String suffix) {
403    return new Path(getTempDir(), generateUniqueName(suffix));
404  }
405
406  /**
407   * Move the file from a build/temp location to the main family store directory.
408   * @param familyName Family that will gain the file
409   * @param buildPath  {@link Path} to the file to commit.
410   * @return The new {@link Path} of the committed file
411   */
412  public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
413    Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
414    return commitStoreFile(buildPath, dstPath);
415  }
416
417  /**
418   * Generate the filename in the main family store directory for moving the file from a build/temp
419   * location.
420   * @param familyName      Family that will gain the file
421   * @param buildPath       {@link Path} to the file to commit.
422   * @param seqNum          Sequence Number to append to the file name (less then 0 if no sequence
423   *                        number)
424   * @param generateNewName False if you want to keep the buildPath name
425   * @return The new {@link Path} of the to be committed file
426   */
427  private Path preCommitStoreFile(final String familyName, final Path buildPath, final long seqNum,
428    final boolean generateNewName) throws IOException {
429    Path storeDir = getStoreDir(familyName);
430    if (!fs.exists(storeDir) && !createDir(storeDir))
431      throw new IOException("Failed creating " + storeDir);
432
433    String name = buildPath.getName();
434    if (generateNewName) {
435      name = generateUniqueName((seqNum < 0) ? null : StoreFileInfo.formatBulkloadSeqId(seqNum));
436    }
437    Path dstPath = new Path(storeDir, name);
438    if (!fs.exists(buildPath)) {
439      throw new FileNotFoundException(buildPath.toString());
440    }
441    if (LOG.isDebugEnabled()) {
442      LOG.debug("Committing " + buildPath + " as " + dstPath);
443    }
444    return dstPath;
445  }
446
447  /*
448   * Moves file from staging dir to region dir
449   * @param buildPath {@link Path} to the file to commit.
450   * @param dstPath {@link Path} to the file under region dir
451   * @return The {@link Path} of the committed file
452   */
453  Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
454    // rename is not necessary in case of direct-insert stores
455    if (buildPath.equals(dstPath)) {
456      return dstPath;
457    }
458    // buildPath exists, therefore not doing an exists() check.
459    if (!rename(buildPath, dstPath)) {
460      throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
461    }
462    return dstPath;
463  }
464
465  /**
466   * Archives the specified store file from the specified family.
467   * @param familyName Family that contains the store files
468   * @param filePath   {@link Path} to the store file to remove
469   * @throws IOException if the archiving fails
470   */
471  public void removeStoreFile(final String familyName, final Path filePath) throws IOException {
472    HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs, this.tableDir,
473      Bytes.toBytes(familyName), filePath);
474  }
475
476  /**
477   * Closes and archives the specified store files from the specified family.
478   * @param familyName Family that contains the store files
479   * @param storeFiles set of store files to remove
480   * @throws IOException if the archiving fails
481   */
482  public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles)
483    throws IOException {
484    HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs, this.tableDir,
485      Bytes.toBytes(familyName), storeFiles);
486  }
487
488  /**
489   * Bulk load: Add a specified store file to the specified family. If the source file is on the
490   * same different file-system is moved from the source location to the destination location,
491   * otherwise is copied over.
492   * @param familyName Family that will gain the file
493   * @param srcPath    {@link Path} to the file to import
494   * @param seqNum     Bulk Load sequence number
495   * @return The destination {@link Path} of the bulk loaded file
496   */
497  Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
498    throws IOException {
499    // Copy the file if it's on another filesystem
500    FileSystem srcFs = srcPath.getFileSystem(conf);
501    srcPath = srcFs.resolvePath(srcPath);
502    FileSystem realSrcFs = srcPath.getFileSystem(conf);
503    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem) fs).getBackingFs() : fs;
504
505    // We can't compare FileSystem instances as equals() includes UGI instance
506    // as part of the comparison and won't work when doing SecureBulkLoad
507    // TODO deal with viewFS
508    if (!FSUtils.isSameHdfs(conf, realSrcFs, desFs)) {
509      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than "
510        + "the destination store. Copying file over to destination filesystem.");
511      Path tmpPath = createTempName();
512      FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf);
513      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
514      srcPath = tmpPath;
515    }
516
517    return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
518  }
519
520  // ===========================================================================
521  // Splits Helpers
522  // ===========================================================================
523
524  public Path getSplitsDir(final RegionInfo hri) {
525    return new Path(getTableDir(), hri.getEncodedName());
526  }
527
528  /**
529   * Remove daughter region
530   * @param regionInfo daughter {@link RegionInfo}
531   */
532  void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
533    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
534    if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
535      throw new IOException("Failed delete of " + regionDir);
536    }
537  }
538
539  /**
540   * Commit a daughter region, moving it from the split temporary directory to the proper location
541   * in the filesystem.
542   * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
543   */
544  public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles,
545    MasterProcedureEnv env) throws IOException {
546    Path regionDir = this.getSplitsDir(regionInfo);
547    if (fs.exists(regionDir)) {
548      // Write HRI to a file in case we need to recover hbase:meta
549      Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
550      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
551      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
552      HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
553        env.getMasterConfiguration(), fs, getTableDir(), regionInfo, false);
554      insertRegionFilesIntoStoreTracker(allRegionFiles, env, regionFs);
555    }
556    return regionDir;
557  }
558
559  private void insertRegionFilesIntoStoreTracker(List<Path> allFiles, MasterProcedureEnv env,
560    HRegionFileSystem regionFs) throws IOException {
561    TableDescriptor tblDesc =
562      env.getMasterServices().getTableDescriptors().get(regionInfo.getTable());
563    // we need to map trackers per store
564    Map<String, StoreFileTracker> trackerMap = new HashMap<>();
565    // we need to map store files per store
566    Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>();
567    for (Path file : allFiles) {
568      String familyName = file.getParent().getName();
569      trackerMap.computeIfAbsent(familyName, t -> StoreFileTrackerFactory.create(conf, tblDesc,
570        tblDesc.getColumnFamily(Bytes.toBytes(familyName)), regionFs));
571      fileInfoMap.computeIfAbsent(familyName, l -> new ArrayList<>());
572      List<StoreFileInfo> infos = fileInfoMap.get(familyName);
573      infos.add(trackerMap.get(familyName).getStoreFileInfo(file, true));
574    }
575    for (Map.Entry<String, StoreFileTracker> entry : trackerMap.entrySet()) {
576      entry.getValue().add(fileInfoMap.get(entry.getKey()));
577    }
578  }
579
580  /**
581   * Creates region split daughter directories under the table dir. If the daughter regions already
582   * exist, for example, in the case of a recovery from a previous failed split procedure, this
583   * method deletes the given region dir recursively, then recreates it again.
584   */
585  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException {
586    Path daughterADir = getSplitsDir(daughterA);
587    if (fs.exists(daughterADir) && !deleteDir(daughterADir)) {
588      throw new IOException("Failed deletion of " + daughterADir + " before creating them again.");
589
590    }
591    if (!createDir(daughterADir)) {
592      throw new IOException("Failed create of " + daughterADir);
593    }
594    Path daughterBDir = getSplitsDir(daughterB);
595    if (fs.exists(daughterBDir) && !deleteDir(daughterBDir)) {
596      throw new IOException("Failed deletion of " + daughterBDir + " before creating them again.");
597
598    }
599    if (!createDir(daughterBDir)) {
600      throw new IOException("Failed create of " + daughterBDir);
601    }
602  }
603
604  /**
605   * Write out a split reference. Package local so it doesnt leak out of regionserver.
606   * @param hri         {@link RegionInfo} of the destination
607   * @param familyName  Column Family Name
608   * @param f           File to split.
609   * @param splitRow    Split Row
610   * @param top         True if we are referring to the top half of the hfile.
611   * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if this
612   *                    method is invoked on the Master side, then the RegionSplitPolicy will NOT
613   *                    have a reference to a Region.
614   * @return Path to created reference.
615   */
616  public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow,
617    boolean top, RegionSplitPolicy splitPolicy, StoreFileTracker tracker) throws IOException {
618    Path splitDir = new Path(getSplitsDir(hri), familyName);
619    // Add the referred-to regions name as a dot separated suffix.
620    // See REF_NAME_REGEX regex above. The referred-to regions name is
621    // up in the path of the passed in <code>f</code> -- parentdir is family,
622    // then the directory above is the region name.
623    String parentRegionName = regionInfoForFs.getEncodedName();
624    // Write reference with same file id only with the other region name as
625    // suffix and into the new region location (under same family).
626    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
627    if (fs.exists(p)) {
628      LOG.warn("Found an already existing split file for {}. Assuming this is a recovery.", p);
629      return p;
630    }
631    boolean createLinkFile = false;
632    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
633      // Check whether the split row lies in the range of the store file
634      // If it is outside the range, return directly.
635      f.initReader();
636      try {
637        Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
638        Optional<ExtendedCell> lastKey = f.getLastKey();
639        Optional<ExtendedCell> firstKey = f.getFirstKey();
640        if (top) {
641          // check if larger than last key.
642          // If lastKey is null means storefile is empty.
643          if (!lastKey.isPresent()) {
644            return null;
645          }
646          if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
647            return null;
648          }
649          if (firstKey.isPresent() && f.getComparator().compare(splitKey, firstKey.get()) <= 0) {
650            LOG.debug("Will create HFileLink file for {}, top=true", f.getPath());
651            createLinkFile = true;
652          }
653        } else {
654          // check if smaller than first key
655          // If firstKey is null means storefile is empty.
656          if (!firstKey.isPresent()) {
657            return null;
658          }
659          if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
660            return null;
661          }
662          if (lastKey.isPresent() && f.getComparator().compare(splitKey, lastKey.get()) >= 0) {
663            LOG.debug("Will create HFileLink file for {}, top=false", f.getPath());
664            createLinkFile = true;
665          }
666        }
667      } finally {
668        f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
669      }
670    }
671    if (createLinkFile) {
672      // create HFileLink file instead of Reference file for child
673      String hfileName = f.getPath().getName();
674      TableName linkedTable = regionInfoForFs.getTable();
675      String linkedRegion = regionInfoForFs.getEncodedName();
676      try {
677        if (HFileLink.isHFileLink(hfileName)) {
678          Matcher m = LINK_NAME_PATTERN.matcher(hfileName);
679          if (!m.matches()) {
680            throw new IllegalArgumentException(hfileName + " is not a valid HFileLink name!");
681          }
682          linkedTable = TableName.valueOf(m.group(1), m.group(2));
683          linkedRegion = m.group(3);
684          hfileName = m.group(4);
685        }
686        // must create back reference here
687        HFileLink.create(conf, fs, splitDir, familyName, hri.getTable().getNameAsString(),
688          hri.getEncodedName(), linkedTable, linkedRegion, hfileName, true);
689        Path path =
690          new Path(splitDir, HFileLink.createHFileLinkName(linkedTable, linkedRegion, hfileName));
691        LOG.info("Created linkFile:" + path.toString() + " for child: " + hri.getEncodedName()
692          + ", parent: " + regionInfoForFs.getEncodedName());
693        return path;
694      } catch (IOException e) {
695        // if create HFileLink file failed, then just skip the error and create Reference file
696        LOG.error("Create link file for " + hfileName + " for child " + hri.getEncodedName()
697          + "failed, will create Reference file", e);
698      }
699    }
700    // A reference to the bottom half of the hsf store file.
701    Reference r =
702      top ? Reference.createTopReference(splitRow) : Reference.createBottomReference(splitRow);
703    tracker.createReference(r, p);
704    return p;
705  }
706
707  // ===========================================================================
708  // Merge Helpers
709  // ===========================================================================
710
711  Path getMergesDir(final RegionInfo hri) {
712    return new Path(getTableDir(), hri.getEncodedName());
713  }
714
715  /**
716   * Remove merged region
717   * @param mergedRegion {@link RegionInfo}
718   */
719  public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException {
720    Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
721    if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
722      throw new IOException("Failed delete of " + regionDir);
723    }
724  }
725
726  static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
727    if (
728      FSUtils.isDistributedFileSystem(fs)
729        || !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)
730    ) {
731      return fs.mkdirs(dir);
732    }
733    FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
734    return fs.mkdirs(dir, perms);
735  }
736
737  /**
738   * Write out a merge reference under the given merges directory.
739   * @param mergingRegion {@link RegionInfo} for one of the regions being merged.
740   * @param familyName    Column Family Name
741   * @param f             File to create reference.
742   * @return Path to created reference.
743   * @throws IOException if the merge write fails.
744   */
745  public Path mergeStoreFile(RegionInfo mergingRegion, String familyName, HStoreFile f,
746    StoreFileTracker tracker) throws IOException {
747    Path referenceDir = new Path(getMergesDir(regionInfoForFs), familyName);
748    // A whole reference to the store file.
749    Reference r = Reference.createTopReference(mergingRegion.getStartKey());
750    // Add the referred-to regions name as a dot separated suffix.
751    // See REF_NAME_REGEX regex above. The referred-to regions name is
752    // up in the path of the passed in <code>f</code> -- parentdir is family,
753    // then the directory above is the region name.
754    String mergingRegionName = mergingRegion.getEncodedName();
755    // Write reference with same file id only with the other region name as
756    // suffix and into the new region location (under same family).
757    Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName);
758    tracker.createReference(r, p);
759    return p;
760  }
761
762  /**
763   * Commit a merged region, making it ready for use.
764   */
765  public void commitMergedRegion(List<Path> allMergedFiles, MasterProcedureEnv env)
766    throws IOException {
767    Path regionDir = getMergesDir(regionInfoForFs);
768    if (regionDir != null && fs.exists(regionDir)) {
769      // Write HRI to a file in case we need to recover hbase:meta
770      Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE);
771      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
772      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
773      insertRegionFilesIntoStoreTracker(allMergedFiles, env, this);
774    }
775  }
776
777  // ===========================================================================
778  // Create/Open/Delete Helpers
779  // ===========================================================================
780
781  /** Returns Content of the file we write out to the filesystem under a region */
782  private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException {
783    return RegionInfo.toDelimitedByteArray(hri);
784  }
785
786  /**
787   * Create a {@link RegionInfo} from the serialized version on-disk.
788   * @param fs        {@link FileSystem} that contains the Region Info file
789   * @param regionDir {@link Path} to the Region Directory that contains the Info file
790   * @return An {@link RegionInfo} instance gotten from the Region Info file.
791   * @throws IOException if an error occurred during file open/read operation.
792   */
793  public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
794    throws IOException {
795    FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
796    try {
797      return RegionInfo.parseFrom(in);
798    } finally {
799      in.close();
800    }
801  }
802
803  /**
804   * Write the .regioninfo file on-disk.
805   * <p/>
806   * Overwrites if exists already.
807   */
808  private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
809    final Path regionInfoFile, final byte[] content) throws IOException {
810    // First check to get the permissions
811    FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
812    // Write the RegionInfo file content
813    try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) {
814      out.write(content);
815    }
816  }
817
818  /**
819   * Write out an info file under the stored region directory. Useful recovering mangled regions. If
820   * the regionInfo already exists on-disk, then we fast exit.
821   */
822  void checkRegionInfoOnFilesystem() throws IOException {
823    // Compose the content of the file so we can compare to length in filesystem. If not same,
824    // rewrite it (it may have been written in the old format using Writables instead of pb). The
825    // pb version is much shorter -- we write now w/o the toString version -- so checking length
826    // only should be sufficient. I don't want to read the file every time to check if it pb
827    // serialized.
828    byte[] content = getRegionInfoFileContent(regionInfoForFs);
829
830    // Verify if the region directory exists before opening a region. We need to do this since if
831    // the region directory doesn't exist we will re-create the region directory and a new HRI
832    // when HRegion.openHRegion() is called.
833    try {
834      FileStatus status = fs.getFileStatus(getRegionDir());
835    } catch (FileNotFoundException e) {
836      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName()
837        + " on table " + regionInfo.getTable());
838    }
839
840    try {
841      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
842      FileStatus status = fs.getFileStatus(regionInfoFile);
843      if (status != null && status.getLen() == content.length) {
844        // Then assume the content good and move on.
845        // NOTE: that the length is not sufficient to define the the content matches.
846        return;
847      }
848
849      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
850      if (!fs.delete(regionInfoFile, false)) {
851        throw new IOException("Unable to remove existing " + regionInfoFile);
852      }
853    } catch (FileNotFoundException e) {
854      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName()
855        + " on table " + regionInfo.getTable());
856    }
857
858    // Write HRI to a file in case we need to recover hbase:meta
859    writeRegionInfoOnFilesystem(content, true);
860  }
861
862  /**
863   * Write out an info file under the region directory. Useful recovering mangled regions.
864   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
865   */
866  private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
867    byte[] content = getRegionInfoFileContent(regionInfoForFs);
868    writeRegionInfoOnFilesystem(content, useTempDir);
869  }
870
871  /**
872   * Write out an info file under the region directory. Useful recovering mangled regions.
873   * @param regionInfoContent serialized version of the {@link RegionInfo}
874   * @param useTempDir        indicate whether or not using the region .tmp dir for a safer file
875   *                          creation.
876   */
877  private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final boolean useTempDir)
878    throws IOException {
879    Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
880    if (useTempDir) {
881      // Create in tmpDir and then move into place in case we crash after
882      // create but before close. If we don't successfully close the file,
883      // subsequent region reopens will fail the below because create is
884      // registered in NN.
885
886      // And then create the file
887      Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
888
889      // If datanode crashes or if the RS goes down just before the close is called while trying to
890      // close the created regioninfo file in the .tmp directory then on next
891      // creation we will be getting AlreadyCreatedException.
892      // Hence delete and create the file if exists.
893      if (CommonFSUtils.isExists(fs, tmpPath)) {
894        CommonFSUtils.delete(fs, tmpPath, true);
895      }
896
897      // Write HRI to a file in case we need to recover hbase:meta
898      writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
899
900      // Move the created file to the original path
901      if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) {
902        throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
903      }
904    } else {
905      // Write HRI to a file in case we need to recover hbase:meta
906      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
907    }
908  }
909
910  /**
911   * Create a new Region on file-system.
912   * @param conf       the {@link Configuration} to use
913   * @param fs         {@link FileSystem} from which to add the region
914   * @param tableDir   {@link Path} to where the table is being stored
915   * @param regionInfo {@link RegionInfo} for region to be added
916   * @throws IOException if the region creation fails due to a FileSystem exception.
917   */
918  public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
919    final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
920    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
921
922    // We only create a .regioninfo and the region directory if this is the default region replica
923    if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
924      Path regionDir = regionFs.getRegionDir();
925      if (fs.exists(regionDir)) {
926        LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
927      } else {
928        // Create the region directory
929        if (!createDirOnFileSystem(fs, conf, regionDir)) {
930          LOG.warn("Unable to create the region directory: " + regionDir);
931          throw new IOException("Unable to create region directory: " + regionDir);
932        }
933      }
934
935      // Write HRI to a file in case we need to recover hbase:meta
936      regionFs.writeRegionInfoOnFilesystem(false);
937    } else {
938      if (LOG.isDebugEnabled())
939        LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
940    }
941    return regionFs;
942  }
943
944  /**
945   * Open Region from file-system.
946   * @param conf       the {@link Configuration} to use
947   * @param fs         {@link FileSystem} from which to add the region
948   * @param tableDir   {@link Path} to where the table is being stored
949   * @param regionInfo {@link RegionInfo} for region to be added
950   * @param readOnly   True if you don't want to edit the region data
951   * @throws IOException if the region creation fails due to a FileSystem exception.
952   */
953  public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
954    final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly)
955    throws IOException {
956    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
957    Path regionDir = regionFs.getRegionDir();
958
959    if (!fs.exists(regionDir)) {
960      LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
961      throw new IOException("The specified region do not exists on disk: " + regionDir);
962    }
963
964    if (!readOnly) {
965      // Cleanup temporary directories
966      regionFs.cleanupTempDir();
967
968      // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
969      // Only create HRI if we are the default replica
970      if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
971        regionFs.checkRegionInfoOnFilesystem();
972      } else {
973        if (LOG.isDebugEnabled()) {
974          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
975        }
976      }
977    }
978
979    return regionFs;
980  }
981
982  /**
983   * Remove the region from the table directory, archiving the region's hfiles.
984   * @param conf       the {@link Configuration} to use
985   * @param fs         {@link FileSystem} from which to remove the region
986   * @param tableDir   {@link Path} to where the table is being stored
987   * @param regionInfo {@link RegionInfo} for region to be deleted
988   * @throws IOException if the request cannot be completed
989   */
990  public static void deleteRegionFromFileSystem(final Configuration conf, final FileSystem fs,
991    final Path tableDir, final RegionInfo regionInfo) throws IOException {
992    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
993    Path regionDir = regionFs.getRegionDir();
994
995    if (!fs.exists(regionDir)) {
996      LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
997      return;
998    }
999
1000    if (LOG.isDebugEnabled()) {
1001      LOG.debug("DELETING region " + regionDir);
1002    }
1003
1004    // Archive region
1005    Path rootDir = CommonFSUtils.getRootDir(conf);
1006    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
1007
1008    // Delete empty region dir
1009    if (!fs.delete(regionDir, true)) {
1010      LOG.warn("Failed delete of " + regionDir);
1011    }
1012  }
1013
1014  /**
1015   * Creates a directory. Assumes the user has already checked for this directory existence.
1016   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1017   *         whether the directory exists or not, and returns true if it exists.
1018   */
1019  boolean createDir(Path dir) throws IOException {
1020    int i = 0;
1021    IOException lastIOE = null;
1022    do {
1023      try {
1024        return mkdirs(fs, conf, dir);
1025      } catch (IOException ioe) {
1026        lastIOE = ioe;
1027        if (fs.exists(dir)) return true; // directory is present
1028        try {
1029          sleepBeforeRetry("Create Directory", i + 1);
1030        } catch (InterruptedException e) {
1031          throw (InterruptedIOException) new InterruptedIOException().initCause(e);
1032        }
1033      }
1034    } while (++i <= hdfsClientRetriesNumber);
1035    throw new IOException("Exception in createDir", lastIOE);
1036  }
1037
1038  /**
1039   * Renames a directory. Assumes the user has already checked for this directory existence.
1040   * @return true if rename is successful.
1041   */
1042  boolean rename(Path srcpath, Path dstPath) throws IOException {
1043    IOException lastIOE = null;
1044    int i = 0;
1045    do {
1046      try {
1047        return fs.rename(srcpath, dstPath);
1048      } catch (IOException ioe) {
1049        lastIOE = ioe;
1050        if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1051        // dir is not there, retry after some time.
1052        try {
1053          sleepBeforeRetry("Rename Directory", i + 1);
1054        } catch (InterruptedException e) {
1055          throw (InterruptedIOException) new InterruptedIOException().initCause(e);
1056        }
1057      }
1058    } while (++i <= hdfsClientRetriesNumber);
1059
1060    throw new IOException("Exception in rename", lastIOE);
1061  }
1062
1063  /**
1064   * Deletes a directory. Assumes the user has already checked for this directory existence.
1065   * @return true if the directory is deleted.
1066   */
1067  boolean deleteDir(Path dir) throws IOException {
1068    IOException lastIOE = null;
1069    int i = 0;
1070    do {
1071      try {
1072        return fs.delete(dir, true);
1073      } catch (IOException ioe) {
1074        lastIOE = ioe;
1075        if (!fs.exists(dir)) return true;
1076        // dir is there, retry deleting after some time.
1077        try {
1078          sleepBeforeRetry("Delete Directory", i + 1);
1079        } catch (InterruptedException e) {
1080          throw (InterruptedIOException) new InterruptedIOException().initCause(e);
1081        }
1082      }
1083    } while (++i <= hdfsClientRetriesNumber);
1084
1085    throw new IOException("Exception in DeleteDir", lastIOE);
1086  }
1087
1088  /**
1089   * sleeping logic; handles the interrupt exception.
1090   */
1091  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1092    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1093  }
1094
1095  /**
1096   * Creates a directory for a filesystem and configuration object. Assumes the user has already
1097   * checked for this directory existence.
1098   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1099   *         whether the directory exists or not, and returns true if it exists.
1100   */
1101  private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1102    throws IOException {
1103    int i = 0;
1104    IOException lastIOE = null;
1105    int hdfsClientRetriesNumber =
1106      conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1107    int baseSleepBeforeRetries =
1108      conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1109    do {
1110      try {
1111        return fs.mkdirs(dir);
1112      } catch (IOException ioe) {
1113        lastIOE = ioe;
1114        if (fs.exists(dir)) return true; // directory is present
1115        try {
1116          sleepBeforeRetry("Create Directory", i + 1, baseSleepBeforeRetries,
1117            hdfsClientRetriesNumber);
1118        } catch (InterruptedException e) {
1119          throw (InterruptedIOException) new InterruptedIOException().initCause(e);
1120        }
1121      }
1122    } while (++i <= hdfsClientRetriesNumber);
1123
1124    throw new IOException("Exception in createDir", lastIOE);
1125  }
1126
1127  /**
1128   * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1129   * for this to avoid re-looking for the integer values.
1130   */
1131  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1132    int hdfsClientRetriesNumber) throws InterruptedException {
1133    if (sleepMultiplier > hdfsClientRetriesNumber) {
1134      if (LOG.isDebugEnabled()) {
1135        LOG.debug(msg + ", retries exhausted");
1136      }
1137      return;
1138    }
1139    if (LOG.isDebugEnabled()) {
1140      LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1141    }
1142    Thread.sleep((long) baseSleepBeforeRetries * sleepMultiplier);
1143  }
1144
1145  public static HRegionFileSystem create(final Configuration conf, final FileSystem fs,
1146    final Path tableDir, final RegionInfo regionInfo) throws IOException {
1147    return new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1148  }
1149}