001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mob;
019
020import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
021import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
022
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.nio.ByteBuffer;
026import java.text.ParseException;
027import java.text.SimpleDateFormat;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Calendar;
031import java.util.Collection;
032import java.util.Date;
033import java.util.List;
034import java.util.Optional;
035import java.util.UUID;
036import java.util.function.Consumer;
037import org.apache.hadoop.conf.Configuration;
038import org.apache.hadoop.fs.FileStatus;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.fs.Path;
041import org.apache.hadoop.hbase.Cell;
042import org.apache.hadoop.hbase.ExtendedCell;
043import org.apache.hadoop.hbase.HConstants;
044import org.apache.hadoop.hbase.PrivateCellUtil;
045import org.apache.hadoop.hbase.TableName;
046import org.apache.hadoop.hbase.Tag;
047import org.apache.hadoop.hbase.TagType;
048import org.apache.hadoop.hbase.TagUtil;
049import org.apache.hadoop.hbase.backup.HFileArchiver;
050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
051import org.apache.hadoop.hbase.client.RegionInfo;
052import org.apache.hadoop.hbase.client.RegionInfoBuilder;
053import org.apache.hadoop.hbase.client.Scan;
054import org.apache.hadoop.hbase.client.TableDescriptor;
055import org.apache.hadoop.hbase.io.HFileLink;
056import org.apache.hadoop.hbase.io.compress.Compression;
057import org.apache.hadoop.hbase.io.crypto.Encryption;
058import org.apache.hadoop.hbase.io.hfile.CacheConfig;
059import org.apache.hadoop.hbase.io.hfile.HFile;
060import org.apache.hadoop.hbase.io.hfile.HFileContext;
061import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
062import org.apache.hadoop.hbase.regionserver.BloomType;
063import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
064import org.apache.hadoop.hbase.regionserver.HStoreFile;
065import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
066import org.apache.hadoop.hbase.regionserver.StoreUtils;
067import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
068import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
069import org.apache.hadoop.hbase.util.Bytes;
070import org.apache.hadoop.hbase.util.ChecksumType;
071import org.apache.hadoop.hbase.util.CommonFSUtils;
072import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
073import org.apache.yetus.audience.InterfaceAudience;
074import org.slf4j.Logger;
075import org.slf4j.LoggerFactory;
076
077import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;
078import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap;
079
080/**
081 * The mob utilities
082 */
083@InterfaceAudience.Private
084public final class MobUtils {
085
086  private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class);
087  public static final String SEP = "_";
088
089  private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT =
090    new ThreadLocal<SimpleDateFormat>() {
091      @Override
092      protected SimpleDateFormat initialValue() {
093        return new SimpleDateFormat("yyyyMMdd");
094      }
095    };
096
097  /**
098   * Private constructor to keep this class from being instantiated.
099   */
100  private MobUtils() {
101  }
102
103  /**
104   * Formats a date to a string.
105   * @param date The date.
106   * @return The string format of the date, it's yyyymmdd.
107   */
108  public static String formatDate(Date date) {
109    return LOCAL_FORMAT.get().format(date);
110  }
111
112  /**
113   * Parses the string to a date.
114   * @param dateString The string format of a date, it's yyyymmdd.
115   * @return A date.
116   */
117  public static Date parseDate(String dateString) throws ParseException {
118    return LOCAL_FORMAT.get().parse(dateString);
119  }
120
121  /**
122   * Whether the current cell is a mob reference cell.
123   * @param cell The current cell.
124   * @return True if the cell has a mob reference tag, false if it doesn't.
125   */
126  public static boolean isMobReferenceCell(ExtendedCell cell) {
127    if (cell.getTagsLength() > 0) {
128      Optional<Tag> tag = PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE);
129      if (tag.isPresent()) {
130        return true;
131      }
132    }
133    return false;
134  }
135
136  /**
137   * Gets the table name tag.
138   * @param cell The current cell.
139   * @return The table name tag.
140   */
141  private static Optional<Tag> getTableNameTag(ExtendedCell cell) {
142    Optional<Tag> tag = Optional.empty();
143    if (cell.getTagsLength() > 0) {
144      tag = PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE);
145    }
146    return tag;
147  }
148
149  /**
150   * Gets the table name from when this cell was written into a mob hfile as a string.
151   * @param cell to extract tag from
152   * @return table name as a string. empty if the tag is not found.
153   */
154  public static Optional<String> getTableNameString(ExtendedCell cell) {
155    Optional<Tag> tag = getTableNameTag(cell);
156    Optional<String> name = Optional.empty();
157    if (tag.isPresent()) {
158      name = Optional.of(Tag.getValueAsString(tag.get()));
159    }
160    return name;
161  }
162
163  /**
164   * Get the table name from when this cell was written into a mob hfile as a TableName.
165   * @param cell to extract tag from
166   * @return name of table as a TableName. empty if the tag is not found.
167   */
168  public static Optional<TableName> getTableName(ExtendedCell cell) {
169    Optional<Tag> maybe = getTableNameTag(cell);
170    Optional<TableName> name = Optional.empty();
171    if (maybe.isPresent()) {
172      final Tag tag = maybe.get();
173      if (tag.hasArray()) {
174        name = Optional
175          .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength()));
176      } else {
177        // TODO ByteBuffer handling in tags looks busted. revisit.
178        ByteBuffer buffer = tag.getValueByteBuffer().duplicate();
179        buffer.mark();
180        buffer.position(tag.getValueOffset());
181        buffer.limit(tag.getValueOffset() + tag.getValueLength());
182        name = Optional.of(TableName.valueOf(buffer));
183      }
184    }
185    return name;
186  }
187
188  /**
189   * Whether the tag list has a mob reference tag.
190   * @param tags The tag list.
191   * @return True if the list has a mob reference tag, false if it doesn't.
192   */
193  public static boolean hasMobReferenceTag(List<Tag> tags) {
194    if (!tags.isEmpty()) {
195      for (Tag tag : tags) {
196        if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) {
197          return true;
198        }
199      }
200    }
201    return false;
202  }
203
204  /**
205   * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw"
206   * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob
207   * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the
208   * mob file.
209   * @param scan The current scan.
210   * @return True if it's a raw scan.
211   */
212  public static boolean isRawMobScan(Scan scan) {
213    byte[] raw = scan.getAttribute(MobConstants.MOB_SCAN_RAW);
214    try {
215      return raw != null && Bytes.toBoolean(raw);
216    } catch (IllegalArgumentException e) {
217      return false;
218    }
219  }
220
221  /**
222   * Indicates whether it's a reference only scan. The information is set in the attribute
223   * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are
224   * returned.
225   * @param scan The current scan.
226   * @return True if it's a ref only scan.
227   */
228  public static boolean isRefOnlyScan(Scan scan) {
229    byte[] refOnly = scan.getAttribute(MobConstants.MOB_SCAN_REF_ONLY);
230    try {
231      return refOnly != null && Bytes.toBoolean(refOnly);
232    } catch (IllegalArgumentException e) {
233      return false;
234    }
235  }
236
237  /**
238   * Indicates whether the scan contains the information of caching blocks. The information is set
239   * in the attribute "hbase.mob.cache.blocks" of scan.
240   * @param scan The current scan.
241   * @return True when the Scan attribute specifies to cache the MOB blocks.
242   */
243  public static boolean isCacheMobBlocks(Scan scan) {
244    byte[] cache = scan.getAttribute(MobConstants.MOB_CACHE_BLOCKS);
245    try {
246      return cache != null && Bytes.toBoolean(cache);
247    } catch (IllegalArgumentException e) {
248      return false;
249    }
250  }
251
252  /**
253   * Sets the attribute of caching blocks in the scan.
254   * @param scan        The current scan.
255   * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with
256   *                    this scan caches blocks. False, the scanner doesn't cache blocks for this
257   *                    scan.
258   */
259  public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) {
260    scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks));
261  }
262
263  /**
264   * Cleans the expired mob files. Cleans the files whose creation date is older than (current -
265   * columnFamily.ttl), and the minVersions of that column family is 0.
266   * @param fs               The current file system.
267   * @param conf             The current configuration.
268   * @param tableName        The current table name.
269   * @param columnDescriptor The descriptor of the current column family.
270   * @param cacheConfig      The cacheConfig that disables the block cache.
271   * @param current          The current time.
272   */
273  public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableDescriptor htd,
274    ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current)
275    throws IOException {
276    long timeToLive = columnDescriptor.getTimeToLive();
277    if (Integer.MAX_VALUE == timeToLive) {
278      // no need to clean, because the TTL is not set.
279      return;
280    }
281
282    Calendar calendar = Calendar.getInstance();
283    calendar.setTimeInMillis(current - timeToLive * 1000);
284    calendar.set(Calendar.HOUR_OF_DAY, 0);
285    calendar.set(Calendar.MINUTE, 0);
286    calendar.set(Calendar.SECOND, 0);
287    calendar.set(Calendar.MILLISECOND, 0);
288
289    Date expireDate = calendar.getTime();
290
291    LOG.info("MOB HFiles older than " + expireDate.toGMTString() + " will be deleted!");
292
293    FileStatus[] stats = null;
294    TableName tableName = htd.getTableName();
295    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
296    HRegionFileSystem regionFS =
297      HRegionFileSystem.create(conf, fs, mobTableDir, getMobRegionInfo(tableName));
298    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, htd, columnDescriptor, regionFS);
299    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
300    try {
301      stats = fs.listStatus(path);
302    } catch (FileNotFoundException e) {
303      LOG.warn("Failed to find the mob file " + path, e);
304    }
305    if (null == stats) {
306      // no file found
307      return;
308    }
309    List<HStoreFile> filesToClean = new ArrayList<>();
310    int deletedFileCount = 0;
311    for (FileStatus file : stats) {
312      String fileName = file.getPath().getName();
313      try {
314        if (HFileLink.isHFileLink(file.getPath())) {
315          HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
316          fileName = hfileLink.getOriginPath().getName();
317        }
318
319        Date fileDate = parseDate(MobFileName.getDateFromName(fileName));
320
321        if (LOG.isDebugEnabled()) {
322          LOG.debug("Checking file {}", fileName);
323        }
324        if (fileDate.getTime() < expireDate.getTime()) {
325          if (LOG.isDebugEnabled()) {
326            LOG.debug("{} is an expired file", fileName);
327          }
328          filesToClean
329            .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true, sft));
330          if (
331            filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND,
332              DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND)
333          ) {
334            if (
335              removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
336                filesToClean)
337            ) {
338              deletedFileCount += filesToClean.size();
339            }
340            filesToClean.clear();
341          }
342        }
343      } catch (Exception e) {
344        LOG.error("Cannot parse the fileName " + fileName, e);
345      }
346    }
347    if (
348      !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
349        columnDescriptor.getName(), filesToClean)
350    ) {
351      deletedFileCount += filesToClean.size();
352    }
353    LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
354  }
355
356  /**
357   * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir.
358   * @param conf The current configuration.
359   * @return the root dir of the mob file.
360   */
361  public static Path getMobHome(Configuration conf) {
362    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
363    return getMobHome(hbaseDir);
364  }
365
366  /**
367   * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir.
368   * @param rootDir The qualified path of HBase root directory.
369   * @return The root dir of the mob file.
370   */
371  public static Path getMobHome(Path rootDir) {
372    return new Path(rootDir, MobConstants.MOB_DIR_NAME);
373  }
374
375  /**
376   * Gets the qualified root dir of the mob files.
377   * @param conf The current configuration.
378   * @return The qualified root dir.
379   */
380  public static Path getQualifiedMobRootDir(Configuration conf) throws IOException {
381    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
382    Path mobRootDir = new Path(hbaseDir, MobConstants.MOB_DIR_NAME);
383    FileSystem fs = mobRootDir.getFileSystem(conf);
384    return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
385  }
386
387  /**
388   * Gets the table dir of the mob files under the qualified HBase root dir. It's
389   * {rootDir}/mobdir/data/${namespace}/${tableName}
390   * @param rootDir   The qualified path of HBase root directory.
391   * @param tableName The name of table.
392   * @return The table dir of the mob file.
393   */
394  public static Path getMobTableDir(Path rootDir, TableName tableName) {
395    return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
396  }
397
398  public static Path getMobTableDir(Configuration conf, TableName tableName) {
399    return getMobTableDir(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
400  }
401
402  /**
403   * Gets the region dir of the mob files. It's
404   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
405   * @param conf      The current configuration.
406   * @param tableName The current table name.
407   * @return The region dir of the mob files.
408   */
409  public static Path getMobRegionPath(Configuration conf, TableName tableName) {
410    return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
411  }
412
413  /**
414   * Gets the region dir of the mob files under the specified root dir. It's
415   * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
416   * @param rootDir   The qualified path of HBase root directory.
417   * @param tableName The current table name.
418   * @return The region dir of the mob files.
419   */
420  public static Path getMobRegionPath(Path rootDir, TableName tableName) {
421    Path tablePath = CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
422    RegionInfo regionInfo = getMobRegionInfo(tableName);
423    return new Path(tablePath, regionInfo.getEncodedName());
424  }
425
426  /**
427   * Gets the family dir of the mob files. It's
428   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
429   * @param conf       The current configuration.
430   * @param tableName  The current table name.
431   * @param familyName The current family name.
432   * @return The family dir of the mob files.
433   */
434  public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) {
435    return new Path(getMobRegionPath(conf, tableName), familyName);
436  }
437
438  /**
439   * Gets the family dir of the mob files. It's
440   * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
441   * @param regionPath The path of mob region which is a dummy one.
442   * @param familyName The current family name.
443   * @return The family dir of the mob files.
444   */
445  public static Path getMobFamilyPath(Path regionPath, String familyName) {
446    return new Path(regionPath, familyName);
447  }
448
449  /**
450   * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a
451   * region in HBase. It's internally used only.
452   * @return A dummy mob region info.
453   */
454  public static RegionInfo getMobRegionInfo(TableName tableName) {
455    return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES)
456      .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build();
457  }
458
459  /**
460   * Gets whether the current RegionInfo is a mob one.
461   * @param regionInfo The current RegionInfo.
462   * @return If true, the current RegionInfo is a mob one.
463   */
464  public static boolean isMobRegionInfo(RegionInfo regionInfo) {
465    return regionInfo == null
466      ? false
467      : getMobRegionInfo(regionInfo.getTable()).getEncodedName()
468        .equals(regionInfo.getEncodedName());
469  }
470
471  /**
472   * Gets whether the current region name follows the pattern of a mob region name.
473   * @param tableName  The current table name.
474   * @param regionName The current region name.
475   * @return True if the current region name follows the pattern of a mob region name.
476   */
477  public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
478    return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName());
479  }
480
481  /**
482   * Archives the mob files.
483   * @param conf       The current configuration.
484   * @param fs         The current file system.
485   * @param tableName  The table name.
486   * @param tableDir   The table directory.
487   * @param family     The name of the column family.
488   * @param storeFiles The files to be deleted.
489   */
490  public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
491    Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
492    try {
493      HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
494        storeFiles);
495      LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
496      return true;
497    } catch (IOException e) {
498      LOG.error("Failed to delete the mob files, table {}", tableName, e);
499    }
500    return false;
501  }
502
503  /**
504   * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize +
505   * mobFileName.
506   * @param cell         The original Cell.
507   * @param fileName     The mob file name where the mob reference KeyValue is written.
508   * @param tableNameTag The tag of the current table name. It's very important in cloning the
509   *                     snapshot.
510   * @return The mob reference KeyValue.
511   */
512  public static ExtendedCell createMobRefCell(ExtendedCell cell, byte[] fileName,
513    Tag tableNameTag) {
514    // Append the tags to the KeyValue.
515    // The key is same, the value is the filename of the mob file
516    List<Tag> tags = new ArrayList<>();
517    // Add the ref tag as the 1st one.
518    tags.add(MobConstants.MOB_REF_TAG);
519    // Add the tag of the source table name, this table is where this mob file is flushed
520    // from.
521    // It's very useful in cloning the snapshot. When reading from the cloning table, we need to
522    // find the original mob files by this table name. For details please see cloning
523    // snapshot for mob files.
524    tags.add(tableNameTag);
525    return createMobRefCell(cell, fileName, TagUtil.fromList(tags));
526  }
527
528  public static ExtendedCell createMobRefCell(ExtendedCell cell, byte[] fileName,
529    byte[] refCellTags) {
530    byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName);
531    return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell));
532  }
533
534  /**
535   * Creates a writer for the mob file in temp directory.
536   * @param conf          The current configuration.
537   * @param fs            The current file system.
538   * @param family        The descriptor of the current column family.
539   * @param date          The date string, its format is yyyymmmdd.
540   * @param basePath      The basic path for a temp directory.
541   * @param maxKeyCount   The key count.
542   * @param compression   The compression algorithm.
543   * @param startKey      The hex string of the start key.
544   * @param cacheConfig   The current cache config.
545   * @param cryptoContext The encryption context.
546   * @param isCompaction  If the writer is used in compaction.
547   * @return The writer for the mob file.
548   */
549  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
550    ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount,
551    Compression.Algorithm compression, String startKey, CacheConfig cacheConfig,
552    Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException {
553    MobFileName mobFileName = MobFileName.create(startKey, date,
554      UUID.randomUUID().toString().replaceAll("-", ""), regionName);
555    return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression,
556      cacheConfig, cryptoContext, isCompaction);
557  }
558
559  /**
560   * Creates a writer for the mob file in temp directory.
561   * @param conf          The current configuration.
562   * @param fs            The current file system.
563   * @param family        The descriptor of the current column family.
564   * @param mobFileName   The mob file name.
565   * @param basePath      The basic path for a temp directory.
566   * @param maxKeyCount   The key count.
567   * @param compression   The compression algorithm.
568   * @param cacheConfig   The current cache config.
569   * @param cryptoContext The encryption context.
570   * @param isCompaction  If the writer is used in compaction.
571   * @return The writer for the mob file.
572   */
573  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
574    ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount,
575    Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext,
576    boolean isCompaction) throws IOException {
577    return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()),
578      maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf),
579      StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction);
580  }
581
582  /**
583   * Creates a writer for the mob file in temp directory.
584   * @param conf             The current configuration.
585   * @param fs               The current file system.
586   * @param family           The descriptor of the current column family.
587   * @param path             The path for a temp directory.
588   * @param maxKeyCount      The key count.
589   * @param compression      The compression algorithm.
590   * @param cacheConfig      The current cache config.
591   * @param cryptoContext    The encryption context.
592   * @param checksumType     The checksum type.
593   * @param bytesPerChecksum The bytes per checksum.
594   * @param blocksize        The HFile block size.
595   * @param bloomType        The bloom filter type.
596   * @param isCompaction     If the writer is used in compaction.
597   * @return The writer for the mob file.
598   */
599  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
600    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
601    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
602    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction)
603    throws IOException {
604    return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig,
605      cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null);
606  }
607
608  /**
609   * Creates a writer for the mob file in temp directory.
610   * @param conf                  The current configuration.
611   * @param fs                    The current file system.
612   * @param family                The descriptor of the current column family.
613   * @param path                  The path for a temp directory.
614   * @param maxKeyCount           The key count.
615   * @param compression           The compression algorithm.
616   * @param cacheConfig           The current cache config.
617   * @param cryptoContext         The encryption context.
618   * @param checksumType          The checksum type.
619   * @param bytesPerChecksum      The bytes per checksum.
620   * @param blocksize             The HFile block size.
621   * @param bloomType             The bloom filter type.
622   * @param isCompaction          If the writer is used in compaction.
623   * @param writerCreationTracker to track the current writer in the store
624   * @return The writer for the mob file.
625   */
626  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
627    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
628    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
629    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction,
630    Consumer<Path> writerCreationTracker) throws IOException {
631    if (compression == null) {
632      compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
633    }
634    final CacheConfig writerCacheConf;
635    if (isCompaction) {
636      writerCacheConf = new CacheConfig(cacheConfig);
637      writerCacheConf.setCacheDataOnWrite(false);
638    } else {
639      writerCacheConf = cacheConfig;
640    }
641    HFileContext hFileContext = new HFileContextBuilder().withCompression(compression)
642      .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags())
643      .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum)
644      .withBlockSize(blocksize).withHBaseCheckSum(true)
645      .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext)
646      .withCreateTime(EnvironmentEdgeManager.currentTime()).build();
647
648    StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path)
649      .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext)
650      .withWriterCreationTracker(writerCreationTracker).build();
651    return w;
652  }
653
654  /**
655   * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob
656   * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob
657   * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name.
658   * @param cell The mob ref cell.
659   * @return True if the cell has a valid value.
660   */
661  public static boolean hasValidMobRefCellValue(Cell cell) {
662    return cell.getValueLength() > Bytes.SIZEOF_INT;
663  }
664
665  /**
666   * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The
667   * value of a mob ref cell consists of two parts, real mob value length and mob file name. The
668   * real mob value length takes 4 bytes. The remaining part is the mob file name.
669   * @param cell The mob ref cell.
670   * @return The real mob value length.
671   */
672  public static int getMobValueLength(Cell cell) {
673    return PrivateCellUtil.getValueAsInt(cell);
674  }
675
676  /**
677   * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value
678   * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob
679   * value length takes 4 bytes. The remaining part is the mob file name.
680   * @param cell The mob ref cell.
681   * @return The mob file name.
682   */
683  public static String getMobFileName(Cell cell) {
684    return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT,
685      cell.getValueLength() - Bytes.SIZEOF_INT);
686  }
687
688  /**
689   * Checks whether this table has mob-enabled columns.
690   * @param htd The current table descriptor.
691   * @return Whether this table has mob-enabled columns.
692   */
693  public static boolean hasMobColumns(TableDescriptor htd) {
694    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
695    for (ColumnFamilyDescriptor hcd : hcds) {
696      if (hcd.isMobEnabled()) {
697        return true;
698      }
699    }
700    return false;
701  }
702
703  /**
704   * Get list of Mob column families (if any exists)
705   * @param htd table descriptor
706   * @return list of Mob column families
707   */
708  public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) {
709
710    List<ColumnFamilyDescriptor> fams = new ArrayList<ColumnFamilyDescriptor>();
711    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
712    for (ColumnFamilyDescriptor hcd : hcds) {
713      if (hcd.isMobEnabled()) {
714        fams.add(hcd);
715      }
716    }
717    return fams;
718  }
719
720  /**
721   * Indicates whether return null value when the mob file is missing or corrupt. The information is
722   * set in the attribute "empty.value.on.mobcell.miss" of scan.
723   * @param scan The current scan.
724   * @return True if the readEmptyValueOnMobCellMiss is enabled.
725   */
726  public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) {
727    byte[] readEmptyValueOnMobCellMiss =
728      scan.getAttribute(MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS);
729    try {
730      return readEmptyValueOnMobCellMiss != null && Bytes.toBoolean(readEmptyValueOnMobCellMiss);
731    } catch (IllegalArgumentException e) {
732      return false;
733    }
734  }
735
736  /**
737   * Checks if the mob file is expired.
738   * @param column   The descriptor of the current column family.
739   * @param current  The current time.
740   * @param fileDate The date string parsed from the mob file name.
741   * @return True if the mob file is expired.
742   */
743  public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current,
744    String fileDate) {
745    if (column.getMinVersions() > 0) {
746      return false;
747    }
748    long timeToLive = column.getTimeToLive();
749    if (Integer.MAX_VALUE == timeToLive) {
750      return false;
751    }
752
753    Date expireDate = new Date(current - timeToLive * 1000);
754    expireDate = new Date(expireDate.getYear(), expireDate.getMonth(), expireDate.getDate());
755    try {
756      Date date = parseDate(fileDate);
757      if (date.getTime() < expireDate.getTime()) {
758        return true;
759      }
760    } catch (ParseException e) {
761      LOG.warn("Failed to parse the date " + fileDate, e);
762      return false;
763    }
764    return false;
765  }
766
767  /**
768   * Serialize a set of referenced mob hfiles
769   * @param mobRefSet to serialize, may be null
770   * @return byte array to i.e. put into store file metadata. will not be null
771   */
772  public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) {
773    if (mobRefSet != null && mobRefSet.size() > 0) {
774      // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile
775      // names for serialization.
776      //
777      // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4
778      //
779      // to approximate the needed capacity we use the fact that there will usually be 1 table name
780      // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most"
781      // single table name lengths
782      StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105);
783      boolean doubleSlash = false;
784      for (TableName tableName : mobRefSet.keySet()) {
785        if (doubleSlash) {
786          sb.append("//");
787        } else {
788          doubleSlash = true;
789        }
790        sb.append(tableName).append("/");
791        boolean comma = false;
792        for (String refs : mobRefSet.get(tableName)) {
793          if (comma) {
794            sb.append(",");
795          } else {
796            comma = true;
797          }
798          sb.append(refs);
799        }
800      }
801      return Bytes.toBytes(sb.toString());
802    } else {
803      return HStoreFile.NULL_VALUE;
804    }
805  }
806
807  /**
808   * Deserialize the set of referenced mob hfiles from store file metadata.
809   * @param bytes compatibly serialized data. can not be null
810   * @return a setmultimap of original table to list of hfile names. will be empty if no values.
811   * @throws IllegalStateException if there are values but no table name
812   */
813  public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes)
814    throws IllegalStateException {
815    ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder();
816    if (bytes.length > 1) {
817      // TODO avoid turning the tablename pieces in to strings.
818      String s = Bytes.toString(bytes);
819      String[] tables = s.split("//");
820      for (String tableEnc : tables) {
821        final int delim = tableEnc.indexOf('/');
822        if (delim <= 0) {
823          throw new IllegalStateException("MOB reference data does not match expected encoding: "
824            + "no table name included before list of mob refs.");
825        }
826        TableName table = TableName.valueOf(tableEnc.substring(0, delim));
827        String[] refs = tableEnc.substring(delim + 1).split(",");
828        map.putAll(table, refs);
829      }
830    } else {
831      if (LOG.isDebugEnabled()) {
832        // array length 1 should be the NULL_VALUE.
833        if (!Arrays.equals(HStoreFile.NULL_VALUE, bytes)) {
834          LOG.debug(
835            "Serialized MOB file refs array was treated as the placeholder 'no entries' but"
836              + " didn't have the expected placeholder byte. expected={} and actual={}",
837            Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes));
838        }
839      }
840    }
841    return map;
842  }
843}