/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob;

import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;
import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import java.util.function.Consumer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.TagUtil;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap;

/**
 * The mob utilities
 */
@InterfaceAudience.Private
public final class MobUtils {

  private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class);
  public static final String SEP = "_";

  // SimpleDateFormat is not thread-safe, so every thread works on its own instance.
  private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT =
    ThreadLocal.withInitial(() -> new SimpleDateFormat("yyyyMMdd"));

  /**
   * Private constructor to keep this class from being instantiated.
   */
  private MobUtils() {
  }

  /**
   * Formats a date to a string.
   * @param date The date.
   * @return The string format of the date, it's yyyyMMdd.
   */
  public static String formatDate(Date date) {
    return LOCAL_FORMAT.get().format(date);
  }

  /**
   * Parses the string to a date.
   * @param dateString The string format of a date, it's yyyyMMdd.
   * @return A date.
   * @throws ParseException if the string cannot be parsed with the yyyyMMdd pattern.
   */
  public static Date parseDate(String dateString) throws ParseException {
    return LOCAL_FORMAT.get().parse(dateString);
  }

  /**
   * Whether the current cell is a mob reference cell.
   * @param cell The current cell.
   * @return True if the cell has a mob reference tag, false if it doesn't.
   */
  public static boolean isMobReferenceCell(Cell cell) {
    return cell.getTagsLength() > 0
      && PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE).isPresent();
  }

  /**
   * Gets the table name tag.
   * @param cell The current cell.
   * @return The table name tag, empty if the cell carries no tags or no table name tag.
   */
  private static Optional<Tag> getTableNameTag(Cell cell) {
    if (cell.getTagsLength() > 0) {
      return PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE);
    }
    return Optional.empty();
  }

  /**
   * Gets the table name from when this cell was written into a mob hfile as a string.
   * @param cell to extract tag from
   * @return table name as a string. empty if the tag is not found.
   */
  public static Optional<String> getTableNameString(Cell cell) {
    Optional<Tag> tag = getTableNameTag(cell);
    Optional<String> name = Optional.empty();
    if (tag.isPresent()) {
      name = Optional.of(Tag.getValueAsString(tag.get()));
    }
    return name;
  }

  /**
   * Get the table name from when this cell was written into a mob hfile as a TableName.
   * @param cell to extract tag from
   * @return name of table as a TableName. empty if the tag is not found.
   */
  public static Optional<TableName> getTableName(Cell cell) {
    Optional<Tag> maybe = getTableNameTag(cell);
    Optional<TableName> name = Optional.empty();
    if (maybe.isPresent()) {
      final Tag tag = maybe.get();
      if (tag.hasArray()) {
        name = Optional
          .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength()));
      } else {
        // TODO ByteBuffer handling in tags looks busted. revisit.
        // Work on a duplicate so the tag's own buffer position/limit stay untouched.
        ByteBuffer buffer = tag.getValueByteBuffer().duplicate();
        buffer.mark();
        buffer.position(tag.getValueOffset());
        buffer.limit(tag.getValueOffset() + tag.getValueLength());
        name = Optional.of(TableName.valueOf(buffer));
      }
    }
    return name;
  }

  /**
   * Whether the tag list has a mob reference tag.
   * @param tags The tag list.
   * @return True if the list has a mob reference tag, false if it doesn't.
   */
  public static boolean hasMobReferenceTag(List<Tag> tags) {
    for (Tag tag : tags) {
      if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) {
        return true;
      }
    }
    return false;
  }

  /**
   * Reads a boolean flag stored as a scan attribute.
   * @param scan          The current scan.
   * @param attributeName The name of the scan attribute holding the flag.
   * @return True only if the attribute is present and decodes to true. An absent attribute, or a
   *         value that {@link Bytes#toBoolean(byte[])} rejects, yields false.
   */
  private static boolean getBooleanScanAttribute(Scan scan, String attributeName) {
    byte[] value = scan.getAttribute(attributeName);
    try {
      return value != null && Bytes.toBoolean(value);
    } catch (IllegalArgumentException e) {
      // A value that cannot be decoded as a boolean is treated the same as "not set".
      return false;
    }
  }

  /**
   * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw"
   * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob
   * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the
   * mob file.
   * @param scan The current scan.
   * @return True if it's a raw scan.
   */
  public static boolean isRawMobScan(Scan scan) {
    return getBooleanScanAttribute(scan, MobConstants.MOB_SCAN_RAW);
  }

  /**
   * Indicates whether it's a reference only scan. The information is set in the attribute
   * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are
   * returned.
   * @param scan The current scan.
   * @return True if it's a ref only scan.
   */
  public static boolean isRefOnlyScan(Scan scan) {
    return getBooleanScanAttribute(scan, MobConstants.MOB_SCAN_REF_ONLY);
  }

  /**
   * Indicates whether the scan contains the information of caching blocks. The information is set
   * in the attribute "hbase.mob.cache.blocks" of scan.
   * @param scan The current scan.
   * @return True when the Scan attribute specifies to cache the MOB blocks.
   */
  public static boolean isCacheMobBlocks(Scan scan) {
    return getBooleanScanAttribute(scan, MobConstants.MOB_CACHE_BLOCKS);
  }

  /**
   * Sets the attribute of caching blocks in the scan.
   * @param scan        The current scan.
   * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with
   *                    this scan caches blocks. False, the scanner doesn't cache blocks for this
   *                    scan.
   */
  public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) {
    scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks));
  }

  /**
   * Computes the expiry cut-off: the start of the day (00:00:00.000 in the default time zone)
   * that contains the instant {@code current - timeToLive} seconds.
   * @param current    The current time in milliseconds.
   * @param timeToLive The TTL of the column family in seconds.
   * @return The cut-off date; mob files dated strictly before it are expired.
   */
  private static Date getExpireDate(long current, long timeToLive) {
    Calendar calendar = Calendar.getInstance();
    calendar.setTimeInMillis(current - timeToLive * 1000);
    // Mob file names only carry a day (yyyyMMdd), so compare at day granularity.
    calendar.set(Calendar.HOUR_OF_DAY, 0);
    calendar.set(Calendar.MINUTE, 0);
    calendar.set(Calendar.SECOND, 0);
    calendar.set(Calendar.MILLISECOND, 0);
    return calendar.getTime();
  }

  /**
   * Cleans the expired mob files. Cleans the files whose creation date is older than (current -
   * columnFamily.ttl). Note that this method only looks at the TTL; it does not itself inspect the
   * minVersions of the column family, so callers that must honour minVersions have to check it
   * before calling.
   * <p>
   * Expired files are archived in batches whose size is bounded by the
   * {@code MOB_CLEANER_BATCH_SIZE_UPPER_BOUND} configuration. A file whose name cannot be
   * processed is logged and skipped; it does not abort the cleaning of the remaining files.
   * @param fs               The current file system.
   * @param conf             The current configuration.
   * @param tableName        The current table name.
   * @param columnDescriptor The descriptor of the current column family.
   * @param cacheConfig      The cacheConfig that disables the block cache.
   * @param current          The current time.
   */
  public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableName tableName,
    ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current)
    throws IOException {
    long timeToLive = columnDescriptor.getTimeToLive();
    if (Integer.MAX_VALUE == timeToLive) {
      // no need to clean, because the TTL is not set.
      return;
    }

    Date expireDate = getExpireDate(current, timeToLive);

    LOG.info("MOB HFiles older than {} will be deleted!", expireDate.toGMTString());

    FileStatus[] stats = null;
    Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName);
    Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString());
    try {
      stats = fs.listStatus(path);
    } catch (FileNotFoundException e) {
      LOG.warn("Failed to find the mob file {}", path, e);
    }
    if (null == stats) {
      // no file found
      return;
    }
    List<HStoreFile> filesToClean = new ArrayList<>();
    int deletedFileCount = 0;
    for (FileStatus file : stats) {
      String fileName = file.getPath().getName();
      try {
        if (HFileLink.isHFileLink(file.getPath())) {
          // For a link, the date is encoded in the name of the file it points to.
          HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
          fileName = hfileLink.getOriginPath().getName();
        }

        Date fileDate = parseDate(MobFileName.getDateFromName(fileName));

        LOG.debug("Checking file {}", fileName);
        if (fileDate.getTime() < expireDate.getTime()) {
          LOG.debug("{} is an expired file", fileName);
          filesToClean
            .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true));
          if (
            filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND,
              DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND)
          ) {
            // The batch is full: archive it now so the pending list stays bounded.
            if (
              removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(),
                filesToClean)
            ) {
              deletedFileCount += filesToClean.size();
            }
            filesToClean.clear();
          }
        }
      } catch (Exception e) {
        LOG.error("Cannot parse the fileName {}", fileName, e);
      }
    }
    // Archive whatever is left over from the last, partially filled batch.
    if (
      !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir,
        columnDescriptor.getName(), filesToClean)
    ) {
      deletedFileCount += filesToClean.size();
    }
    LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount);
  }

  /**
   * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir.
   * @param conf The current configuration.
   * @return the root dir of the mob file.
   */
  public static Path getMobHome(Configuration conf) {
    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
    return getMobHome(hbaseDir);
  }

  /**
   * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir.
   * @param rootDir The qualified path of HBase root directory.
   * @return The root dir of the mob file.
   */
  public static Path getMobHome(Path rootDir) {
    return new Path(rootDir, MobConstants.MOB_DIR_NAME);
  }

  /**
   * Gets the qualified root dir of the mob files.
   * @param conf The current configuration.
   * @return The qualified root dir.
   */
  public static Path getQualifiedMobRootDir(Configuration conf) throws IOException {
    Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR));
    Path mobRootDir = new Path(hbaseDir, MobConstants.MOB_DIR_NAME);
    FileSystem fs = mobRootDir.getFileSystem(conf);
    return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory());
  }

  /**
   * Gets the table dir of the mob files under the qualified HBase root dir. It's
   * {rootDir}/mobdir/data/${namespace}/${tableName}
   * @param rootDir   The qualified path of HBase root directory.
   * @param tableName The name of table.
   * @return The table dir of the mob file.
   */
  public static Path getMobTableDir(Path rootDir, TableName tableName) {
    return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
  }

  /**
   * Gets the region dir of the mob files. It's
   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
   * @param conf      The current configuration.
   * @param tableName The current table name.
   * @return The region dir of the mob files.
   */
  public static Path getMobRegionPath(Configuration conf, TableName tableName) {
    return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName);
  }

  /**
   * Gets the region dir of the mob files under the specified root dir. It's
   * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}.
   * @param rootDir   The qualified path of HBase root directory.
   * @param tableName The current table name.
   * @return The region dir of the mob files.
   */
  public static Path getMobRegionPath(Path rootDir, TableName tableName) {
    Path tablePath = CommonFSUtils.getTableDir(getMobHome(rootDir), tableName);
    RegionInfo regionInfo = getMobRegionInfo(tableName);
    return new Path(tablePath, regionInfo.getEncodedName());
  }

  /**
   * Gets the family dir of the mob files. It's
   * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}.
   * @param conf       The current configuration.
   * @param tableName  The current table name.
   * @param familyName The current family name.
   * @return The family dir of the mob files.
   */
  public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) {
    return new Path(getMobRegionPath(conf, tableName), familyName);
  }

  /**
   * Gets the family dir of the mob files. It's {regionPath}/{columnFamilyName}.
   * @param regionPath The path of mob region which is a dummy one.
   * @param familyName The current family name.
   * @return The family dir of the mob files.
   */
  public static Path getMobFamilyPath(Path regionPath, String familyName) {
    return new Path(regionPath, familyName);
  }

  /**
   * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a
   * region in HBase. It's internally used only.
   * @param tableName The table the dummy mob region belongs to.
   * @return A dummy mob region info.
   */
  public static RegionInfo getMobRegionInfo(TableName tableName) {
    return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES)
      .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build();
  }

  /**
   * Gets whether the current RegionInfo is a mob one.
   * @param regionInfo The current RegionInfo, may be null.
   * @return If true, the current RegionInfo is a mob one. False for a null RegionInfo.
   */
  public static boolean isMobRegionInfo(RegionInfo regionInfo) {
    return regionInfo != null && getMobRegionInfo(regionInfo.getTable()).getEncodedName()
      .equals(regionInfo.getEncodedName());
  }

  /**
   * Gets whether the current region name follows the pattern of a mob region name.
   * @param tableName  The current table name.
   * @param regionName The current region name.
   * @return True if the current region name follows the pattern of a mob region name.
   */
  public static boolean isMobRegionName(TableName tableName, byte[] regionName) {
    return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName());
  }

  /**
   * Archives the mob files.
   * @param conf       The current configuration.
   * @param fs         The current file system.
   * @param tableName  The table name.
   * @param tableDir   The table directory.
   * @param family     The name of the column family.
   * @param storeFiles The files to be deleted.
   * @return True if the files were archived, false if archiving failed with an IOException (the
   *         failure is logged, not rethrown).
   */
  public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName,
    Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) {
    try {
      HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family,
        storeFiles);
      LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size());
      return true;
    } catch (IOException e) {
      LOG.error("Failed to delete the mob files, table {}", tableName, e);
    }
    return false;
  }

  /**
   * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize +
   * mobFileName.
   * @param cell         The original Cell.
   * @param fileName     The mob file name where the mob reference KeyValue is written.
   * @param tableNameTag The tag of the current table name. It's very important in cloning the
   *                     snapshot.
   * @return The mob reference KeyValue.
   */
  public static Cell createMobRefCell(Cell cell, byte[] fileName, Tag tableNameTag) {
    // Append the tags to the KeyValue.
    // The key is same, the value is the filename of the mob file
    List<Tag> tags = new ArrayList<>();
    // Add the ref tag as the 1st one.
    tags.add(MobConstants.MOB_REF_TAG);
    // Add the tag of the source table name, this table is where this mob file is flushed
    // from.
    // It's very useful in cloning the snapshot. When reading from the cloning table, we need to
    // find the original mob files by this table name. For details please see cloning
    // snapshot for mob files.
    tags.add(tableNameTag);
    return createMobRefCell(cell, fileName, TagUtil.fromList(tags));
  }

  /**
   * Creates a mob reference cell from already serialized reference tags. The value of the returned
   * cell is the 4-byte length of the original value followed by the mob file name.
   * @param cell        The original Cell.
   * @param fileName    The mob file name where the mob reference cell is written.
   * @param refCellTags The serialized tags to put on the reference cell; they are concatenated
   *                    with the tags of the original cell.
   * @return The mob reference cell.
   */
  public static Cell createMobRefCell(Cell cell, byte[] fileName, byte[] refCellTags) {
    byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName);
    return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell));
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf          The current configuration.
   * @param fs            The current file system.
   * @param family        The descriptor of the current column family.
   * @param date          The date string, its format is yyyyMMdd.
   * @param basePath      The basic path for a temp directory.
   * @param maxKeyCount   The key count.
   * @param compression   The compression algorithm.
   * @param startKey      The hex string of the start key.
   * @param cacheConfig   The current cache config.
   * @param cryptoContext The encryption context.
   * @param isCompaction  If the writer is used in compaction.
   * @param regionName    The region name that is made part of the generated mob file name.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount,
    Compression.Algorithm compression, String startKey, CacheConfig cacheConfig,
    Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException {
    // A dash-less random UUID keeps the generated file name unique.
    MobFileName mobFileName = MobFileName.create(startKey, date,
      UUID.randomUUID().toString().replaceAll("-", ""), regionName);
    return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression,
      cacheConfig, cryptoContext, isCompaction);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf          The current configuration.
   * @param fs            The current file system.
   * @param family        The descriptor of the current column family.
   * @param mobFileName   The mob file name.
   * @param basePath      The basic path for a temp directory.
   * @param maxKeyCount   The key count.
   * @param compression   The compression algorithm.
   * @param cacheConfig   The current cache config.
   * @param cryptoContext The encryption context.
   * @param isCompaction  If the writer is used in compaction.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount,
    Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext,
    boolean isCompaction) throws IOException {
    return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()),
      maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf),
      StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf             The current configuration.
   * @param fs               The current file system.
   * @param family           The descriptor of the current column family.
   * @param path             The path for a temp directory.
   * @param maxKeyCount      The key count.
   * @param compression      The compression algorithm.
   * @param cacheConfig      The current cache config.
   * @param cryptoContext    The encryption context.
   * @param checksumType     The checksum type.
   * @param bytesPerChecksum The bytes per checksum.
   * @param blocksize        The HFile block size.
   * @param bloomType        The bloom filter type.
   * @param isCompaction     If the writer is used in compaction.
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction)
    throws IOException {
    return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig,
      cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null);
  }

  /**
   * Creates a writer for the mob file in temp directory.
   * @param conf                  The current configuration.
   * @param fs                    The current file system.
   * @param family                The descriptor of the current column family.
   * @param path                  The path for a temp directory.
   * @param maxKeyCount           The key count.
   * @param compression           The compression algorithm. When null,
   *                              {@link HFile#DEFAULT_COMPRESSION_ALGORITHM} is used.
   * @param cacheConfig           The current cache config.
   * @param cryptoContext         The encryption context.
   * @param checksumType          The checksum type.
   * @param bytesPerChecksum      The bytes per checksum.
   * @param blocksize             The HFile block size.
   * @param bloomType             The bloom filter type.
   * @param isCompaction          If the writer is used in compaction.
   * @param writerCreationTracker to track the current writer in the store
   * @return The writer for the mob file.
   */
  public static StoreFileWriter createWriter(Configuration conf, FileSystem fs,
    ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression,
    CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType,
    int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction,
    Consumer<Path> writerCreationTracker) throws IOException {
    if (compression == null) {
      compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
    }
    final CacheConfig writerCacheConf;
    if (isCompaction) {
      // Copy the config so that turning off cache-on-write does not affect the caller's instance.
      writerCacheConf = new CacheConfig(cacheConfig);
      writerCacheConf.setCacheDataOnWrite(false);
    } else {
      writerCacheConf = cacheConfig;
    }
    HFileContext hFileContext = new HFileContextBuilder().withCompression(compression)
      .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags())
      .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum)
      .withBlockSize(blocksize).withHBaseCheckSum(true)
      .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext)
      .withCreateTime(EnvironmentEdgeManager.currentTime()).build();

    return new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path)
      .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext)
      .withWriterCreationTracker(writerCreationTracker).build();
  }

  /**
   * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob
   * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob
   * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return True if the cell has a valid value.
   */
  public static boolean hasValidMobRefCellValue(Cell cell) {
    return cell.getValueLength() > Bytes.SIZEOF_INT;
  }

  /**
   * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The
   * value of a mob ref cell consists of two parts, real mob value length and mob file name. The
   * real mob value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return The real mob value length.
   */
  public static int getMobValueLength(Cell cell) {
    return PrivateCellUtil.getValueAsInt(cell);
  }

  /**
   * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value
   * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob
   * value length takes 4 bytes. The remaining part is the mob file name.
   * @param cell The mob ref cell.
   * @return The mob file name.
   */
  public static String getMobFileName(Cell cell) {
    // Skip the leading 4-byte value length; everything after it is the file name.
    return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT,
      cell.getValueLength() - Bytes.SIZEOF_INT);
  }

  /**
   * Checks whether this table has mob-enabled columns.
   * @param htd The current table descriptor.
   * @return Whether this table has mob-enabled columns.
   */
  public static boolean hasMobColumns(TableDescriptor htd) {
    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
    for (ColumnFamilyDescriptor hcd : hcds) {
      if (hcd.isMobEnabled()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Get list of Mob column families (if any exists)
   * @param htd table descriptor
   * @return list of Mob column families, empty if the table has none
   */
  public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) {
    List<ColumnFamilyDescriptor> fams = new ArrayList<>();
    ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies();
    for (ColumnFamilyDescriptor hcd : hcds) {
      if (hcd.isMobEnabled()) {
        fams.add(hcd);
      }
    }
    return fams;
  }

  /**
   * Indicates whether return null value when the mob file is missing or corrupt. The information is
   * set in the attribute "empty.value.on.mobcell.miss" of scan.
   * @param scan The current scan.
   * @return True if the readEmptyValueOnMobCellMiss is enabled.
   */
  public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) {
    return getBooleanScanAttribute(scan, MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS);
  }

  /**
   * Checks if the mob file is expired. A file is never expired when the column family keeps a
   * minimum number of versions or has no TTL set.
   * @param column   The descriptor of the current column family.
   * @param current  The current time.
   * @param fileDate The date string parsed from the mob file name.
   * @return True if the mob file is expired. False if it is not, or if the date string cannot be
   *         parsed.
   */
  public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current,
    String fileDate) {
    if (column.getMinVersions() > 0) {
      return false;
    }
    long timeToLive = column.getTimeToLive();
    if (Integer.MAX_VALUE == timeToLive) {
      return false;
    }

    Date expireDate = getExpireDate(current, timeToLive);
    try {
      return parseDate(fileDate).getTime() < expireDate.getTime();
    } catch (ParseException e) {
      LOG.warn("Failed to parse the date {}", fileDate, e);
      return false;
    }
  }

  /**
   * Serialize a set of referenced mob hfiles
   * @param mobRefSet to serialize, may be null
   * @return byte array to i.e. put into store file metadata. will not be null
   */
  public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) {
    if (mobRefSet != null && mobRefSet.size() > 0) {
      // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile
      // names for serialization.
      //
      // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4
      //
      // to approximate the needed capacity we use the fact that there will usually be 1 table name
      // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most"
      // single table name lengths
      StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105);
      boolean doubleSlash = false;
      for (TableName tableName : mobRefSet.keySet()) {
        if (doubleSlash) {
          sb.append("//");
        } else {
          doubleSlash = true;
        }
        sb.append(tableName).append("/");
        boolean comma = false;
        for (String refs : mobRefSet.get(tableName)) {
          if (comma) {
            sb.append(",");
          } else {
            comma = true;
          }
          sb.append(refs);
        }
      }
      return Bytes.toBytes(sb.toString());
    } else {
      return HStoreFile.NULL_VALUE;
    }
  }

  /**
   * Deserialize the set of referenced mob hfiles from store file metadata.
   * @param bytes compatibly serialized data. can not be null
   * @return a builder for a setmultimap of original table to list of hfile names. will be empty if
   *         no values.
   * @throws IllegalStateException if there are values but no table name
   */
  public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes)
    throws IllegalStateException {
    ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder();
    if (bytes.length > 1) {
      // TODO avoid turning the tablename pieces in to strings.
      String s = Bytes.toString(bytes);
      String[] tables = s.split("//");
      for (String tableEnc : tables) {
        // Everything before the first '/' is the table name, the rest is the comma separated refs.
        final int delim = tableEnc.indexOf('/');
        if (delim <= 0) {
          throw new IllegalStateException("MOB reference data does not match expected encoding: "
            + "no table name included before list of mob refs.");
        }
        TableName table = TableName.valueOf(tableEnc.substring(0, delim));
        String[] refs = tableEnc.substring(delim + 1).split(",");
        map.putAll(table, refs);
      }
    } else {
      if (LOG.isDebugEnabled()) {
        // array length 1 should be the NULL_VALUE.
        if (!Arrays.equals(HStoreFile.NULL_VALUE, bytes)) {
          LOG.debug(
            "Serialized MOB file refs array was treated as the placeholder 'no entries' but"
              + " didn't have the expected placeholder byte. expected={} and actual={}",
            Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes));
        }
      }

    }
    return map;
  }

}