001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mob; 019 020import static org.apache.hadoop.hbase.mob.MobConstants.DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND; 021import static org.apache.hadoop.hbase.mob.MobConstants.MOB_CLEANER_BATCH_SIZE_UPPER_BOUND; 022 023import java.io.FileNotFoundException; 024import java.io.IOException; 025import java.nio.ByteBuffer; 026import java.text.ParseException; 027import java.text.SimpleDateFormat; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Calendar; 031import java.util.Collection; 032import java.util.Date; 033import java.util.List; 034import java.util.Optional; 035import java.util.UUID; 036import java.util.function.Consumer; 037import org.apache.hadoop.conf.Configuration; 038import org.apache.hadoop.fs.FileStatus; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.Path; 041import org.apache.hadoop.hbase.Cell; 042import org.apache.hadoop.hbase.ExtendedCell; 043import org.apache.hadoop.hbase.HConstants; 044import org.apache.hadoop.hbase.PrivateCellUtil; 045import org.apache.hadoop.hbase.TableName; 046import org.apache.hadoop.hbase.Tag; 047import org.apache.hadoop.hbase.TagType; 048import org.apache.hadoop.hbase.TagUtil; 049import org.apache.hadoop.hbase.backup.HFileArchiver; 050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 051import org.apache.hadoop.hbase.client.RegionInfo; 052import org.apache.hadoop.hbase.client.RegionInfoBuilder; 053import org.apache.hadoop.hbase.client.Scan; 054import org.apache.hadoop.hbase.client.TableDescriptor; 055import org.apache.hadoop.hbase.io.HFileLink; 056import org.apache.hadoop.hbase.io.compress.Compression; 057import org.apache.hadoop.hbase.io.crypto.Encryption; 058import org.apache.hadoop.hbase.io.hfile.CacheConfig; 059import org.apache.hadoop.hbase.io.hfile.HFile; 060import org.apache.hadoop.hbase.io.hfile.HFileContext; 061import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder; 062import org.apache.hadoop.hbase.regionserver.BloomType; 063import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; 064import org.apache.hadoop.hbase.regionserver.HStoreFile; 065import org.apache.hadoop.hbase.regionserver.StoreFileWriter; 066import org.apache.hadoop.hbase.regionserver.StoreUtils; 067import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 068import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 069import org.apache.hadoop.hbase.util.Bytes; 070import org.apache.hadoop.hbase.util.ChecksumType; 071import org.apache.hadoop.hbase.util.CommonFSUtils; 072import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 073import org.apache.yetus.audience.InterfaceAudience; 074import org.slf4j.Logger; 075import org.slf4j.LoggerFactory; 076 077import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSetMultimap; 078import org.apache.hbase.thirdparty.com.google.common.collect.SetMultimap; 079 080/** 081 * The mob utilities 082 */ 083@InterfaceAudience.Private 084public final class MobUtils { 085 086 private static final Logger LOG = LoggerFactory.getLogger(MobUtils.class); 087 public static final String SEP = "_"; 088 089 private static final ThreadLocal<SimpleDateFormat> LOCAL_FORMAT = 090 new ThreadLocal<SimpleDateFormat>() { 091 @Override 092 protected SimpleDateFormat initialValue() { 093 return new SimpleDateFormat("yyyyMMdd"); 094 } 095 }; 096 097 /** 098 * Private constructor to keep this class from being instantiated. 099 */ 100 private MobUtils() { 101 } 102 103 /** 104 * Formats a date to a string. 105 * @param date The date. 106 * @return The string format of the date, it's yyyymmdd. 107 */ 108 public static String formatDate(Date date) { 109 return LOCAL_FORMAT.get().format(date); 110 } 111 112 /** 113 * Parses the string to a date. 114 * @param dateString The string format of a date, it's yyyymmdd. 115 * @return A date. 116 */ 117 public static Date parseDate(String dateString) throws ParseException { 118 return LOCAL_FORMAT.get().parse(dateString); 119 } 120 121 /** 122 * Whether the current cell is a mob reference cell. 123 * @param cell The current cell. 124 * @return True if the cell has a mob reference tag, false if it doesn't. 125 */ 126 public static boolean isMobReferenceCell(ExtendedCell cell) { 127 if (cell.getTagsLength() > 0) { 128 Optional<Tag> tag = PrivateCellUtil.getTag(cell, TagType.MOB_REFERENCE_TAG_TYPE); 129 if (tag.isPresent()) { 130 return true; 131 } 132 } 133 return false; 134 } 135 136 /** 137 * Gets the table name tag. 138 * @param cell The current cell. 139 * @return The table name tag. 140 */ 141 private static Optional<Tag> getTableNameTag(ExtendedCell cell) { 142 Optional<Tag> tag = Optional.empty(); 143 if (cell.getTagsLength() > 0) { 144 tag = PrivateCellUtil.getTag(cell, TagType.MOB_TABLE_NAME_TAG_TYPE); 145 } 146 return tag; 147 } 148 149 /** 150 * Gets the table name from when this cell was written into a mob hfile as a string. 151 * @param cell to extract tag from 152 * @return table name as a string. empty if the tag is not found. 153 */ 154 public static Optional<String> getTableNameString(ExtendedCell cell) { 155 Optional<Tag> tag = getTableNameTag(cell); 156 Optional<String> name = Optional.empty(); 157 if (tag.isPresent()) { 158 name = Optional.of(Tag.getValueAsString(tag.get())); 159 } 160 return name; 161 } 162 163 /** 164 * Get the table name from when this cell was written into a mob hfile as a TableName. 165 * @param cell to extract tag from 166 * @return name of table as a TableName. empty if the tag is not found. 167 */ 168 public static Optional<TableName> getTableName(ExtendedCell cell) { 169 Optional<Tag> maybe = getTableNameTag(cell); 170 Optional<TableName> name = Optional.empty(); 171 if (maybe.isPresent()) { 172 final Tag tag = maybe.get(); 173 if (tag.hasArray()) { 174 name = Optional 175 .of(TableName.valueOf(tag.getValueArray(), tag.getValueOffset(), tag.getValueLength())); 176 } else { 177 // TODO ByteBuffer handling in tags looks busted. revisit. 178 ByteBuffer buffer = tag.getValueByteBuffer().duplicate(); 179 buffer.mark(); 180 buffer.position(tag.getValueOffset()); 181 buffer.limit(tag.getValueOffset() + tag.getValueLength()); 182 name = Optional.of(TableName.valueOf(buffer)); 183 } 184 } 185 return name; 186 } 187 188 /** 189 * Whether the tag list has a mob reference tag. 190 * @param tags The tag list. 191 * @return True if the list has a mob reference tag, false if it doesn't. 192 */ 193 public static boolean hasMobReferenceTag(List<Tag> tags) { 194 if (!tags.isEmpty()) { 195 for (Tag tag : tags) { 196 if (tag.getType() == TagType.MOB_REFERENCE_TAG_TYPE) { 197 return true; 198 } 199 } 200 } 201 return false; 202 } 203 204 /** 205 * Indicates whether it's a raw scan. The information is set in the attribute "hbase.mob.scan.raw" 206 * of scan. For a mob cell, in a normal scan the scanners retrieves the mob cell from the mob 207 * file. In a raw scan, the scanner directly returns cell in HBase without retrieve the one in the 208 * mob file. 209 * @param scan The current scan. 210 * @return True if it's a raw scan. 211 */ 212 public static boolean isRawMobScan(Scan scan) { 213 byte[] raw = scan.getAttribute(MobConstants.MOB_SCAN_RAW); 214 try { 215 return raw != null && Bytes.toBoolean(raw); 216 } catch (IllegalArgumentException e) { 217 return false; 218 } 219 } 220 221 /** 222 * Indicates whether it's a reference only scan. The information is set in the attribute 223 * "hbase.mob.scan.ref.only" of scan. If it's a ref only scan, only the cells with ref tag are 224 * returned. 225 * @param scan The current scan. 226 * @return True if it's a ref only scan. 227 */ 228 public static boolean isRefOnlyScan(Scan scan) { 229 byte[] refOnly = scan.getAttribute(MobConstants.MOB_SCAN_REF_ONLY); 230 try { 231 return refOnly != null && Bytes.toBoolean(refOnly); 232 } catch (IllegalArgumentException e) { 233 return false; 234 } 235 } 236 237 /** 238 * Indicates whether the scan contains the information of caching blocks. The information is set 239 * in the attribute "hbase.mob.cache.blocks" of scan. 240 * @param scan The current scan. 241 * @return True when the Scan attribute specifies to cache the MOB blocks. 242 */ 243 public static boolean isCacheMobBlocks(Scan scan) { 244 byte[] cache = scan.getAttribute(MobConstants.MOB_CACHE_BLOCKS); 245 try { 246 return cache != null && Bytes.toBoolean(cache); 247 } catch (IllegalArgumentException e) { 248 return false; 249 } 250 } 251 252 /** 253 * Sets the attribute of caching blocks in the scan. 254 * @param scan The current scan. 255 * @param cacheBlocks True, set the attribute of caching blocks into the scan, the scanner with 256 * this scan caches blocks. False, the scanner doesn't cache blocks for this 257 * scan. 258 */ 259 public static void setCacheMobBlocks(Scan scan, boolean cacheBlocks) { 260 scan.setAttribute(MobConstants.MOB_CACHE_BLOCKS, Bytes.toBytes(cacheBlocks)); 261 } 262 263 /** 264 * Cleans the expired mob files. Cleans the files whose creation date is older than (current - 265 * columnFamily.ttl), and the minVersions of that column family is 0. 266 * @param fs The current file system. 267 * @param conf The current configuration. 268 * @param tableName The current table name. 269 * @param columnDescriptor The descriptor of the current column family. 270 * @param cacheConfig The cacheConfig that disables the block cache. 271 * @param current The current time. 272 */ 273 public static void cleanExpiredMobFiles(FileSystem fs, Configuration conf, TableDescriptor htd, 274 ColumnFamilyDescriptor columnDescriptor, CacheConfig cacheConfig, long current) 275 throws IOException { 276 long timeToLive = columnDescriptor.getTimeToLive(); 277 if (Integer.MAX_VALUE == timeToLive) { 278 // no need to clean, because the TTL is not set. 279 return; 280 } 281 282 Calendar calendar = Calendar.getInstance(); 283 calendar.setTimeInMillis(current - timeToLive * 1000); 284 calendar.set(Calendar.HOUR_OF_DAY, 0); 285 calendar.set(Calendar.MINUTE, 0); 286 calendar.set(Calendar.SECOND, 0); 287 calendar.set(Calendar.MILLISECOND, 0); 288 289 Date expireDate = calendar.getTime(); 290 291 LOG.info("MOB HFiles older than " + expireDate.toGMTString() + " will be deleted!"); 292 293 FileStatus[] stats = null; 294 TableName tableName = htd.getTableName(); 295 Path mobTableDir = CommonFSUtils.getTableDir(getMobHome(conf), tableName); 296 HRegionFileSystem regionFS = 297 HRegionFileSystem.create(conf, fs, mobTableDir, getMobRegionInfo(tableName)); 298 StoreFileTracker sft = StoreFileTrackerFactory.create(conf, htd, columnDescriptor, regionFS); 299 Path path = getMobFamilyPath(conf, tableName, columnDescriptor.getNameAsString()); 300 try { 301 stats = fs.listStatus(path); 302 } catch (FileNotFoundException e) { 303 LOG.warn("Failed to find the mob file " + path, e); 304 } 305 if (null == stats) { 306 // no file found 307 return; 308 } 309 List<HStoreFile> filesToClean = new ArrayList<>(); 310 int deletedFileCount = 0; 311 for (FileStatus file : stats) { 312 String fileName = file.getPath().getName(); 313 try { 314 if (HFileLink.isHFileLink(file.getPath())) { 315 HFileLink hfileLink = HFileLink.buildFromHFileLinkPattern(conf, file.getPath()); 316 fileName = hfileLink.getOriginPath().getName(); 317 } 318 319 Date fileDate = parseDate(MobFileName.getDateFromName(fileName)); 320 321 if (LOG.isDebugEnabled()) { 322 LOG.debug("Checking file {}", fileName); 323 } 324 if (fileDate.getTime() < expireDate.getTime()) { 325 if (LOG.isDebugEnabled()) { 326 LOG.debug("{} is an expired file", fileName); 327 } 328 filesToClean 329 .add(new HStoreFile(fs, file.getPath(), conf, cacheConfig, BloomType.NONE, true, sft)); 330 if ( 331 filesToClean.size() >= conf.getInt(MOB_CLEANER_BATCH_SIZE_UPPER_BOUND, 332 DEFAULT_MOB_CLEANER_BATCH_SIZE_UPPER_BOUND) 333 ) { 334 if ( 335 removeMobFiles(conf, fs, tableName, mobTableDir, columnDescriptor.getName(), 336 filesToClean) 337 ) { 338 deletedFileCount += filesToClean.size(); 339 } 340 filesToClean.clear(); 341 } 342 } 343 } catch (Exception e) { 344 LOG.error("Cannot parse the fileName " + fileName, e); 345 } 346 } 347 if ( 348 !filesToClean.isEmpty() && removeMobFiles(conf, fs, tableName, mobTableDir, 349 columnDescriptor.getName(), filesToClean) 350 ) { 351 deletedFileCount += filesToClean.size(); 352 } 353 LOG.info("Table {} {} expired mob files in total are deleted", tableName, deletedFileCount); 354 } 355 356 /** 357 * Gets the root dir of the mob files. It's {HBASE_DIR}/mobdir. 358 * @param conf The current configuration. 359 * @return the root dir of the mob file. 360 */ 361 public static Path getMobHome(Configuration conf) { 362 Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR)); 363 return getMobHome(hbaseDir); 364 } 365 366 /** 367 * Gets the root dir of the mob files under the qualified HBase root dir. It's {rootDir}/mobdir. 368 * @param rootDir The qualified path of HBase root directory. 369 * @return The root dir of the mob file. 370 */ 371 public static Path getMobHome(Path rootDir) { 372 return new Path(rootDir, MobConstants.MOB_DIR_NAME); 373 } 374 375 /** 376 * Gets the qualified root dir of the mob files. 377 * @param conf The current configuration. 378 * @return The qualified root dir. 379 */ 380 public static Path getQualifiedMobRootDir(Configuration conf) throws IOException { 381 Path hbaseDir = new Path(conf.get(HConstants.HBASE_DIR)); 382 Path mobRootDir = new Path(hbaseDir, MobConstants.MOB_DIR_NAME); 383 FileSystem fs = mobRootDir.getFileSystem(conf); 384 return mobRootDir.makeQualified(fs.getUri(), fs.getWorkingDirectory()); 385 } 386 387 /** 388 * Gets the table dir of the mob files under the qualified HBase root dir. It's 389 * {rootDir}/mobdir/data/${namespace}/${tableName} 390 * @param rootDir The qualified path of HBase root directory. 391 * @param tableName The name of table. 392 * @return The table dir of the mob file. 393 */ 394 public static Path getMobTableDir(Path rootDir, TableName tableName) { 395 return CommonFSUtils.getTableDir(getMobHome(rootDir), tableName); 396 } 397 398 public static Path getMobTableDir(Configuration conf, TableName tableName) { 399 return getMobTableDir(new Path(conf.get(HConstants.HBASE_DIR)), tableName); 400 } 401 402 /** 403 * Gets the region dir of the mob files. It's 404 * {HBASE_DIR}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}. 405 * @param conf The current configuration. 406 * @param tableName The current table name. 407 * @return The region dir of the mob files. 408 */ 409 public static Path getMobRegionPath(Configuration conf, TableName tableName) { 410 return getMobRegionPath(new Path(conf.get(HConstants.HBASE_DIR)), tableName); 411 } 412 413 /** 414 * Gets the region dir of the mob files under the specified root dir. It's 415 * {rootDir}/mobdir/data/{namespace}/{tableName}/{regionEncodedName}. 416 * @param rootDir The qualified path of HBase root directory. 417 * @param tableName The current table name. 418 * @return The region dir of the mob files. 419 */ 420 public static Path getMobRegionPath(Path rootDir, TableName tableName) { 421 Path tablePath = CommonFSUtils.getTableDir(getMobHome(rootDir), tableName); 422 RegionInfo regionInfo = getMobRegionInfo(tableName); 423 return new Path(tablePath, regionInfo.getEncodedName()); 424 } 425 426 /** 427 * Gets the family dir of the mob files. It's 428 * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}. 429 * @param conf The current configuration. 430 * @param tableName The current table name. 431 * @param familyName The current family name. 432 * @return The family dir of the mob files. 433 */ 434 public static Path getMobFamilyPath(Configuration conf, TableName tableName, String familyName) { 435 return new Path(getMobRegionPath(conf, tableName), familyName); 436 } 437 438 /** 439 * Gets the family dir of the mob files. It's 440 * {HBASE_DIR}/mobdir/{namespace}/{tableName}/{regionEncodedName}/{columnFamilyName}. 441 * @param regionPath The path of mob region which is a dummy one. 442 * @param familyName The current family name. 443 * @return The family dir of the mob files. 444 */ 445 public static Path getMobFamilyPath(Path regionPath, String familyName) { 446 return new Path(regionPath, familyName); 447 } 448 449 /** 450 * Gets the RegionInfo of the mob files. This is a dummy region. The mob files are not saved in a 451 * region in HBase. It's internally used only. 452 * @return A dummy mob region info. 453 */ 454 public static RegionInfo getMobRegionInfo(TableName tableName) { 455 return RegionInfoBuilder.newBuilder(tableName).setStartKey(MobConstants.MOB_REGION_NAME_BYTES) 456 .setEndKey(HConstants.EMPTY_END_ROW).setSplit(false).setRegionId(0).build(); 457 } 458 459 /** 460 * Gets whether the current RegionInfo is a mob one. 461 * @param regionInfo The current RegionInfo. 462 * @return If true, the current RegionInfo is a mob one. 463 */ 464 public static boolean isMobRegionInfo(RegionInfo regionInfo) { 465 return regionInfo == null 466 ? false 467 : getMobRegionInfo(regionInfo.getTable()).getEncodedName() 468 .equals(regionInfo.getEncodedName()); 469 } 470 471 /** 472 * Gets whether the current region name follows the pattern of a mob region name. 473 * @param tableName The current table name. 474 * @param regionName The current region name. 475 * @return True if the current region name follows the pattern of a mob region name. 476 */ 477 public static boolean isMobRegionName(TableName tableName, byte[] regionName) { 478 return Bytes.equals(regionName, getMobRegionInfo(tableName).getRegionName()); 479 } 480 481 /** 482 * Archives the mob files. 483 * @param conf The current configuration. 484 * @param fs The current file system. 485 * @param tableName The table name. 486 * @param tableDir The table directory. 487 * @param family The name of the column family. 488 * @param storeFiles The files to be deleted. 489 */ 490 public static boolean removeMobFiles(Configuration conf, FileSystem fs, TableName tableName, 491 Path tableDir, byte[] family, Collection<HStoreFile> storeFiles) { 492 try { 493 HFileArchiver.archiveStoreFiles(conf, fs, getMobRegionInfo(tableName), tableDir, family, 494 storeFiles); 495 LOG.info("Table {} {} expired mob files are deleted", tableName, storeFiles.size()); 496 return true; 497 } catch (IOException e) { 498 LOG.error("Failed to delete the mob files, table {}", tableName, e); 499 } 500 return false; 501 } 502 503 /** 504 * Creates a mob reference KeyValue. The value of the mob reference KeyValue is mobCellValueSize + 505 * mobFileName. 506 * @param cell The original Cell. 507 * @param fileName The mob file name where the mob reference KeyValue is written. 508 * @param tableNameTag The tag of the current table name. It's very important in cloning the 509 * snapshot. 510 * @return The mob reference KeyValue. 511 */ 512 public static ExtendedCell createMobRefCell(ExtendedCell cell, byte[] fileName, 513 Tag tableNameTag) { 514 // Append the tags to the KeyValue. 515 // The key is same, the value is the filename of the mob file 516 List<Tag> tags = new ArrayList<>(); 517 // Add the ref tag as the 1st one. 518 tags.add(MobConstants.MOB_REF_TAG); 519 // Add the tag of the source table name, this table is where this mob file is flushed 520 // from. 521 // It's very useful in cloning the snapshot. When reading from the cloning table, we need to 522 // find the original mob files by this table name. For details please see cloning 523 // snapshot for mob files. 524 tags.add(tableNameTag); 525 return createMobRefCell(cell, fileName, TagUtil.fromList(tags)); 526 } 527 528 public static ExtendedCell createMobRefCell(ExtendedCell cell, byte[] fileName, 529 byte[] refCellTags) { 530 byte[] refValue = Bytes.add(Bytes.toBytes(cell.getValueLength()), fileName); 531 return PrivateCellUtil.createCell(cell, refValue, TagUtil.concatTags(refCellTags, cell)); 532 } 533 534 /** 535 * Creates a writer for the mob file in temp directory. 536 * @param conf The current configuration. 537 * @param fs The current file system. 538 * @param family The descriptor of the current column family. 539 * @param date The date string, its format is yyyymmmdd. 540 * @param basePath The basic path for a temp directory. 541 * @param maxKeyCount The key count. 542 * @param compression The compression algorithm. 543 * @param startKey The hex string of the start key. 544 * @param cacheConfig The current cache config. 545 * @param cryptoContext The encryption context. 546 * @param isCompaction If the writer is used in compaction. 547 * @return The writer for the mob file. 548 */ 549 public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, 550 ColumnFamilyDescriptor family, String date, Path basePath, long maxKeyCount, 551 Compression.Algorithm compression, String startKey, CacheConfig cacheConfig, 552 Encryption.Context cryptoContext, boolean isCompaction, String regionName) throws IOException { 553 MobFileName mobFileName = MobFileName.create(startKey, date, 554 UUID.randomUUID().toString().replaceAll("-", ""), regionName); 555 return createWriter(conf, fs, family, mobFileName, basePath, maxKeyCount, compression, 556 cacheConfig, cryptoContext, isCompaction); 557 } 558 559 /** 560 * Creates a writer for the mob file in temp directory. 561 * @param conf The current configuration. 562 * @param fs The current file system. 563 * @param family The descriptor of the current column family. 564 * @param mobFileName The mob file name. 565 * @param basePath The basic path for a temp directory. 566 * @param maxKeyCount The key count. 567 * @param compression The compression algorithm. 568 * @param cacheConfig The current cache config. 569 * @param cryptoContext The encryption context. 570 * @param isCompaction If the writer is used in compaction. 571 * @return The writer for the mob file. 572 */ 573 public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, 574 ColumnFamilyDescriptor family, MobFileName mobFileName, Path basePath, long maxKeyCount, 575 Compression.Algorithm compression, CacheConfig cacheConfig, Encryption.Context cryptoContext, 576 boolean isCompaction) throws IOException { 577 return createWriter(conf, fs, family, new Path(basePath, mobFileName.getFileName()), 578 maxKeyCount, compression, cacheConfig, cryptoContext, StoreUtils.getChecksumType(conf), 579 StoreUtils.getBytesPerChecksum(conf), family.getBlocksize(), BloomType.NONE, isCompaction); 580 } 581 582 /** 583 * Creates a writer for the mob file in temp directory. 584 * @param conf The current configuration. 585 * @param fs The current file system. 586 * @param family The descriptor of the current column family. 587 * @param path The path for a temp directory. 588 * @param maxKeyCount The key count. 589 * @param compression The compression algorithm. 590 * @param cacheConfig The current cache config. 591 * @param cryptoContext The encryption context. 592 * @param checksumType The checksum type. 593 * @param bytesPerChecksum The bytes per checksum. 594 * @param blocksize The HFile block size. 595 * @param bloomType The bloom filter type. 596 * @param isCompaction If the writer is used in compaction. 597 * @return The writer for the mob file. 598 */ 599 public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, 600 ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression, 601 CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType, 602 int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction) 603 throws IOException { 604 return createWriter(conf, fs, family, path, maxKeyCount, compression, cacheConfig, 605 cryptoContext, checksumType, bytesPerChecksum, blocksize, bloomType, isCompaction, null); 606 } 607 608 /** 609 * Creates a writer for the mob file in temp directory. 610 * @param conf The current configuration. 611 * @param fs The current file system. 612 * @param family The descriptor of the current column family. 613 * @param path The path for a temp directory. 614 * @param maxKeyCount The key count. 615 * @param compression The compression algorithm. 616 * @param cacheConfig The current cache config. 617 * @param cryptoContext The encryption context. 618 * @param checksumType The checksum type. 619 * @param bytesPerChecksum The bytes per checksum. 620 * @param blocksize The HFile block size. 621 * @param bloomType The bloom filter type. 622 * @param isCompaction If the writer is used in compaction. 623 * @param writerCreationTracker to track the current writer in the store 624 * @return The writer for the mob file. 625 */ 626 public static StoreFileWriter createWriter(Configuration conf, FileSystem fs, 627 ColumnFamilyDescriptor family, Path path, long maxKeyCount, Compression.Algorithm compression, 628 CacheConfig cacheConfig, Encryption.Context cryptoContext, ChecksumType checksumType, 629 int bytesPerChecksum, int blocksize, BloomType bloomType, boolean isCompaction, 630 Consumer<Path> writerCreationTracker) throws IOException { 631 if (compression == null) { 632 compression = HFile.DEFAULT_COMPRESSION_ALGORITHM; 633 } 634 final CacheConfig writerCacheConf; 635 if (isCompaction) { 636 writerCacheConf = new CacheConfig(cacheConfig); 637 writerCacheConf.setCacheDataOnWrite(false); 638 } else { 639 writerCacheConf = cacheConfig; 640 } 641 HFileContext hFileContext = new HFileContextBuilder().withCompression(compression) 642 .withIncludesMvcc(true).withIncludesTags(true).withCompressTags(family.isCompressTags()) 643 .withChecksumType(checksumType).withBytesPerCheckSum(bytesPerChecksum) 644 .withBlockSize(blocksize).withHBaseCheckSum(true) 645 .withDataBlockEncoding(family.getDataBlockEncoding()).withEncryptionContext(cryptoContext) 646 .withCreateTime(EnvironmentEdgeManager.currentTime()).build(); 647 648 StoreFileWriter w = new StoreFileWriter.Builder(conf, writerCacheConf, fs).withFilePath(path) 649 .withBloomType(bloomType).withMaxKeyCount(maxKeyCount).withFileContext(hFileContext) 650 .withWriterCreationTracker(writerCreationTracker).build(); 651 return w; 652 } 653 654 /** 655 * Indicates whether the current mob ref cell has a valid value. A mob ref cell has a mob 656 * reference tag. The value of a mob ref cell consists of two parts, real mob value length and mob 657 * file name. The real mob value length takes 4 bytes. The remaining part is the mob file name. 658 * @param cell The mob ref cell. 659 * @return True if the cell has a valid value. 660 */ 661 public static boolean hasValidMobRefCellValue(Cell cell) { 662 return cell.getValueLength() > Bytes.SIZEOF_INT; 663 } 664 665 /** 666 * Gets the mob value length from the mob ref cell. A mob ref cell has a mob reference tag. The 667 * value of a mob ref cell consists of two parts, real mob value length and mob file name. The 668 * real mob value length takes 4 bytes. The remaining part is the mob file name. 669 * @param cell The mob ref cell. 670 * @return The real mob value length. 671 */ 672 public static int getMobValueLength(Cell cell) { 673 return PrivateCellUtil.getValueAsInt(cell); 674 } 675 676 /** 677 * Gets the mob file name from the mob ref cell. A mob ref cell has a mob reference tag. The value 678 * of a mob ref cell consists of two parts, real mob value length and mob file name. The real mob 679 * value length takes 4 bytes. The remaining part is the mob file name. 680 * @param cell The mob ref cell. 681 * @return The mob file name. 682 */ 683 public static String getMobFileName(Cell cell) { 684 return Bytes.toString(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_INT, 685 cell.getValueLength() - Bytes.SIZEOF_INT); 686 } 687 688 /** 689 * Checks whether this table has mob-enabled columns. 690 * @param htd The current table descriptor. 691 * @return Whether this table has mob-enabled columns. 692 */ 693 public static boolean hasMobColumns(TableDescriptor htd) { 694 ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies(); 695 for (ColumnFamilyDescriptor hcd : hcds) { 696 if (hcd.isMobEnabled()) { 697 return true; 698 } 699 } 700 return false; 701 } 702 703 /** 704 * Get list of Mob column families (if any exists) 705 * @param htd table descriptor 706 * @return list of Mob column families 707 */ 708 public static List<ColumnFamilyDescriptor> getMobColumnFamilies(TableDescriptor htd) { 709 710 List<ColumnFamilyDescriptor> fams = new ArrayList<ColumnFamilyDescriptor>(); 711 ColumnFamilyDescriptor[] hcds = htd.getColumnFamilies(); 712 for (ColumnFamilyDescriptor hcd : hcds) { 713 if (hcd.isMobEnabled()) { 714 fams.add(hcd); 715 } 716 } 717 return fams; 718 } 719 720 /** 721 * Indicates whether return null value when the mob file is missing or corrupt. The information is 722 * set in the attribute "empty.value.on.mobcell.miss" of scan. 723 * @param scan The current scan. 724 * @return True if the readEmptyValueOnMobCellMiss is enabled. 725 */ 726 public static boolean isReadEmptyValueOnMobCellMiss(Scan scan) { 727 byte[] readEmptyValueOnMobCellMiss = 728 scan.getAttribute(MobConstants.EMPTY_VALUE_ON_MOBCELL_MISS); 729 try { 730 return readEmptyValueOnMobCellMiss != null && Bytes.toBoolean(readEmptyValueOnMobCellMiss); 731 } catch (IllegalArgumentException e) { 732 return false; 733 } 734 } 735 736 /** 737 * Checks if the mob file is expired. 738 * @param column The descriptor of the current column family. 739 * @param current The current time. 740 * @param fileDate The date string parsed from the mob file name. 741 * @return True if the mob file is expired. 742 */ 743 public static boolean isMobFileExpired(ColumnFamilyDescriptor column, long current, 744 String fileDate) { 745 if (column.getMinVersions() > 0) { 746 return false; 747 } 748 long timeToLive = column.getTimeToLive(); 749 if (Integer.MAX_VALUE == timeToLive) { 750 return false; 751 } 752 753 Date expireDate = new Date(current - timeToLive * 1000); 754 expireDate = new Date(expireDate.getYear(), expireDate.getMonth(), expireDate.getDate()); 755 try { 756 Date date = parseDate(fileDate); 757 if (date.getTime() < expireDate.getTime()) { 758 return true; 759 } 760 } catch (ParseException e) { 761 LOG.warn("Failed to parse the date " + fileDate, e); 762 return false; 763 } 764 return false; 765 } 766 767 /** 768 * Serialize a set of referenced mob hfiles 769 * @param mobRefSet to serialize, may be null 770 * @return byte array to i.e. put into store file metadata. will not be null 771 */ 772 public static byte[] serializeMobFileRefs(SetMultimap<TableName, String> mobRefSet) { 773 if (mobRefSet != null && mobRefSet.size() > 0) { 774 // Here we rely on the fact that '/' and ',' are not allowed in either table names nor hfile 775 // names for serialization. 776 // 777 // exampleTable/filename1,filename2//example:table/filename5//otherTable/filename3,filename4 778 // 779 // to approximate the needed capacity we use the fact that there will usually be 1 table name 780 // and each mob filename is around 105 bytes. we pick an arbitrary number to cover "most" 781 // single table name lengths 782 StringBuilder sb = new StringBuilder(100 + mobRefSet.size() * 105); 783 boolean doubleSlash = false; 784 for (TableName tableName : mobRefSet.keySet()) { 785 if (doubleSlash) { 786 sb.append("//"); 787 } else { 788 doubleSlash = true; 789 } 790 sb.append(tableName).append("/"); 791 boolean comma = false; 792 for (String refs : mobRefSet.get(tableName)) { 793 if (comma) { 794 sb.append(","); 795 } else { 796 comma = true; 797 } 798 sb.append(refs); 799 } 800 } 801 return Bytes.toBytes(sb.toString()); 802 } else { 803 return HStoreFile.NULL_VALUE; 804 } 805 } 806 807 /** 808 * Deserialize the set of referenced mob hfiles from store file metadata. 809 * @param bytes compatibly serialized data. can not be null 810 * @return a setmultimap of original table to list of hfile names. will be empty if no values. 811 * @throws IllegalStateException if there are values but no table name 812 */ 813 public static ImmutableSetMultimap.Builder<TableName, String> deserializeMobFileRefs(byte[] bytes) 814 throws IllegalStateException { 815 ImmutableSetMultimap.Builder<TableName, String> map = ImmutableSetMultimap.builder(); 816 if (bytes.length > 1) { 817 // TODO avoid turning the tablename pieces in to strings. 818 String s = Bytes.toString(bytes); 819 String[] tables = s.split("//"); 820 for (String tableEnc : tables) { 821 final int delim = tableEnc.indexOf('/'); 822 if (delim <= 0) { 823 throw new IllegalStateException("MOB reference data does not match expected encoding: " 824 + "no table name included before list of mob refs."); 825 } 826 TableName table = TableName.valueOf(tableEnc.substring(0, delim)); 827 String[] refs = tableEnc.substring(delim + 1).split(","); 828 map.putAll(table, refs); 829 } 830 } else { 831 if (LOG.isDebugEnabled()) { 832 // array length 1 should be the NULL_VALUE. 833 if (!Arrays.equals(HStoreFile.NULL_VALUE, bytes)) { 834 LOG.debug( 835 "Serialized MOB file refs array was treated as the placeholder 'no entries' but" 836 + " didn't have the expected placeholder byte. expected={} and actual={}", 837 Arrays.toString(HStoreFile.NULL_VALUE), Arrays.toString(bytes)); 838 } 839 } 840 } 841 return map; 842 } 843}