/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Throwables;

/**
 * <p>
 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored
 * to <code>hbase:meta</code>.
 * </p>
 * <p>
 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is
 * that when this class is used on the client side (e.g. HBaseAdmin), we want to use a short-lived
 * connection (opened before each operation, closed right after), while when used on HM or HRS
 * (like in AssignmentManager) we want a permanent connection.
 * </p>
 * <p>
 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
 * range (table, startKey, endKey). For every range, there will be at least one HRI defined which
 * is called the default replica.
 * </p>
 * <p>
 * <h2>Meta layout</h2>
 *
 * <pre>
 * For each table there is a single row named for the table with a 'table' column family.
 * The column family currently has one column in it, the 'state' column:
 *
 * table:state => contains table state
 *
 * Then for each table range ('Region'), there is a single row, formatted as:
 * <tableName>,<startKey>,<regionId>,<encodedRegionName>.
 * This row is the serialized regionName of the default region replica.
 * Columns are:
 * info:regioninfo => contains serialized HRI for the default region replica
 * info:server => contains hostname:port (in string form) for the server hosting
 *                the default regionInfo replica
 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting
 *                            the regionInfo replica with replicaId
 * info:serverstartcode => contains server start code (in binary long form) for the server
 *                         hosting the default regionInfo replica
 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for
 *                                     the server hosting the regionInfo replica with replicaId
 * info:seqnumDuringOpen => contains seqNum (in binary long form) for the region at the time
 *                          the server opened the region with default replicaId
 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region
 *                                      at the time the server opened the region with replicaId
 * info:splitA => contains a serialized HRI for the first daughter region if the
 *                region is split
 * info:splitB => contains a serialized HRI for the second daughter region if the
 *                region is split
 * info:merge* => contains a serialized HRI for a merge parent region. There will be two
 *                or more of these columns in a row. A row that has these columns is
 *                undergoing a merge and is the result of the merge. Columns listed
 *                in merge* columns are the parents of this merged region. Example
 *                columns: info:merge0001, info:merge0002. You may also see 'mergeA'
 *                and 'mergeB'. This is the old form, replaced by the new format that
 *                allows for more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
 * </pre>
 * </p>
 * <p>
 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should
 * not leak out of it (through Result objects, etc.)
 * </p>
 */
@InterfaceAudience.Private
public class MetaTableAccessor {

  private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class);
  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");

  public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent");

  private static final byte ESCAPE_BYTE = (byte) 0xFF;

  private static final byte SEPARATED_BYTE = 0x00;

  @InterfaceAudience.Private
  @SuppressWarnings("ImmutableEnumChecker")
  public enum QueryType {
    ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
    REGION(HConstants.CATALOG_FAMILY),
    TABLE(HConstants.TABLE_FAMILY),
    REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY);

    private final byte[][] families;

    QueryType(byte[]... families) {
      this.families = families;
    }

    byte[][] getFamilies() {
      return this.families;
    }
  }

  /** The delimiter for meta columns for replicaIds > 0 */
  static final char META_REPLICA_ID_DELIMITER = '_';

  /** A regex for parsing server columns from meta. See above javadoc for meta layout */
  private static final Pattern SERVER_COLUMN_PATTERN =
    Pattern.compile("^server(_[0-9a-fA-F]{4})?$");

  ////////////////////////
  // Reading operations //
  ////////////////////////

  /**
   * Performs a full scan of <code>hbase:meta</code> for regions.
   * @param connection connection we're using
   * @param visitor    Visitor invoked against each row in regions family.
   */
  public static void fullScanRegions(Connection connection, final Visitor visitor)
    throws IOException {
    scanMeta(connection, null, null, QueryType.REGION, visitor);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code> for regions.
   * @param connection connection we're using
   */
  public static List<Result> fullScanRegions(Connection connection) throws IOException {
    return fullScan(connection, QueryType.REGION);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code> for tables.
   * @param connection connection we're using
   * @param visitor    Visitor invoked against each row in tables family.
   */
  public static void fullScanTables(Connection connection, final Visitor visitor)
    throws IOException {
    scanMeta(connection, null, null, QueryType.TABLE, visitor);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code>.
   * @param connection connection we're using
   * @param type       scanned part of meta
   * @return List of {@link Result}
   */
  private static List<Result> fullScan(Connection connection, QueryType type) throws IOException {
    CollectAllVisitor v = new CollectAllVisitor();
    scanMeta(connection, null, null, type, v);
    return v.getResults();
  }
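
  // Illustrative sketch (not part of the API surface): a minimal read of all region rows in
  // hbase:meta, assuming an already-open Connection named 'conn'. Uses only methods defined in
  // this class.
  //
  //   List<Result> rows = MetaTableAccessor.fullScanRegions(conn);
  //   for (Result r : rows) {
  //     RegionInfo ri = MetaTableAccessor.getRegionInfo(r);
  //     // ri may be null if the row carries no serialized RegionInfo
  //   }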

  /**
   * Callers should call close on the returned {@link Table} instance.
   * @param connection connection we're using to access Meta
   * @return a {@link Table} for <code>hbase:meta</code>
   */
  public static Table getMetaHTable(final Connection connection) throws IOException {
    // We used to pass whole CatalogTracker in here, now we just pass in Connection
    if (connection == null) {
      throw new NullPointerException("No connection");
    } else if (connection.isClosed()) {
      throw new IOException("connection is closed");
    }
    return connection.getTable(TableName.META_TABLE_NAME);
  }

  /**
   * @param t Table to use (will be closed when done).
   * @param g Get to run
   */
  private static Result get(final Table t, final Get g) throws IOException {
    if (t == null) return null;
    try {
      return t.get(g);
    } finally {
      t.close();
    }
  }

  /**
   * Gets the region info and assignment for the specified region.
   * @param connection connection we're using
   * @param regionName Region to lookup.
   * @return Location and RegionInfo for <code>regionName</code>
   * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead
   */
  @Deprecated
  public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte[] regionName)
    throws IOException {
    HRegionLocation location = getRegionLocation(connection, regionName);
    return location == null
      ? null
      : new Pair<>(location.getRegionInfo(), location.getServerName());
  }

  /**
   * Returns the HRegionLocation from meta for the given region
   * @param connection connection we're using
   * @param regionName region we're looking for
   * @return HRegionLocation for the given region
   */
  public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName)
    throws IOException {
    byte[] row = regionName;
    RegionInfo parsedInfo = null;
    try {
      parsedInfo = parseRegionInfoFromRegionName(regionName);
      row = getMetaKeyForRegion(parsedInfo);
    } catch (Exception parseEx) {
      // If it is not a valid regionName (i.e. it is a tableName), return null here,
      // as querying the meta table won't help.
      return null;
    }
    Get get = new Get(row);
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result r = get(getMetaHTable(connection), get);
    RegionLocations locations = getRegionLocations(r);
    return locations == null
      ? null
      : locations.getRegionLocation(
        parsedInfo == null ? RegionInfo.DEFAULT_REPLICA_ID : parsedInfo.getReplicaId());
  }

  /**
   * Returns the HRegionLocation from meta for the given region
   * @param connection connection we're using
   * @param regionInfo region information
   * @return HRegionLocation for the given region
   */
  public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo)
    throws IOException {
    return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), regionInfo,
      regionInfo.getReplicaId());
  }
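
  // Illustrative sketch (assumptions: an open Connection 'conn' and a full region name
  // 'regionName' as a byte[]): looking up where a region is hosted via the method above.
  //
  //   HRegionLocation loc = MetaTableAccessor.getRegionLocation(conn, regionName);
  //   ServerName server = loc == null ? null : loc.getServerName();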

  /** Returns the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. */
  public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri)
    throws IOException {
    Get get = new Get(getMetaKeyForRegion(ri));
    get.addFamily(HConstants.CATALOG_FAMILY);
    return get(getMetaHTable(connection), get);
  }

  /** Returns the row key to use for this regionInfo */
  public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) {
    return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
  }

  /**
   * Returns an HRI parsed from this regionName. Not all the fields of the HRI are stored in the
   * name, so the returned object should only be used for the fields in the regionName.
   */
  // This should be moved to RegionInfo? TODO.
  public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
    byte[][] fields = RegionInfo.parseRegionName(regionName);
    long regionId = Long.parseLong(Bytes.toString(fields[2]));
    int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0;
    return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])).setStartKey(fields[1])
      .setRegionId(regionId).setReplicaId(replicaId).build();
  }

  /**
   * Gets the result in hbase:meta for the specified region.
   * @param connection connection we're using
   * @param regionInfo region we're looking for
   * @return result of the specified region
   */
  public static Result getRegionResult(Connection connection, RegionInfo regionInfo)
    throws IOException {
    Get get = new Get(getMetaKeyForRegion(regionInfo));
    get.addFamily(HConstants.CATALOG_FAMILY);
    return get(getMetaHTable(connection), get);
  }

  /**
   * Scans the META table for a row whose key contains the specified <B>regionEncodedName</B>,
   * returning a single related <code>Result</code> instance if any row is found, null otherwise.
   * @param connection        the connection to query META table.
   * @param regionEncodedName the region encoded name to look for at META.
   * @return <code>Result</code> instance with the row related info in META, null otherwise.
   * @throws IOException if any errors occur while querying META.
   */
  public static Result scanByRegionEncodedName(Connection connection, String regionEncodedName)
    throws IOException {
    RowFilter rowFilter =
      new RowFilter(CompareOperator.EQUAL, new SubstringComparator(regionEncodedName));
    Scan scan = getMetaScan(connection.getConfiguration(), 1);
    scan.setFilter(rowFilter);
    try (Table table = getMetaHTable(connection);
      ResultScanner resultScanner = table.getScanner(scan)) {
      return resultScanner.next();
    }
  }

  /**
   * Returns all regioninfos listed in the 'info:merge*' columns of the {@code regionInfo} row.
   */
  @Nullable
  public static List<RegionInfo> getMergeRegions(Connection connection, RegionInfo regionInfo)
    throws IOException {
    return getMergeRegions(getRegionResult(connection, regionInfo).rawCells());
  }

  /**
   * Check whether the given {@code regionInfo} has any 'info:merge*' columns.
   */
  public static boolean hasMergeRegions(Connection conn, RegionInfo regionInfo) throws IOException {
    return hasMergeRegions(getRegionResult(conn, regionInfo).rawCells());
  }

  /**
   * Returns deserialized values of &lt;qualifier, regioninfo&gt; pairs taken from column values
   * that match the regex 'info:merge.*' in the array of <code>cells</code>.
   */
  @Nullable
  public static Map<String, RegionInfo> getMergeRegionsWithName(Cell[] cells) {
    if (cells == null) {
      return null;
    }
    Map<String, RegionInfo> regionsToMerge = null;
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      // Ok. This cell is that of an info:merge* column.
      RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
        cell.getValueLength());
      if (ri != null) {
        if (regionsToMerge == null) {
          regionsToMerge = new LinkedHashMap<>();
        }
        regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri);
      }
    }
    return regionsToMerge;
  }

  /**
   * Returns deserialized regioninfo values taken from column values that match the regex
   * 'info:merge.*' in the array of <code>cells</code>.
   */
  @Nullable
  public static List<RegionInfo> getMergeRegions(Cell[] cells) {
    Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells);
    return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values());
  }

  /**
   * Returns true if any merge regions are present in <code>cells</code>; i.e. the column in
   * <code>cell</code> matches the regex 'info:merge.*'.
   */
  public static boolean hasMergeRegions(Cell[] cells) {
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      return true;
    }
    return false;
  }

  /** Returns true if the column in <code>cell</code> matches the regex 'info:merge.*'. */
  private static boolean isMergeQualifierPrefix(Cell cell) {
    // Check that the cell has the catalog family and that its qualifier starts with the merge
    // qualifier prefix 'merge'
    return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY)
      && PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX);
  }

  /**
   * Lists all of the regions currently in META.
   * @param connection                  to connect with
   * @param excludeOfflinedSplitParents False if we are to include offlined/split parent regions;
   *                                    true and we'll leave offlined split parents out of the
   *                                    returned list
   * @return List of all user-space regions.
   */
  public static List<RegionInfo> getAllRegions(Connection connection,
    boolean excludeOfflinedSplitParents) throws IOException {
    List<Pair<RegionInfo, ServerName>> result;

    result = getTableRegionsAndLocations(connection, null, excludeOfflinedSplitParents);

    return getListOfRegionInfos(result);
  }

  /**
   * Gets all of the regions of the specified table. Do not use this method to get meta table
   * regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName  table we're looking for
   * @return Ordered list of {@link RegionInfo}.
   */
  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName)
    throws IOException {
    return getTableRegions(connection, tableName, false);
  }

  /**
   * Gets all of the regions of the specified table. Do not use this method to get meta table
   * regions, use methods in MetaTableLocator instead.
   * @param connection                  connection we're using
   * @param tableName                   table we're looking for
   * @param excludeOfflinedSplitParents If true, do not include offlined split parents in the
   *                                    return.
   * @return Ordered list of {@link RegionInfo}.
   */
  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName,
    final boolean excludeOfflinedSplitParents) throws IOException {
    List<Pair<RegionInfo, ServerName>> result =
      getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents);
    return getListOfRegionInfos(result);
  }

  @SuppressWarnings("MixedMutabilityReturnType")
  private static List<RegionInfo>
    getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) {
    if (pairs == null || pairs.isEmpty()) {
      return Collections.emptyList();
    }
    List<RegionInfo> result = new ArrayList<>(pairs.size());
    for (Pair<RegionInfo, ServerName> pair : pairs) {
      result.add(pair.getFirst());
    }
    return result;
  }

  /**
   * Returns start row for scanning META according to query type
   */
  public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) {
    if (tableName == null) {
      return null;
    }
    switch (type) {
      case REGION:
        byte[] startRow = new byte[tableName.getName().length + 2];
        System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length);
        startRow[startRow.length - 2] = HConstants.DELIMITER;
        startRow[startRow.length - 1] = HConstants.DELIMITER;
        return startRow;
      case ALL:
      case TABLE:
      default:
        return tableName.getName();
    }
  }

  /**
   * Returns stop row for scanning META according to query type
   */
  public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) {
    if (tableName == null) {
      return null;
    }
    final byte[] stopRow;
    switch (type) {
      case REGION:
        stopRow = new byte[tableName.getName().length + 3];
        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
        stopRow[stopRow.length - 3] = ' ';
        stopRow[stopRow.length - 2] = HConstants.DELIMITER;
        stopRow[stopRow.length - 1] = HConstants.DELIMITER;
        break;
      case ALL:
      case TABLE:
      default:
        stopRow = new byte[tableName.getName().length + 1];
        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
        stopRow[stopRow.length - 1] = ' ';
        break;
    }
    return stopRow;
  }
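
  // Illustrative sketch (assuming a TableName 'tn' and a Configuration 'conf'): the start/stop
  // rows computed above are what the meta scans in this class use to bound a catalog scan to a
  // single table's region rows; getScanForTableName below wraps the same pair into a Scan.
  //
  //   byte[] start = MetaTableAccessor.getTableStartRowForMeta(tn, QueryType.REGION);
  //   byte[] stop = MetaTableAccessor.getTableStopRowForMeta(tn, QueryType.REGION);
  //   Scan scan = MetaTableAccessor.getScanForTableName(conf, tn);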

  /**
   * This method creates a Scan object that will only scan catalog rows that belong to the
   * specified table. It doesn't specify any columns. This is a better alternative to just using a
   * start row and scan until it hits a new table since that requires parsing the HRI to get the
   * table name.
   * @param tableName bytes of table's name
   * @return configured Scan object
   */
  public static Scan getScanForTableName(Configuration conf, TableName tableName) {
    // Start key is just the table name with delimiters
    byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION);
    // Stop key appends the smallest possible char to the table name
    byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION);

    Scan scan = getMetaScan(conf, -1);
    scan.setStartRow(startKey);
    scan.setStopRow(stopKey);
    return scan;
  }

  private static Scan getMetaScan(Configuration conf, int rowUpperLimit) {
    Scan scan = new Scan();
    int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING,
      HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
    if (conf.getBoolean(HConstants.USE_META_REPLICAS, HConstants.DEFAULT_USE_META_REPLICAS)) {
      scan.setConsistency(Consistency.TIMELINE);
    }
    if (rowUpperLimit > 0) {
      scan.setLimit(rowUpperLimit);
      scan.setReadType(Scan.ReadType.PREAD);
    }
    scan.setCaching(scannerCaching);
    return scan;
  }

  /**
   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName  table we're looking for
   * @return Return list of regioninfos and server.
   */
  public static List<Pair<RegionInfo, ServerName>>
    getTableRegionsAndLocations(Connection connection, TableName tableName) throws IOException {
    return getTableRegionsAndLocations(connection, tableName, true);
  }

  /**
   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection                  connection we're using
   * @param tableName                   table to work with, can be null for getting all regions
   * @param excludeOfflinedSplitParents don't return split parents
   * @return Return list of regioninfos and server addresses.
   */
  // What happens here when 1M regions in hbase:meta? This won't scale?
  public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(
    Connection connection, @Nullable final TableName tableName,
    final boolean excludeOfflinedSplitParents) throws IOException {
    if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) {
      throw new IOException(
        "This method can't be used to locate meta regions; use MetaTableLocator instead");
    }
    // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress
    CollectingVisitor<Pair<RegionInfo, ServerName>> visitor =
      new CollectingVisitor<Pair<RegionInfo, ServerName>>() {
        private RegionLocations current = null;

        @Override
        public boolean visit(Result r) throws IOException {
          current = getRegionLocations(r);
          if (current == null || current.getRegionLocation().getRegion() == null) {
            LOG.warn("No serialized RegionInfo in " + r);
            return true;
          }
          RegionInfo hri = current.getRegionLocation().getRegion();
          if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
          // Else call super and add this Result to the collection.
          return super.visit(r);
        }

        @Override
        void add(Result r) {
          if (current == null) {
            return;
          }
          for (HRegionLocation loc : current.getRegionLocations()) {
            if (loc != null) {
              this.results.add(new Pair<>(loc.getRegion(), loc.getServerName()));
            }
          }
        }
      };
    scanMeta(connection, getTableStartRowForMeta(tableName, QueryType.REGION),
      getTableStopRowForMeta(tableName, QueryType.REGION), QueryType.REGION, visitor);
    return visitor.getResults();
  }

  /**
   * Get the user regions a given server is hosting.
   * @param connection connection we're using
   * @param serverName server whose regions we're interested in
   * @return List of user regions installed on this server (does not include catalog regions).
   */
  public static NavigableMap<RegionInfo, Result> getServerUserRegions(Connection connection,
    final ServerName serverName) throws IOException {
    final NavigableMap<RegionInfo, Result> hris = new TreeMap<>();
    // Fill the above hris map with entries from hbase:meta that have the passed
    // servername.
    CollectingVisitor<Result> v = new CollectingVisitor<Result>() {
      @Override
      void add(Result r) {
        if (r == null || r.isEmpty()) return;
        RegionLocations locations = getRegionLocations(r);
        if (locations == null) return;
        for (HRegionLocation loc : locations.getRegionLocations()) {
          if (loc != null) {
            if (loc.getServerName() != null && loc.getServerName().equals(serverName)) {
              hris.put(loc.getRegion(), r);
            }
          }
        }
      }
    };
    scanMeta(connection, null, null, QueryType.REGION, v);
    return hris;
  }

  public static void fullScanMetaAndPrint(Connection connection) throws IOException {
    Visitor v = r -> {
      if (r == null || r.isEmpty()) {
        return true;
      }
      LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r);
      TableState state = getTableState(r);
      if (state != null) {
        LOG.info("fullScanMetaAndPrint.Table State={}", state);
      } else {
        RegionLocations locations = getRegionLocations(r);
        if (locations == null) {
          return true;
        }
        for (HRegionLocation loc : locations.getRegionLocations()) {
          if (loc != null) {
            LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion());
          }
        }
      }
      return true;
    };
    scanMeta(connection, null, null, QueryType.ALL, v);
  }

  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
    TableName tableName, CatalogReplicaMode metaReplicaMode) throws IOException {
    scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor, metaReplicaMode);
  }

  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
    TableName tableName) throws IOException {
    scanMetaForTableRegions(connection, visitor, tableName, CatalogReplicaMode.NONE);
  }
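
  // Illustrative sketch (assuming a Connection 'conn' and a TableName 'tn'): collecting the
  // RegionInfo of every region row for one table with a Visitor lambda, using the method above.
  //
  //   List<RegionInfo> regions = new ArrayList<>();
  //   MetaTableAccessor.scanMetaForTableRegions(conn, r -> {
  //     RegionInfo ri = MetaTableAccessor.getRegionInfo(r);
  //     if (ri != null) {
  //       regions.add(ri);
  //     }
  //     return true; // keep scanning
  //   }, tn);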

  private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows,
    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
    scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type),
      type, null, maxRows, visitor, metaReplicaMode);
  }

  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
    @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException {
    scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor);
  }

  /**
   * Performs a scan of META table for given table starting from given row.
   * @param connection connection we're using
   * @param visitor    visitor to call
   * @param tableName  table within which we scan
   * @param row        start scan from this row
   * @param rowLimit   max number of rows to return
   */
  public static void scanMeta(Connection connection, final Visitor visitor,
    final TableName tableName, final byte[] row, final int rowLimit) throws IOException {
    byte[] startRow = null;
    byte[] stopRow = null;
    if (tableName != null) {
      startRow = getTableStartRowForMeta(tableName, QueryType.REGION);
      if (row != null) {
        RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row);
        startRow =
          RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false);
      }
      stopRow = getTableStopRowForMeta(tableName, QueryType.REGION);
    }
    scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor);
  }

  /**
   * Performs a scan of META table.
   * @param connection connection we're using
   * @param startRow   Where to start the scan. Pass null if want to begin scan at first row.
   * @param stopRow    Where to stop the scan. Pass null if want to scan all rows from the start one
   * @param type       scanned part of meta
   * @param maxRows    maximum rows to return
   * @param visitor    Visitor invoked against each row.
   */
  static void scanMeta(Connection connection, @Nullable final byte[] startRow,
    @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor)
    throws IOException {
    scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor, CatalogReplicaMode.NONE);
  }

  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
    @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
    int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
    Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit);

    for (byte[] family : type.getFamilies()) {
      scan.addFamily(family);
    }
    if (startRow != null) {
      scan.withStartRow(startRow);
    }
    if (stopRow != null) {
      scan.withStopRow(stopRow);
    }
    if (filter != null) {
      scan.setFilter(filter);
    }

    if (LOG.isTraceEnabled()) {
      LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow)
        + " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit
        + " with caching=" + scan.getCaching());
    }

    int currentRow = 0;
    try (Table metaTable = getMetaHTable(connection)) {
      switch (metaReplicaMode) {
        case LOAD_BALANCE:
          int numOfReplicas = metaTable.getDescriptor().getRegionReplication();
          if (numOfReplicas > 1) {
            int replicaId = ThreadLocalRandom.current().nextInt(numOfReplicas);

            // When the replicaId is 0, do not set Consistency.TIMELINE
            if (replicaId > 0) {
              scan.setReplicaId(replicaId);
              scan.setConsistency(Consistency.TIMELINE);
            }
          }
          break;
        case HEDGED_READ:
          scan.setConsistency(Consistency.TIMELINE);
          break;
        default:
          // Do nothing
      }
      try (ResultScanner scanner = metaTable.getScanner(scan)) {
        Result data;
        while ((data = scanner.next()) != null) {
          if (data.isEmpty()) continue;
          // Break if visit returns false.
          if (!visitor.visit(data)) break;
          if (++currentRow >= rowUpperLimit) break;
        }
      }
    }
    if (visitor instanceof Closeable) {
      try {
        ((Closeable) visitor).close();
      } catch (Throwable t) {
        ExceptionUtil.rethrowIfInterrupt(t);
        LOG.debug("Got exception in closing the meta scanner visitor", t);
      }
    }
  }

  /** Returns the closest meta table region row to the passed <code>row</code> */
  @NonNull
  private static RegionInfo getClosestRegionInfo(Connection connection,
    @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException {
    byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    Scan scan = getMetaScan(connection.getConfiguration(), 1);
    scan.setReversed(true);
    scan.withStartRow(searchRow);
    try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) {
      Result result = resultScanner.next();
      if (result == null) {
        throw new TableNotFoundException("Cannot find row in META for table: " + tableName
          + ", row=" + Bytes.toStringBinary(row));
      }
      RegionInfo regionInfo = getRegionInfo(result);
      if (regionInfo == null) {
        throw new IOException("RegionInfo was null or empty in Meta for " + tableName + ", row="
          + Bytes.toStringBinary(row));
      }
      return regionInfo;
    }
  }

  /**
   * Returns the column family used for meta columns.
   * @return HConstants.CATALOG_FAMILY.
   */
  public static byte[] getCatalogFamily() {
    return HConstants.CATALOG_FAMILY;
  }

  /**
   * Returns the column family used for table columns.
   * @return HConstants.TABLE_FAMILY.
   */
  private static byte[] getTableFamily() {
    return HConstants.TABLE_FAMILY;
  }

  /**
   * Returns the column qualifier for serialized region info
   * @return HConstants.REGIONINFO_QUALIFIER
   */
  public static byte[] getRegionInfoColumn() {
    return HConstants.REGIONINFO_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized table state
   * @return HConstants.TABLE_STATE_QUALIFIER
   */
  private static byte[] getTableStateColumn() {
    return HConstants.TABLE_STATE_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized region state
   * @return HConstants.STATE_QUALIFIER
   */
  private static byte[] getRegionStateColumn() {
    return HConstants.STATE_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized region state
   * @param replicaId the replicaId of the region
   * @return a byte[] for state qualifier
   */
  public static byte[] getRegionStateColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.STATE_QUALIFIER
      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for the server name column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for the server name column qualifier
   */
  public static byte[] getServerNameColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.SERVERNAME_QUALIFIER
      : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for server column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for server column qualifier
   */
  public static byte[] getServerColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.SERVER_QUALIFIER
      : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for server start code column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for server start code column qualifier
   */
  public static byte[] getStartCodeColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.STARTCODE_QUALIFIER
      : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for seqNum column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for seqNum column qualifier
   */
  public static byte[] getSeqNumColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.SEQNUM_QUALIFIER
      : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Parses the replicaId from the server column qualifier. See top of the class javadoc for the
   * actual meta layout
   * @param serverColumn the column qualifier
   * @return an int for the replicaId
   */
  static int parseReplicaIdFromServerColumn(byte[] serverColumn) {
    String serverStr = Bytes.toString(serverColumn);

    Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr);
    if (matcher.matches() && matcher.groupCount() > 0) {
      String group = matcher.group(1);
      if (group != null && group.length() > 0) {
        return Integer.parseInt(group.substring(1), 16);
      } else {
        return 0;
      }
    }
    return -1;
  }
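
  // Illustrative sketch: the replica-qualified column naming round-trips through the helpers
  // above. Assuming RegionInfo.REPLICA_ID_FORMAT renders the id as four hex digits (which is what
  // SERVER_COLUMN_PATTERN and the radix-16 parse above expect), then:
  //
  //   byte[] col = MetaTableAccessor.getServerColumn(2);              // "server_0002"
  //   int id = MetaTableAccessor.parseReplicaIdFromServerColumn(col); // 2
  //   MetaTableAccessor.parseReplicaIdFromServerColumn(Bytes.toBytes("server")); // 0 (default)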

  /**
   * Returns a {@link ServerName} from catalog table {@link Result}.
   * @param r Result to pull from
   * @return A ServerName instance or null if necessary fields not found or empty.
   */
  @Nullable
  @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only
  public static ServerName getServerName(final Result r, final int replicaId) {
    byte[] serverColumn = getServerColumn(replicaId);
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn);
    if (cell == null || cell.getValueLength() == 0) return null;
    String hostAndPort =
      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
    byte[] startcodeColumn = getStartCodeColumn(replicaId);
    cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn);
    if (cell == null || cell.getValueLength() == 0) return null;
    try {
      return ServerName.valueOf(hostAndPort,
        Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
    } catch (IllegalArgumentException e) {
      LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e);
      return null;
    }
  }

  /**
   * Returns the {@link ServerName} from catalog table {@link Result} where the region is
   * transitioning on. It should be the same as {@link MetaTableAccessor#getServerName(Result,int)}
   * if the server is at OPEN state.
   * @param r Result to pull the transitioning server name from
   * @return A ServerName instance or {@link MetaTableAccessor#getServerName(Result,int)} if
   *         necessary fields not found or empty.
   */
  @Nullable
  public static ServerName getTargetServerName(final Result r, final int replicaId) {
    final Cell cell =
      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId));
    if (cell == null || cell.getValueLength() == 0) {
      RegionLocations locations = MetaTableAccessor.getRegionLocations(r);
      if (locations != null) {
        HRegionLocation location = locations.getRegionLocation(replicaId);
        if (location != null) {
          return location.getServerName();
        }
      }
      return null;
    }
    return ServerName.parseServerName(
      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
  }

  /**
   * The latest seqnum that the server writing to meta observed when opening the region. E.g. the
   * seqNum when the result of {@link #getServerName(Result, int)} was written.
   * @param r Result to pull the seqNum from
   * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written.
   */
  private static long getSeqNumDuringOpen(final Result r, final int replicaId) {
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId));
    if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM;
    return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
  }

  /**
   * Returns the daughter regions by reading the corresponding columns of the catalog table Result.
   * @param data a Result object from the catalog table scan
   * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent
   */
  public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) {
    RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER);
    RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER);
    return new PairOfSameType<>(splitA, splitB);
  }

  /**
   * Returns an HRegionLocationList extracted from the result.
   * @return an HRegionLocationList containing all locations for the region range or null if we
   *         can't deserialize the result.
   */
  @Nullable
  public static RegionLocations getRegionLocations(final Result r) {
    if (r == null) return null;
    RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn());
    if (regionInfo == null) return null;

    List<HRegionLocation> locations = new ArrayList<>(1);
    NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap();

    locations.add(getRegionLocation(r, regionInfo, 0));

    NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily());
    if (infoMap == null) return new RegionLocations(locations);

    // iterate until all serverName columns are seen
    int replicaId = 0;
    byte[] serverColumn = getServerColumn(replicaId);
    SortedMap<byte[], byte[]> serverMap;
    serverMap = infoMap.tailMap(serverColumn, false);

    if (serverMap.isEmpty()) return new RegionLocations(locations);

    for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) {
      replicaId = parseReplicaIdFromServerColumn(entry.getKey());
      if (replicaId < 0) {
        break;
      }
      HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
      // In case the region replica is newly created, its location might be null. We usually do
      // not keep HRLs with a null ServerName in a RegionLocations object; they are handled as
      // null HRLs.
      if (location.getServerName() == null) {
        locations.add(null);
      } else {
        locations.add(location);
      }
    }

    return new RegionLocations(locations);
  }

  /**
   * Returns the HRegionLocation parsed from the given meta row Result for the given regionInfo and
   * replicaId. The regionInfo can be the default region info for the replica.
   * @param r          the meta row result
   * @param regionInfo RegionInfo for default replica
   * @param replicaId  the replicaId for the HRegionLocation
   * @return HRegionLocation parsed from the given meta row Result for the given replicaId
   */
  private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo,
    final int replicaId) {
    ServerName serverName = getServerName(r, replicaId);
    long seqNum = getSeqNumDuringOpen(r, replicaId);
    RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId);
    return new HRegionLocation(replicaInfo, serverName, seqNum);
  }

  /**
   * Returns RegionInfo object from the column
   * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog table Result.
   * @param data a Result object from the catalog table scan
   * @return RegionInfo or null
   */
  public static RegionInfo getRegionInfo(Result data) {
    return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER);
  }

  /**
   * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and
   * <code>qualifier</code> of the catalog table result.
   * @param r         a Result object from the catalog table scan
   * @param qualifier Column family qualifier
   * @return A RegionInfo instance or null.
   */
  @Nullable
  public static RegionInfo getRegionInfo(final Result r, byte[] qualifier) {
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier);
    if (cell == null) return null;
    return RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
      cell.getValueLength());
  }

  /**
   * Fetch table state for given table from META table
   * @param conn      connection to use
   * @param tableName table to fetch state for
   */
  @Nullable
  public static TableState getTableState(Connection conn, TableName tableName) throws IOException {
    if (tableName.equals(TableName.META_TABLE_NAME)) {
      return new TableState(tableName, TableState.State.ENABLED);
    }
    Table metaHTable = getMetaHTable(conn);
    Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn());
    Result result = metaHTable.get(get);
    return getTableState(result);
  }

  /**
   * Fetch table states from META table
   * @param conn connection to use
   * @return map {tableName -> state}
   */
  public static Map<TableName, TableState> getTableStates(Connection conn) throws IOException {
    final Map<TableName, TableState> states = new LinkedHashMap<>();
    Visitor collector = r -> {
      TableState state = getTableState(r);
      if (state != null) {
        states.put(state.getTableName(), state);
      }
      return true;
    };
    fullScanTables(conn, collector);
    return states;
  }

  /**
   * Updates state in META. Do not use; for internal use only.
   * @param conn      connection to use
   * @param tableName table to look for
   */
  public static void updateTableState(Connection conn, TableName tableName, TableState.State actual)
    throws IOException {
    updateTableState(conn, new TableState(tableName, actual));
  }

  /**
   * Decode table state from META Result. Should contain a cell from HConstants.TABLE_FAMILY
   * @return null if not found
   */
  @Nullable
  public static TableState getTableState(Result r) throws IOException {
    Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn());
    if (cell == null) {
      return null;
    }
    try {
      return TableState.parseFrom(TableName.valueOf(r.getRow()),
        Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(),
          cell.getValueOffset() + cell.getValueLength()));
    } catch (DeserializationException e) {
      throw new IOException(e);
    }
  }

  /**
   * Implementations 'visit' a catalog table row.
   */
  public interface Visitor {
    /**
     * Visit the catalog table row.
     * @param r A row from catalog table
     * @return True if we are to proceed scanning the table, else false if we are to stop now.
     */
    boolean visit(final Result r) throws IOException;
  }

  /**
   * Implementations 'visit' a catalog table row but with close() at the end.
   */
  public interface CloseableVisitor extends Visitor, Closeable {
  }
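
  // Illustrative sketch (assuming a Connection 'conn'): reading a table's state straight from
  // hbase:meta via the helpers above.
  //
  //   TableState state = MetaTableAccessor.getTableState(conn, TableName.valueOf("t1"));
  //   boolean enabled = state != null && state.getState() == TableState.State.ENABLED;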

  /**
   * A {@link Visitor} that collects content out of passed {@link Result}.
   */
  static abstract class CollectingVisitor<T> implements Visitor {
    final List<T> results = new ArrayList<>();

    @Override
    public boolean visit(Result r) throws IOException {
      if (r != null && !r.isEmpty()) {
        add(r);
      }
      return true;
    }

    abstract void add(Result r);

    /** Returns collected results; wait till visits complete to collect all possible results */
    List<T> getResults() {
      return this.results;
    }
  }

  /**
   * Collects all returned.
   */
  static class CollectAllVisitor extends CollectingVisitor<Result> {
    @Override
    void add(Result r) {
      this.results.add(r);
    }
  }

  /**
   * A Visitor that skips offline regions and split parents
   */
  public static abstract class DefaultVisitorBase implements Visitor {

    DefaultVisitorBase() {
      super();
    }

    public abstract boolean visitInternal(Result rowResult) throws IOException;

    @Override
    public boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }

      // skip over offline and split regions
      if (!(info.isOffline() || info.isSplit())) {
        return visitInternal(rowResult);
      }
      return true;
    }
  }

  /**
   * A Visitor for a table. Provides a consistent view of the table's hbase:meta entries during
   * concurrent splits (see HBASE-5986 for details). This class does not guarantee ordered
   * traversal of meta entries, and can block until the hbase:meta entries for daughters are
   * available during splits.
   */
  public static abstract class TableVisitorBase extends DefaultVisitorBase {
    private TableName tableName;

    public TableVisitorBase(TableName tableName) {
      super();
      this.tableName = tableName;
    }

    @Override
    public final boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }
      if (!info.getTable().equals(tableName)) {
        return false;
      }
      return super.visit(rowResult);
    }
  }

  ////////////////////////
  // Editing operations //
  ////////////////////////

  /**
   * Generates and returns a {@link Put} containing the {@link RegionInfo} for the catalog table.
   * @throws IllegalArgumentException when the provided RegionInfo is not the default replica.
   */
  public static Put makePutFromRegionInfo(RegionInfo regionInfo) throws IOException {
    return makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Generates and returns a {@link Put} containing the {@link RegionInfo} for the catalog table.
   * @throws IllegalArgumentException when the provided RegionInfo is not the default replica.
   */
  public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
    return addRegionInfo(new Put(getMetaKeyForRegion(regionInfo), ts), regionInfo);
  }

  /**
   * Generates and returns a Delete containing the region info for the catalog table
   */
  public static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) {
    if (regionInfo == null) {
      throw new IllegalArgumentException("Can't make a delete for null region");
    }
    if (regionInfo.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
      throw new IllegalArgumentException(
        "Can't make delete for a replica region. Operate on the primary");
    }
    Delete delete = new Delete(getMetaKeyForRegion(regionInfo));
    delete.addFamily(getCatalogFamily(), ts);
    return delete;
  }

  /**
   * Adds split daughters to the Put
   */
  private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB)
    throws IOException {
    if (splitA != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITA_QUALIFIER)
        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
        .setValue(RegionInfo.toByteArray(splitA)).build());
    }
    if (splitB != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITB_QUALIFIER)
        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
        .setValue(RegionInfo.toByteArray(splitB)).build());
    }
    return put;
  }

  /**
   * Put the passed <code>p</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param p          Put to add to hbase:meta
   */
  private static void putToMetaTable(Connection connection, Put p) throws IOException {
    try (Table table = getMetaHTable(connection)) {
      put(table, p);
    }
  }

  /**
   * @param t Table to use
   * @param p put to make
   */
  private static void put(Table t, Put p) throws IOException {
    debugLogMutation(p);
    t.put(p);
  }

  /**
   * Put the passed <code>ps</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param ps         Put to add to hbase:meta
   */
  public static void putsToMetaTable(final Connection connection, final List<Put> ps)
    throws IOException {
    if (ps.isEmpty()) {
      return;
    }
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(ps);
      // the implementation for putting a single Put is much simpler so here we do a check first.
      if (ps.size() == 1) {
        t.put(ps.get(0));
      } else {
        t.put(ps);
      }
    }
  }

  /**
   * Delete the passed <code>d</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param d          Delete to add to hbase:meta
   */
  private static void deleteFromMetaTable(final Connection connection, final Delete d)
    throws IOException {
    List<Delete> dels = new ArrayList<>(1);
    dels.add(d);
    deleteFromMetaTable(connection, dels);
  }

  /**
   * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param deletes    Deletes to add to hbase:meta This list should support #remove.
   */
  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
    throws IOException {
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(deletes);
      t.delete(deletes);
    }
  }

  /**
   * Set the column value corresponding to this {@code replicaId}'s {@link RegionState} to the
   * provided {@code state}. Mutates the provided {@link Put}.
   */
  private static Put addRegionStateToPut(Put put, int replicaId, RegionState.State state)
    throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getRegionStateColumn(replicaId))
      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
      .setValue(Bytes.toBytes(state.name())).build());
    return put;
  }

  /**
   * Update state column in hbase:meta.
   */
  public static void updateRegionState(Connection connection, RegionInfo ri,
    RegionState.State state) throws IOException {
    final Put put = makePutFromRegionInfo(ri);
    addRegionStateToPut(put, ri.getReplicaId(), state);
    putsToMetaTable(connection, Collections.singletonList(put));
  }

  /**
   * Adds daughter region infos to the hbase:meta row for the specified region. Note that this does
   * not add its daughters as different rows, but adds information about the daughters in the same
   * row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
   * you want to do that.
   * @param connection connection we're using
   * @param regionInfo RegionInfo of parent region
   * @param splitA     first split daughter of the parent regionInfo
   * @param splitB     second split daughter of the parent regionInfo
   * @throws IOException if problem connecting or updating meta
   */
  public static void addSplitsToParent(Connection connection, RegionInfo regionInfo,
    RegionInfo splitA, RegionInfo splitB) throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      Put put = makePutFromRegionInfo(regionInfo);
      addDaughtersToPut(put, splitA, splitB);
      meta.put(put);
      debugLogMutation(put);
      LOG.debug("Added region {}", regionInfo.getRegionNameAsString());
    }
  }

  /**
   * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
   * does not add its daughters as different rows, but adds information about the daughters in the
   * same row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
   * you want to do that.
   * @param connection connection we're using
   * @param regionInfo region information
   * @throws IOException if problem connecting or updating meta
   */
  public static void addRegionToMeta(Connection connection, RegionInfo regionInfo)
    throws IOException {
    addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1);
  }
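
  // Illustrative sketch (assuming a Connection 'conn' and a default-replica RegionInfo 'ri'):
  // marking a region OPEN in its hbase:meta state column via the helper above.
  //
  //   MetaTableAccessor.updateRegionState(conn, ri, RegionState.State.OPEN);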
1481 * @param connection connection we're using 1482 * @param regionInfos region information list 1483 * @throws IOException if problem connecting or updating meta 1484 */ 1485 public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1486 int regionReplication) throws IOException { 1487 addRegionsToMeta(connection, regionInfos, regionReplication, 1488 EnvironmentEdgeManager.currentTime()); 1489 } 1490 1491 /** 1492 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is 1493 * CLOSED. 1494 * @param connection connection we're using 1495 * @param regionInfos region information list 1496 * @param ts desired timestamp 1497 * @throws IOException if problem connecting or updating meta 1498 */ 1499 private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1500 int regionReplication, long ts) throws IOException { 1501 List<Put> puts = new ArrayList<>(); 1502 for (RegionInfo regionInfo : regionInfos) { 1503 if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { 1504 Put put = makePutFromRegionInfo(regionInfo, ts); 1505 // New regions are added with initial state of CLOSED. 1506 addRegionStateToPut(put, regionInfo.getReplicaId(), RegionState.State.CLOSED); 1507 // Add empty locations for region replicas so that number of replicas can be cached 1508 // whenever the primary region is looked up from meta 1509 for (int i = 1; i < regionReplication; i++) { 1510 addEmptyLocation(put, i); 1511 } 1512 puts.add(put); 1513 } 1514 } 1515 putsToMetaTable(connection, puts); 1516 LOG.info("Added {} regions to meta.", puts.size()); 1517 } 1518 1519 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 1520 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 1521 int max = mergeRegions.size(); 1522 if (max > limit) { 1523 // Should never happen!!!!! But just in case. 1524 throw new RuntimeException( 1525 "Can't merge " + max + " regions in one go; " + limit + " is upper-limit."); 1526 } 1527 int counter = 0; 1528 for (RegionInfo ri : mergeRegions) { 1529 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 1530 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1531 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier)) 1532 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 1533 .setValue(RegionInfo.toByteArray(ri)).build()); 1534 } 1535 return put; 1536 } 1537 1538 /** 1539 * Merge regions into one in an atomic operation. Deletes the merging regions in hbase:meta and 1540 * adds the merged region. 1541 * @param connection connection we're using 1542 * @param mergedRegion the merged region 1543 * @param parentSeqNum Parent regions to merge and their next open sequence id used by serial 1544 * replication. Set to -1 if not needed by this table. 
1545 * @param sn the location of the region 1546 */ 1547 public static void mergeRegions(Connection connection, RegionInfo mergedRegion, 1548 Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) throws IOException { 1549 try (Table meta = getMetaHTable(connection)) { 1550 long time = HConstants.LATEST_TIMESTAMP; 1551 List<Mutation> mutations = new ArrayList<>(); 1552 List<RegionInfo> replicationParents = new ArrayList<>(); 1553 for (Map.Entry<RegionInfo, Long> e : parentSeqNum.entrySet()) { 1554 RegionInfo ri = e.getKey(); 1555 long seqNum = e.getValue(); 1556 // Deletes for merging regions 1557 mutations.add(makeDeleteFromRegionInfo(ri, time)); 1558 if (seqNum > 0) { 1559 mutations.add(makePutForReplicationBarrier(ri, seqNum, time)); 1560 replicationParents.add(ri); 1561 } 1562 } 1563 // Put for parent 1564 Put putOfMerged = makePutFromRegionInfo(mergedRegion, time); 1565 putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet()); 1566 // Set initial state to CLOSED. 1567 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 1568 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1569 // master tries to assign this offline region. This is followed by re-assignments of the 1570 // merged region from resumed {@link MergeTableRegionsProcedure} 1571 addRegionStateToPut(putOfMerged, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED); 1572 mutations.add(putOfMerged); 1573 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 1574 // if crash after merge happened but before we got to here.. means in-memory 1575 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 1576 // assign the merged region later. 1577 if (sn != null) { 1578 addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); 1579 } 1580 1581 // Add empty locations for region replicas of the merged region so that number of replicas 1582 // can be cached whenever the primary region is looked up from meta 1583 for (int i = 1; i < regionReplication; i++) { 1584 addEmptyLocation(putOfMerged, i); 1585 } 1586 // add parent reference for serial replication 1587 if (!replicationParents.isEmpty()) { 1588 addReplicationParent(putOfMerged, replicationParents); 1589 } 1590 byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER); 1591 multiMutate(meta, tableRow, mutations); 1592 } 1593 } 1594 1595 /** 1596 * Splits the region into two in an atomic operation. Offlines the parent region with the 1597 * information that it is split into two, and also adds the daughter regions. Does not add the 1598 * location information to the daughter regions since they are not open yet. 1599 * @param connection connection we're using 1600 * @param parent the parent region which is split 1601 * @param parentOpenSeqNum the next open sequence id for parent region, used by serial 1602 * replication. -1 if not necessary. 
1603 * @param splitA Split daughter region A 1604 * @param splitB Split daughter region B 1605 * @param sn the location of the region 1606 */ 1607 public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum, 1608 RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) throws IOException { 1609 try (Table meta = getMetaHTable(connection)) { 1610 long time = EnvironmentEdgeManager.currentTime(); 1611 // Put for parent 1612 Put putParent = makePutFromRegionInfo( 1613 RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time); 1614 addDaughtersToPut(putParent, splitA, splitB); 1615 1616 // Puts for daughters 1617 Put putA = makePutFromRegionInfo(splitA, time); 1618 Put putB = makePutFromRegionInfo(splitB, time); 1619 if (parentOpenSeqNum > 0) { 1620 addReplicationBarrier(putParent, parentOpenSeqNum); 1621 addReplicationParent(putA, Collections.singletonList(parent)); 1622 addReplicationParent(putB, Collections.singletonList(parent)); 1623 } 1624 // Set initial state to CLOSED 1625 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 1626 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1627 // master tries to assign these offline regions. This is followed by re-assignments of the 1628 // daughter regions from resumed {@link SplitTableRegionProcedure} 1629 addRegionStateToPut(putA, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED); 1630 addRegionStateToPut(putB, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED); 1631 1632 addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine. 1633 addSequenceNum(putB, 1, splitB.getReplicaId()); 1634 1635 // Add empty locations for region replicas of daughters so that number of replicas can be 1636 // cached whenever the primary region is looked up from meta 1637 for (int i = 1; i < regionReplication; i++) { 1638 addEmptyLocation(putA, i); 1639 addEmptyLocation(putB, i); 1640 } 1641 1642 byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); 1643 multiMutate(meta, tableRow, putParent, putA, putB); 1644 } 1645 } 1646 1647 /** 1648 * Update state of the table in meta. 
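   * <p>
   * What gets written is the single-cell Put produced by
   * {@link #makePutFromTableState(TableState, long)}: the {@code table:state} column holding the
   * serialized pb table state. A rough sketch, with a hypothetical table name:
   * </p>
   *
   * <pre>
   * Put put = makePutFromTableState(
   *   new TableState(TableName.valueOf("example_table"), TableState.State.DISABLED),
   *   EnvironmentEdgeManager.currentTime());
   * </pre>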
   * @param connection what we use for update
   * @param state new state
   */
  private static void updateTableState(Connection connection, TableState state) throws IOException {
    Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime());
    putToMetaTable(connection, put);
    LOG.info("Updated {} in hbase:meta", state);
  }

  /**
   * Construct a Put for the given table state.
   * @param state new state
   */
  public static Put makePutFromTableState(TableState state, long ts) {
    Put put = new Put(state.getTableName().getName(), ts);
    put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray());
    return put;
  }

  /**
   * Remove the state for a table from meta.
   * @param connection to use for deletion
   * @param table to delete state for
   */
  public static void deleteTableState(Connection connection, TableName table) throws IOException {
    long time = EnvironmentEdgeManager.currentTime();
    Delete delete = new Delete(table.getName());
    delete.addColumns(getTableFamily(), getTableStateColumn(), time);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted table " + table + " state from META");
  }

  private static void multiMutate(Table table, byte[] row, Mutation... mutations)
    throws IOException {
    multiMutate(table, row, Arrays.asList(mutations));
  }

  /**
   * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge
   * and split as these want to make atomic mutations across multiple rows.
   * @throws IOException if the operation fails; a RuntimeException raised underneath is also
   *                     wrapped in an IOException.
   */
  static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations)
    throws IOException {
    debugLogMutations(mutations);
    Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> {
      MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
      for (Mutation mutation : mutations) {
        if (mutation instanceof Put) {
          builder.addMutationRequest(
            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
        } else if (mutation instanceof Delete) {
          builder.addMutationRequest(
            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
        } else {
          throw new DoNotRetryIOException(
            "multi in MetaEditor doesn't support " + mutation.getClass().getName());
        }
      }
      ServerRpcController controller = new ServerRpcController();
      CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback =
        new CoprocessorRpcUtils.BlockingRpcCallback<>();
      instance.mutateRows(controller, builder.build(), rpcCallback);
      MutateRowsResponse resp = rpcCallback.get();
      if (controller.failedOnException()) {
        throw controller.getFailedOn();
      }
      return resp;
    };
    try {
      table.coprocessorService(MultiRowMutationService.class, row, row, callable);
    } catch (Throwable e) {
      // Rethrow if it is an IOE, else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g.
      // a RejectedExecutionException raised because the hosting server is shutting down).
      // This is old behavior worth reexamining. Procedures doing merge or split
      // currently don't handle RuntimeExceptions coming up out of meta table edits.
      // Would have to work on this at least. See HBASE-23904.
      Throwables.throwIfInstanceOf(e, IOException.class);
      throw new IOException(e);
    }
  }

  /**
   * Updates the location of the specified region in hbase:meta to be the specified server hostname
   * and startcode.
   * <p>
   * Uses the passed connection to reach the server hosting hbase:meta and makes edits to that
   * region.
   * @param connection connection we're using
   * @param regionInfo region to update location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   */
  public static void updateRegionLocation(Connection connection, RegionInfo regionInfo,
    ServerName sn, long openSeqNum, long masterSystemTime) throws IOException {
    updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime);
  }

  /**
   * Updates the location of the specified region to be the specified server.
   * <p>
   * Connects to the specified server, which should be hosting the specified catalog region, to
   * perform the edit.
   * @param connection connection we're using
   * @param regionInfo region to update location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   * @throws IOException In particular could throw {@link java.net.ConnectException} if the server
   *                     is down on the other end.
   */
  private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn,
    long openSeqNum, long masterSystemTime) throws IOException {
    // region replicas are kept in the primary region's row
    Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime);
    addRegionInfo(put, regionInfo);
    addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
    putToMetaTable(connection, put);
    LOG.info("Updated row {} with server={}", regionInfo.getRegionNameAsString(), sn);
  }

  /**
   * Deletes the specified region from META.
   * @param connection connection we're using
   * @param regionInfo region to be deleted from META
   */
  public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo)
    throws IOException {
    Delete delete = new Delete(regionInfo.getRegionName());
    delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted " + regionInfo.getRegionNameAsString());
  }

  /**
   * Deletes the specified regions from META.
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   */
  public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo)
    throws IOException {
    deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Deletes the specified regions from META.
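   * <p>
   * Each region gets a family-wide tombstone on its catalog row at timestamp {@code ts}, roughly:
   * </p>
   *
   * <pre>
   * Delete d = new Delete(regionInfo.getRegionName());
   * d.addFamily(getCatalogFamily(), ts); // drops every info:* cell written at or before ts
   * </pre>
   * <p>
   * {@link #overwriteRegions(Connection, List, int)} relies on the explicit {@code ts}: it places
   * these deletes at {@code now} and the follow-up puts at {@code now + 1} so the puts are not
   * eclipsed by the delete markers.
   * </p>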
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   */
  private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo,
    long ts) throws IOException {
    List<Delete> deletes = new ArrayList<>(regionsInfo.size());
    for (RegionInfo hri : regionsInfo) {
      Delete e = new Delete(hri.getRegionName());
      e.addFamily(getCatalogFamily(), ts);
      deletes.add(e);
    }
    deleteFromMetaTable(connection, deletes);
    LOG.info("Deleted {} regions from META", regionsInfo.size());
    LOG.debug("Deleted regions: {}", regionsInfo);
  }

  /**
   * Overwrites the specified regions in hbase:meta. Deletes the old rows for the given regions and
   * adds new ones. Regions added back have state CLOSED.
   * @param connection connection we're using
   * @param regionInfos list of regions to be overwritten in META
   * @param regionReplication number of replicas configured for the table's regions
   */
  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
    int regionReplication) throws IOException {
    // use master time for delete marker and the Put
    long now = EnvironmentEdgeManager.currentTime();
    deleteRegionInfos(connection, regionInfos, now);
    // Why now + 1 below? It is the easiest way to ensure that the preceding deletes do not
    // eclipse the following puts, which could happen if both landed on the same timestamp at
    // the server. See HBASE-9906 and HBASE-9879. Once either HBASE-9879 or HBASE-8770 is fixed,
    // or HBASE-9905 is fixed and meta uses seqIds, this is not needed.
    //
    // HBASE-13875 uses the master timestamp for the mutations, so the old 20ms sleep is not needed.
    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
    LOG.info("Overwrote {} regions in hbase:meta", regionInfos.size());
    LOG.debug("Overwrote regions: {}", regionInfos);
  }

  /**
   * Deletes the merge qualifiers for the specified merge region.
   * @param connection connection we're using
   * @param mergeRegion the merged region
   */
  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
    throws IOException {
    Delete delete = new Delete(mergeRegion.getRegionName());
    // NOTE: We are doing a new hbase:meta read here.
    Cell[] cells = getRegionResult(connection, mergeRegion).rawCells();
    if (cells == null || cells.length == 0) {
      return;
    }
    List<byte[]> qualifiers = new ArrayList<>();
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      byte[] qualifier = CellUtil.cloneQualifier(cell);
      qualifiers.add(qualifier);
      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
    }

    // There is a race condition: a GCMultipleMergedRegionsProcedure may be scheduled while the
    // previous GCMultipleMergedRegionsProcedure is still running. In that case the second
    // GCMultipleMergedRegionsProcedure could delete the merged region by accident!
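    // Hence the check below: if another procedure already removed the info:merge* columns, the
    // Delete would carry no column specs and would wipe the whole row for the merged region, so
    // we bail out instead of issuing it.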
    if (qualifiers.isEmpty()) {
      LOG.info("No merge qualifiers for region " + mergeRegion.getRegionNameAsString()
        + " in the meta table; they have already been cleaned up. Skipping.");
      return;
    }

    deleteFromMetaTable(connection, delete);
    LOG.info(
      "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers "
        + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
  }

  public static Put addRegionInfo(final Put p, final RegionInfo hri) throws IOException {
    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
      .setFamily(getCatalogFamily()).setQualifier(HConstants.REGIONINFO_QUALIFIER)
      .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put)
      // Serialize the default replica HRI, otherwise a scan of hbase:meta shows an
      // info:regioninfo value whose encoded name and region name differ from those of the
      // hbase:meta row.
      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
      .build());
    return p;
  }

  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
    throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p
      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
        .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getAddress().toString())).build())
      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
        .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getStartcode())).build())
      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
        .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)).build());
  }

  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
    for (byte b : regionName) {
      if (b == ESCAPE_BYTE) {
        out.write(ESCAPE_BYTE);
      }
      out.write(b);
    }
  }

  public static byte[] getParentsBytes(List<RegionInfo> parents) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Iterator<RegionInfo> iter = parents.iterator();
    writeRegionName(bos, iter.next().getRegionName());
    while (iter.hasNext()) {
      bos.write(ESCAPE_BYTE);
      bos.write(SEPARATED_BYTE);
      writeRegionName(bos, iter.next().getRegionName());
    }
    return bos.toByteArray();
  }

  private static List<byte[]> parseParentsBytes(byte[] bytes) {
    List<byte[]> parents = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for (int i = 0; i < bytes.length; i++) {
      if (bytes[i] == ESCAPE_BYTE) {
        i++;
        if (bytes[i] == SEPARATED_BYTE) {
          parents.add(bos.toByteArray());
          bos.reset();
          continue;
        }
        // fall through to append the escaped byte
      }
      bos.write(bytes[i]);
    }
    if (bos.size() > 0) {
      parents.add(bos.toByteArray());
    }
    return parents;
  }

  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
    byte[] value = getParentsBytes(parents);
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
.setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER) 1945 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(value).build()); 1946 } 1947 1948 public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts) 1949 throws IOException { 1950 Put put = new Put(regionInfo.getRegionName(), ts); 1951 addReplicationBarrier(put, openSeqNum); 1952 return put; 1953 } 1954 1955 /** 1956 * See class comment on SerialReplicationChecker 1957 */ 1958 public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException { 1959 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1960 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(HConstants.SEQNUM_QUALIFIER) 1961 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)) 1962 .build()); 1963 } 1964 1965 public static Put addEmptyLocation(Put p, int replicaId) throws IOException { 1966 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1967 return p 1968 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1969 .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp()) 1970 .setType(Cell.Type.Put).build()) 1971 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1972 .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp()) 1973 .setType(Cell.Type.Put).build()) 1974 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1975 .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp()) 1976 .setType(Cell.Type.Put).build()); 1977 } 1978 1979 public static final class ReplicationBarrierResult { 1980 private final long[] barriers; 1981 private final RegionState.State state; 1982 private final List<byte[]> parentRegionNames; 1983 1984 ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) { 1985 this.barriers = barriers; 1986 this.state = state; 1987 this.parentRegionNames = parentRegionNames; 1988 } 1989 1990 public long[] getBarriers() { 1991 return barriers; 1992 } 1993 1994 public RegionState.State getState() { 1995 return state; 1996 } 1997 1998 public List<byte[]> getParentRegionNames() { 1999 return parentRegionNames; 2000 } 2001 2002 @Override 2003 public String toString() { 2004 return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + state 2005 + ", parentRegionNames=" 2006 + parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) 2007 + "]"; 2008 } 2009 } 2010 2011 private static long getReplicationBarrier(Cell c) { 2012 return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength()); 2013 } 2014 2015 public static long[] getReplicationBarriers(Result result) { 2016 return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2017 .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray(); 2018 } 2019 2020 private static ReplicationBarrierResult getReplicationBarrierResult(Result result) { 2021 long[] barriers = getReplicationBarriers(result); 2022 byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn()); 2023 RegionState.State state = 2024 stateBytes != null ? 
RegionState.State.valueOf(Bytes.toString(stateBytes)) : null; 2025 byte[] parentRegionsBytes = 2026 result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER); 2027 List<byte[]> parentRegionNames = 2028 parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList(); 2029 return new ReplicationBarrierResult(barriers, state, parentRegionNames); 2030 } 2031 2032 public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn, 2033 TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException { 2034 byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 2035 byte[] metaStopKey = 2036 RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false); 2037 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey) 2038 .addColumn(getCatalogFamily(), getRegionStateColumn()) 2039 .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true) 2040 .setCaching(10); 2041 try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) { 2042 for (Result result;;) { 2043 result = scanner.next(); 2044 if (result == null) { 2045 return new ReplicationBarrierResult(new long[0], null, Collections.emptyList()); 2046 } 2047 byte[] regionName = result.getRow(); 2048 // TODO: we may look up a region which has already been split or merged so we need to check 2049 // whether the encoded name matches. Need to find a way to quit earlier when there is no 2050 // record for the given region, for now it will scan to the end of the table. 2051 if ( 2052 !Bytes.equals(encodedRegionName, Bytes.toBytes(RegionInfo.encodeRegionName(regionName))) 2053 ) { 2054 continue; 2055 } 2056 return getReplicationBarrierResult(result); 2057 } 2058 } 2059 } 2060 2061 public static long[] getReplicationBarrier(Connection conn, byte[] regionName) 2062 throws IOException { 2063 try (Table table = getMetaHTable(conn)) { 2064 Result result = table.get(new Get(regionName) 2065 .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2066 .readAllVersions()); 2067 return getReplicationBarriers(result); 2068 } 2069 } 2070 2071 public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn, 2072 TableName tableName) throws IOException { 2073 List<Pair<String, Long>> list = new ArrayList<>(); 2074 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2075 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> { 2076 byte[] value = 2077 r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER); 2078 if (value == null) { 2079 return true; 2080 } 2081 long lastBarrier = Bytes.toLong(value); 2082 String encodedRegionName = RegionInfo.encodeRegionName(r.getRow()); 2083 list.add(Pair.newPair(encodedRegionName, lastBarrier)); 2084 return true; 2085 }); 2086 return list; 2087 } 2088 2089 public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn, 2090 TableName tableName) throws IOException { 2091 List<String> list = new ArrayList<>(); 2092 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2093 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, 2094 new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> { 2095 list.add(RegionInfo.encodeRegionName(r.getRow())); 2096 return true; 2097 }, CatalogReplicaMode.NONE); 2098 return list; 2099 } 2100 2101 
private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException { 2102 if (!METALOG.isDebugEnabled()) { 2103 return; 2104 } 2105 // Logging each mutation in separate line makes it easier to see diff between them visually 2106 // because of common starting indentation. 2107 for (Mutation mutation : mutations) { 2108 debugLogMutation(mutation); 2109 } 2110 } 2111 2112 private static void debugLogMutation(Mutation p) throws IOException { 2113 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 2114 } 2115 2116 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 2117 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow()) 2118 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getSeqNumColumn(replicaId)) 2119 .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)) 2120 .build()); 2121 } 2122}
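/*
 * Example (sketch): reading back the replication barriers recorded by the helpers above. The
 * Configuration, connection and regionName below are hypothetical placeholders.
 *
 *   Configuration conf = HBaseConfiguration.create();
 *   try (Connection conn = ConnectionFactory.createConnection(conf)) {
 *     // Sorted, de-duplicated barrier sequence ids read from the rep_barrier family of the
 *     // region's hbase:meta row.
 *     long[] barriers = MetaTableAccessor.getReplicationBarrier(conn, regionName);
 *   }
 */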