001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import edu.umd.cs.findbugs.annotations.NonNull;
021import edu.umd.cs.findbugs.annotations.Nullable;
022import java.io.ByteArrayOutputStream;
023import java.io.Closeable;
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Iterator;
030import java.util.LinkedHashMap;
031import java.util.List;
032import java.util.Map;
033import java.util.NavigableMap;
034import java.util.SortedMap;
035import java.util.TreeMap;
036import java.util.concurrent.ThreadLocalRandom;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039import java.util.stream.Collectors;
040import org.apache.hadoop.conf.Configuration;
041import org.apache.hadoop.hbase.client.Connection;
042import org.apache.hadoop.hbase.client.Consistency;
043import org.apache.hadoop.hbase.client.Delete;
044import org.apache.hadoop.hbase.client.Get;
045import org.apache.hadoop.hbase.client.Mutation;
046import org.apache.hadoop.hbase.client.Put;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.RegionReplicaUtil;
050import org.apache.hadoop.hbase.client.Result;
051import org.apache.hadoop.hbase.client.ResultScanner;
052import org.apache.hadoop.hbase.client.Scan;
053import org.apache.hadoop.hbase.client.Table;
054import org.apache.hadoop.hbase.client.TableState;
055import org.apache.hadoop.hbase.client.coprocessor.Batch;
056import org.apache.hadoop.hbase.exceptions.DeserializationException;
057import org.apache.hadoop.hbase.filter.Filter;
058import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
059import org.apache.hadoop.hbase.filter.RowFilter;
060import org.apache.hadoop.hbase.filter.SubstringComparator;
061import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
062import org.apache.hadoop.hbase.ipc.ServerRpcController;
063import org.apache.hadoop.hbase.master.RegionState;
064import org.apache.hadoop.hbase.master.RegionState.State;
065import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
066import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
067import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
068import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
069import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;
070import org.apache.hadoop.hbase.util.Bytes;
071import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
072import org.apache.hadoop.hbase.util.ExceptionUtil;
073import org.apache.hadoop.hbase.util.Pair;
074import org.apache.hadoop.hbase.util.PairOfSameType;
075import org.apache.yetus.audience.InterfaceAudience;
076import org.slf4j.Logger;
077import org.slf4j.LoggerFactory;
078
079import org.apache.hbase.thirdparty.com.google.common.base.Throwables;
080
081/**
082 * <p>
 * Read/write operations on the <code>hbase:meta</code> region, as well as assignment information
 * stored in <code>hbase:meta</code>.
085 * </p>
086 * <p>
 * Some of the methods of this class take a ZooKeeperWatcher as a parameter. The only reason for
 * this is that when this class is used on the client side (e.g. HBaseAdmin), we want a short-lived
 * connection (opened before each operation and closed right after), while when it is used on the
 * HMaster or an HRegionServer (as in AssignmentManager), we want a permanent connection.
091 * </p>
092 * <p>
 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
 * range (table, startKey, endKey). For every range, there will be at least one HRI defined, which
 * is called the default replica.
096 * </p>
097 * <p>
098 * <h2>Meta layout</h2>
099 *
100 * <pre>
101 * For each table there is single row named for the table with a 'table' column family.
102 * The column family currently has one column in it, the 'state' column:
103 *
104 * table:state             => contains table state
105 *
106 * Then for each table range ('Region'), there is a single row, formatted as:
107 * &lt;tableName&gt;,&lt;startKey&gt;,&lt;regionId&gt;,&lt;encodedRegionName&gt;.
108 * This row is the serialized regionName of the default region replica.
109 * Columns are:
110 * info:regioninfo         => contains serialized HRI for the default region replica
111 * info:server             => contains hostname:port (in string form) for the server hosting
112 *                            the default regionInfo replica
 * info:server_&lt;replicaId&gt; => contains hostname:port (in string form) for the server hosting
114 *                                 the regionInfo replica with replicaId
115 * info:serverstartcode    => contains server start code (in binary long form) for the server
116 *                            hosting the default regionInfo replica
 * info:serverstartcode_&lt;replicaId&gt; => contains server start code (in binary long form) for
118 *                                          the server hosting the regionInfo replica with
119 *                                          replicaId
120 * info:seqnumDuringOpen   => contains seqNum (in binary long form) for the region at the time
121 *                             the server opened the region with default replicaId
 * info:seqnumDuringOpen_&lt;replicaId&gt; => contains seqNum (in binary long form) for the region
123 *                                           at the time the server opened the region with
124 *                                           replicaId
125 * info:splitA             => contains a serialized HRI for the first daughter region if the
126 *                             region is split
127 * info:splitB             => contains a serialized HRI for the second daughter region if the
128 *                             region is split
129 * info:merge*             => contains a serialized HRI for a merge parent region. There will be two
130 *                             or more of these columns in a row. A row that has these columns is
131 *                             undergoing a merge and is the result of the merge. Columns listed
 *                             in the merge* columns are the parents of this merged region. Example
 *                             columns: info:merge0001, info:merge0002. You may also see 'mergeA'
 *                             and 'mergeB'. These are the old form, replaced by the new format
 *                             that allows more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
137 * </pre>
138 * </p>
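 * <p>
 * A purely illustrative sketch (hypothetical table name, error handling omitted) of typical read
 * access through the helpers below:
 *
 * <pre>
 * try (Connection conn = ConnectionFactory.createConnection(conf)) {
 *   TableName tn = TableName.valueOf("exampleTable");
 *   // All regions of the table, in order.
 *   List&lt;RegionInfo&gt; regions = MetaTableAccessor.getTableRegions(conn, tn);
 *   // Location (hosting server plus RegionInfo) of the first region.
 *   HRegionLocation loc = MetaTableAccessor.getRegionLocation(conn, regions.get(0));
 * }
 * </pre>
 * </p>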
139 * <p>
140 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not
 * leak out of it (through Result objects, etc.).
142 * </p>
143 */
144@InterfaceAudience.Private
145public class MetaTableAccessor {
146
147  private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class);
148  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");
149
150  public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent");
151
152  private static final byte ESCAPE_BYTE = (byte) 0xFF;
153
154  private static final byte SEPARATED_BYTE = 0x00;
155
156  @InterfaceAudience.Private
157  @SuppressWarnings("ImmutableEnumChecker")
158  public enum QueryType {
159    ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
160    REGION(HConstants.CATALOG_FAMILY),
161    TABLE(HConstants.TABLE_FAMILY),
162    REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY);
163
164    private final byte[][] families;
165
166    QueryType(byte[]... families) {
167      this.families = families;
168    }
169
170    byte[][] getFamilies() {
171      return this.families;
172    }
173  }
174
175  /** The delimiter for meta columns for replicaIds &gt; 0 */
176  static final char META_REPLICA_ID_DELIMITER = '_';
177
178  /** A regex for parsing server columns from meta. See above javadoc for meta layout */
179  private static final Pattern SERVER_COLUMN_PATTERN =
180    Pattern.compile("^server(_[0-9a-fA-F]{4})?$");
181
182  ////////////////////////
183  // Reading operations //
184  ////////////////////////
185
186  /**
187   * Performs a full scan of <code>hbase:meta</code> for regions.
188   * @param connection connection we're using
189   * @param visitor    Visitor invoked against each row in regions family.
190   */
191  public static void fullScanRegions(Connection connection, final Visitor visitor)
192    throws IOException {
193    scanMeta(connection, null, null, QueryType.REGION, visitor);
194  }
195
196  /**
197   * Performs a full scan of <code>hbase:meta</code> for regions.
198   * @param connection connection we're using
199   */
200  public static List<Result> fullScanRegions(Connection connection) throws IOException {
201    return fullScan(connection, QueryType.REGION);
202  }
203
204  /**
205   * Performs a full scan of <code>hbase:meta</code> for tables.
206   * @param connection connection we're using
207   * @param visitor    Visitor invoked against each row in tables family.
208   */
209  public static void fullScanTables(Connection connection, final Visitor visitor)
210    throws IOException {
211    scanMeta(connection, null, null, QueryType.TABLE, visitor);
212  }
213
214  /**
215   * Performs a full scan of <code>hbase:meta</code>.
216   * @param connection connection we're using
217   * @param type       scanned part of meta
218   * @return List of {@link Result}
219   */
220  private static List<Result> fullScan(Connection connection, QueryType type) throws IOException {
221    CollectAllVisitor v = new CollectAllVisitor();
222    scanMeta(connection, null, null, type, v);
223    return v.getResults();
224  }
225
226  /**
227   * Callers should call close on the returned {@link Table} instance.
228   * @param connection connection we're using to access Meta
229   * @return An {@link Table} for <code>hbase:meta</code>
230   */
231  public static Table getMetaHTable(final Connection connection) throws IOException {
232    // We used to pass whole CatalogTracker in here, now we just pass in Connection
233    if (connection == null) {
234      throw new NullPointerException("No connection");
235    } else if (connection.isClosed()) {
236      throw new IOException("connection is closed");
237    }
238    return connection.getTable(TableName.META_TABLE_NAME);
239  }
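
  // For example (a hypothetical caller; the row key is assumed), use try-with-resources so the
  // returned Table is closed as the javadoc above requires:
  //   try (Table meta = MetaTableAccessor.getMetaHTable(connection)) {
  //     Result r = meta.get(new Get(someMetaRowKey));
  //   }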
240
241  /**
242   * @param t Table to use (will be closed when done).
243   * @param g Get to run
244   */
245  private static Result get(final Table t, final Get g) throws IOException {
246    if (t == null) return null;
247    try {
248      return t.get(g);
249    } finally {
250      t.close();
251    }
252  }
253
254  /**
255   * Gets the region info and assignment for the specified region.
256   * @param connection connection we're using
257   * @param regionName Region to lookup.
258   * @return Location and RegionInfo for <code>regionName</code>
259   * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead
260   */
261  @Deprecated
262  public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte[] regionName)
263    throws IOException {
264    HRegionLocation location = getRegionLocation(connection, regionName);
265    return location == null ? null : new Pair<>(location.getRegionInfo(), location.getServerName());
266  }
267
268  /**
269   * Returns the HRegionLocation from meta for the given region
270   * @param connection connection we're using
271   * @param regionName region we're looking for
272   * @return HRegionLocation for the given region
273   */
274  public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName)
275    throws IOException {
276    byte[] row = regionName;
277    RegionInfo parsedInfo = null;
278    try {
279      parsedInfo = parseRegionInfoFromRegionName(regionName);
280      row = getMetaKeyForRegion(parsedInfo);
281    } catch (Exception parseEx) {
      // If it is not a valid regionName (e.g. it is just a tableName), return null here,
      // as querying the meta table won't help.
284      return null;
285    }
286    Get get = new Get(row);
287    get.addFamily(HConstants.CATALOG_FAMILY);
288    Result r = get(getMetaHTable(connection), get);
289    RegionLocations locations = getRegionLocations(r);
290    return locations == null
291      ? null
292      : locations.getRegionLocation(
293        parsedInfo == null ? RegionInfo.DEFAULT_REPLICA_ID : parsedInfo.getReplicaId());
294  }
295
296  /**
297   * Returns the HRegionLocation from meta for the given region
298   * @param connection connection we're using
299   * @param regionInfo region information
300   * @return HRegionLocation for the given region
301   */
302  public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo)
303    throws IOException {
304    return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), regionInfo,
305      regionInfo.getReplicaId());
306  }
307
  /** Returns the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. */
309  public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri)
310    throws IOException {
311    Get get = new Get(getMetaKeyForRegion(ri));
312    get.addFamily(HConstants.CATALOG_FAMILY);
313    return get(getMetaHTable(connection), get);
314  }
315
316  /** Returns the row key to use for this regionInfo */
317  public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) {
318    return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
319  }
320
321  /**
   * Returns an HRI parsed from this regionName. Not all the fields of the HRI are stored in the
   * name, so the returned object should only be used for the fields present in the regionName.
324   */
325  // This should be moved to RegionInfo? TODO.
326  public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
327    byte[][] fields = RegionInfo.parseRegionName(regionName);
328    long regionId = Long.parseLong(Bytes.toString(fields[2]));
329    int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0;
330    return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])).setStartKey(fields[1])
331      .setRegionId(regionId).setReplicaId(replicaId).build();
332  }
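
  // Illustrative sketch with hypothetical values: a region name such as
  //   "exampleTable,startRow,1606060606060.5ae18b2a0e3b2f6c1d6a3e8a6f3c2d1b."
  // parses to table "exampleTable", startKey "startRow", regionId 1606060606060 and, since no
  // replicaId suffix is present, replicaId 0. Fields not encoded in the name (e.g. the endKey)
  // are not populated on the returned RegionInfo.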
333
334  /**
335   * Gets the result in hbase:meta for the specified region.
336   * @param connection connection we're using
337   * @param regionInfo region we're looking for
338   * @return result of the specified region
339   */
340  public static Result getRegionResult(Connection connection, RegionInfo regionInfo)
341    throws IOException {
342    Get get = new Get(getMetaKeyForRegion(regionInfo));
343    get.addFamily(HConstants.CATALOG_FAMILY);
344    return get(getMetaHTable(connection), get);
345  }
346
347  /**
348   * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, returning
349   * a single related <code>Result</code> instance if any row is found, null otherwise.
350   * @param connection        the connection to query META table.
351   * @param regionEncodedName the region encoded name to look for at META.
   * @return <code>Result</code> instance with the related row info from META, or null if no row is found.
353   * @throws IOException if any errors occur while querying META.
354   */
355  public static Result scanByRegionEncodedName(Connection connection, String regionEncodedName)
356    throws IOException {
357    RowFilter rowFilter =
358      new RowFilter(CompareOperator.EQUAL, new SubstringComparator(regionEncodedName));
359    Scan scan = getMetaScan(connection.getConfiguration(), 1);
360    scan.setFilter(rowFilter);
361    try (Table table = getMetaHTable(connection);
362      ResultScanner resultScanner = table.getScanner(scan)) {
363      return resultScanner.next();
364    }
365  }
366
367  /**
   * Returns all RegionInfos listed in the 'info:merge*' columns of the {@code regionInfo}
369   * row.
370   */
371  @Nullable
372  public static List<RegionInfo> getMergeRegions(Connection connection, RegionInfo regionInfo)
373    throws IOException {
374    return getMergeRegions(getRegionResult(connection, regionInfo).rawCells());
375  }
376
377  /**
378   * Check whether the given {@code regionInfo} has any 'info:merge*' columns.
379   */
380  public static boolean hasMergeRegions(Connection conn, RegionInfo regionInfo) throws IOException {
381    return hasMergeRegions(getRegionResult(conn, regionInfo).rawCells());
382  }
383
384  /**
   * Returns deserialized &lt;qualifier, regioninfo&gt; pairs taken from the column values whose
   * qualifiers match the regex 'info:merge.*' in the given array of <code>cells</code>.
387   */
388  @Nullable
389  public static Map<String, RegionInfo> getMergeRegionsWithName(Cell[] cells) {
390    if (cells == null) {
391      return null;
392    }
393    Map<String, RegionInfo> regionsToMerge = null;
394    for (Cell cell : cells) {
395      if (!isMergeQualifierPrefix(cell)) {
396        continue;
397      }
398      // Ok. This cell is that of a info:merge* column.
399      RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
400        cell.getValueLength());
401      if (ri != null) {
402        if (regionsToMerge == null) {
403          regionsToMerge = new LinkedHashMap<>();
404        }
405        regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri);
406      }
407    }
408    return regionsToMerge;
409  }
410
411  /**
   * Returns deserialized RegionInfo values taken from the column values whose qualifiers match
   * the regex 'info:merge.*' in the given array of <code>cells</code>.
414   */
415  @Nullable
416  public static List<RegionInfo> getMergeRegions(Cell[] cells) {
417    Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells);
418    return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values());
419  }
420
421  /**
   * Returns true if any merge regions are present in <code>cells</code>, i.e. a column in
   * <code>cells</code> matches the regex 'info:merge.*'.
424   */
425  public static boolean hasMergeRegions(Cell[] cells) {
426    for (Cell cell : cells) {
427      if (!isMergeQualifierPrefix(cell)) {
428        continue;
429      }
430      return true;
431    }
432    return false;
433  }
434
435  /** Returns True if the column in <code>cell</code> matches the regex 'info:merge.*'. */
436  private static boolean isMergeQualifierPrefix(Cell cell) {
    // Check that the cell is in the catalog family and its qualifier starts with the merge prefix 'merge'
438    return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY)
439      && PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX);
440  }
441
442  /**
443   * Lists all of the regions currently in META.
444   * @param connection                  to connect with
   * @param excludeOfflinedSplitParents if true, offlined split-parent regions are left out of the
   *                                    returned list; if false, they are included
447   * @return List of all user-space regions.
448   */
449  public static List<RegionInfo> getAllRegions(Connection connection,
450    boolean excludeOfflinedSplitParents) throws IOException {
    List<Pair<RegionInfo, ServerName>> result =
      getTableRegionsAndLocations(connection, null, excludeOfflinedSplitParents);
    return getListOfRegionInfos(result);
  }
458
459  /**
460   * Gets all of the regions of the specified table. Do not use this method to get meta table
461   * regions, use methods in MetaTableLocator instead.
462   * @param connection connection we're using
463   * @param tableName  table we're looking for
464   * @return Ordered list of {@link RegionInfo}.
465   */
466  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName)
467    throws IOException {
468    return getTableRegions(connection, tableName, false);
469  }
470
471  /**
472   * Gets all of the regions of the specified table. Do not use this method to get meta table
473   * regions, use methods in MetaTableLocator instead.
474   * @param connection                  connection we're using
475   * @param tableName                   table we're looking for
476   * @param excludeOfflinedSplitParents If true, do not include offlined split parents in the
477   *                                    return.
478   * @return Ordered list of {@link RegionInfo}.
479   */
480  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName,
481    final boolean excludeOfflinedSplitParents) throws IOException {
482    List<Pair<RegionInfo, ServerName>> result =
483      getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents);
484    return getListOfRegionInfos(result);
485  }
486
487  @SuppressWarnings("MixedMutabilityReturnType")
488  private static List<RegionInfo>
489    getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) {
490    if (pairs == null || pairs.isEmpty()) {
491      return Collections.emptyList();
492    }
493    List<RegionInfo> result = new ArrayList<>(pairs.size());
494    for (Pair<RegionInfo, ServerName> pair : pairs) {
495      result.add(pair.getFirst());
496    }
497    return result;
498  }
499
500  /**
501   * Returns start row for scanning META according to query type
502   */
503  public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) {
504    if (tableName == null) {
505      return null;
506    }
507    switch (type) {
508      case REGION:
509        byte[] startRow = new byte[tableName.getName().length + 2];
510        System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length);
511        startRow[startRow.length - 2] = HConstants.DELIMITER;
512        startRow[startRow.length - 1] = HConstants.DELIMITER;
513        return startRow;
514      case ALL:
515      case TABLE:
516      default:
517        return tableName.getName();
518    }
519  }
520
521  /**
522   * Returns stop row for scanning META according to query type
523   */
524  public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) {
525    if (tableName == null) {
526      return null;
527    }
528    final byte[] stopRow;
529    switch (type) {
530      case REGION:
531        stopRow = new byte[tableName.getName().length + 3];
532        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
533        stopRow[stopRow.length - 3] = ' ';
534        stopRow[stopRow.length - 2] = HConstants.DELIMITER;
535        stopRow[stopRow.length - 1] = HConstants.DELIMITER;
536        break;
537      case ALL:
538      case TABLE:
539      default:
540        stopRow = new byte[tableName.getName().length + 1];
541        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
542        stopRow[stopRow.length - 1] = ' ';
543        break;
544    }
545    return stopRow;
546  }
547
548  /**
549   * This method creates a Scan object that will only scan catalog rows that belong to the specified
   * table. It doesn't specify any columns. This is a better alternative to just using a start row
   * and scanning until a new table is hit, since that requires parsing the HRI to get the table name.
552   * @param tableName bytes of table's name
553   * @return configured Scan object
554   */
555  public static Scan getScanForTableName(Configuration conf, TableName tableName) {
556    // Start key is just the table name with delimiters
557    byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION);
558    // Stop key appends the smallest possible char to the table name
559    byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION);
560
561    Scan scan = getMetaScan(conf, -1);
    scan.withStartRow(startKey);
    scan.withStopRow(stopKey);
564    return scan;
565  }
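
  // A minimal usage sketch (hypothetical conf/connection/table name, error handling omitted):
  // scan only the catalog rows of one table and decode each row's RegionInfo.
  //   Scan scan = MetaTableAccessor.getScanForTableName(conf, TableName.valueOf("exampleTable"));
  //   try (Table meta = MetaTableAccessor.getMetaHTable(connection);
  //     ResultScanner rs = meta.getScanner(scan)) {
  //     for (Result r : rs) {
  //       RegionInfo ri = MetaTableAccessor.getRegionInfo(r);
  //     }
  //   }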
566
567  private static Scan getMetaScan(Configuration conf, int rowUpperLimit) {
568    Scan scan = new Scan();
569    int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING,
570      HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
571    if (conf.getBoolean(HConstants.USE_META_REPLICAS, HConstants.DEFAULT_USE_META_REPLICAS)) {
572      scan.setConsistency(Consistency.TIMELINE);
573    }
574    if (rowUpperLimit > 0) {
575      scan.setLimit(rowUpperLimit);
576      scan.setReadType(Scan.ReadType.PREAD);
577    }
578    scan.setCaching(scannerCaching);
579    return scan;
580  }
581
582  /**
583   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
584   * @param connection connection we're using
585   * @param tableName  table we're looking for
   * @return List of RegionInfo and ServerName pairs.
587   */
588  public static List<Pair<RegionInfo, ServerName>>
589    getTableRegionsAndLocations(Connection connection, TableName tableName) throws IOException {
590    return getTableRegionsAndLocations(connection, tableName, true);
591  }
592
593  /**
594   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
595   * @param connection                  connection we're using
596   * @param tableName                   table to work with, can be null for getting all regions
597   * @param excludeOfflinedSplitParents don't return split parents
   * @return List of RegionInfo and ServerName pairs.
599   */
600  // What happens here when 1M regions in hbase:meta? This won't scale?
601  public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(
602    Connection connection, @Nullable final TableName tableName,
603    final boolean excludeOfflinedSplitParents) throws IOException {
604    if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) {
605      throw new IOException(
606        "This method can't be used to locate meta regions;" + " use MetaTableLocator instead");
607    }
608    // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress
609    CollectingVisitor<Pair<RegionInfo, ServerName>> visitor =
610      new CollectingVisitor<Pair<RegionInfo, ServerName>>() {
611        private RegionLocations current = null;
612
613        @Override
614        public boolean visit(Result r) throws IOException {
615          current = getRegionLocations(r);
616          if (current == null || current.getRegionLocation().getRegion() == null) {
617            LOG.warn("No serialized RegionInfo in " + r);
618            return true;
619          }
620          RegionInfo hri = current.getRegionLocation().getRegion();
621          if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
622          // Else call super and add this Result to the collection.
623          return super.visit(r);
624        }
625
626        @Override
627        void add(Result r) {
628          if (current == null) {
629            return;
630          }
631          for (HRegionLocation loc : current.getRegionLocations()) {
632            if (loc != null) {
633              this.results.add(new Pair<>(loc.getRegion(), loc.getServerName()));
634            }
635          }
636        }
637      };
638    scanMeta(connection, getTableStartRowForMeta(tableName, QueryType.REGION),
639      getTableStopRowForMeta(tableName, QueryType.REGION), QueryType.REGION, visitor);
640    return visitor.getResults();
641  }
642
643  /**
644   * Get the user regions a given server is hosting.
645   * @param connection connection we're using
646   * @param serverName server whose regions we're interested in
647   * @return List of user regions installed on this server (does not include catalog regions).
648   */
649  public static NavigableMap<RegionInfo, Result> getServerUserRegions(Connection connection,
650    final ServerName serverName) throws IOException {
651    final NavigableMap<RegionInfo, Result> hris = new TreeMap<>();
652    // Fill the above hris map with entries from hbase:meta that have the passed
653    // servername.
654    CollectingVisitor<Result> v = new CollectingVisitor<Result>() {
655      @Override
656      void add(Result r) {
657        if (r == null || r.isEmpty()) return;
658        RegionLocations locations = getRegionLocations(r);
659        if (locations == null) return;
660        for (HRegionLocation loc : locations.getRegionLocations()) {
661          if (loc != null) {
662            if (loc.getServerName() != null && loc.getServerName().equals(serverName)) {
663              hris.put(loc.getRegion(), r);
664            }
665          }
666        }
667      }
668    };
669    scanMeta(connection, null, null, QueryType.REGION, v);
670    return hris;
671  }
672
673  public static void fullScanMetaAndPrint(Connection connection) throws IOException {
674    Visitor v = r -> {
675      if (r == null || r.isEmpty()) {
676        return true;
677      }
678      LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r);
679      TableState state = getTableState(r);
680      if (state != null) {
        LOG.info("fullScanMetaAndPrint.Table State={}", state);
682      } else {
683        RegionLocations locations = getRegionLocations(r);
684        if (locations == null) {
685          return true;
686        }
687        for (HRegionLocation loc : locations.getRegionLocations()) {
688          if (loc != null) {
689            LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion());
690          }
691        }
692      }
693      return true;
694    };
695    scanMeta(connection, null, null, QueryType.ALL, v);
696  }
697
698  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
699    TableName tableName, CatalogReplicaMode metaReplicaMode) throws IOException {
700    scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor, metaReplicaMode);
701  }
702
703  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
704    TableName tableName) throws IOException {
705    scanMetaForTableRegions(connection, visitor, tableName, CatalogReplicaMode.NONE);
706  }
707
708  private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows,
709    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
710    scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type),
711      type, null, maxRows, visitor, metaReplicaMode);
  }
714
715  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
716    @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException {
717    scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor);
718  }
719
720  /**
721   * Performs a scan of META table for given table starting from given row.
722   * @param connection connection we're using
723   * @param visitor    visitor to call
   * @param tableName  table within which we scan
725   * @param row        start scan from this row
726   * @param rowLimit   max number of rows to return
727   */
728  public static void scanMeta(Connection connection, final Visitor visitor,
729    final TableName tableName, final byte[] row, final int rowLimit) throws IOException {
730    byte[] startRow = null;
731    byte[] stopRow = null;
732    if (tableName != null) {
733      startRow = getTableStartRowForMeta(tableName, QueryType.REGION);
734      if (row != null) {
735        RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row);
736        startRow =
737          RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false);
738      }
739      stopRow = getTableStopRowForMeta(tableName, QueryType.REGION);
740    }
741    scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor);
742  }
743
744  /**
745   * Performs a scan of META table.
746   * @param connection connection we're using
   * @param startRow   Where to start the scan. Pass null to begin the scan at the first row.
   * @param stopRow    Where to stop the scan. Pass null to scan all rows from the start row onward.
749   * @param type       scanned part of meta
750   * @param maxRows    maximum rows to return
751   * @param visitor    Visitor invoked against each row.
752   */
753  static void scanMeta(Connection connection, @Nullable final byte[] startRow,
754    @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor)
755    throws IOException {
756    scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor, CatalogReplicaMode.NONE);
757  }
758
759  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
760    @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
761    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
762    int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
763    Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit);
764
765    for (byte[] family : type.getFamilies()) {
766      scan.addFamily(family);
767    }
768    if (startRow != null) {
769      scan.withStartRow(startRow);
770    }
771    if (stopRow != null) {
772      scan.withStopRow(stopRow);
773    }
774    if (filter != null) {
775      scan.setFilter(filter);
776    }
777
778    if (LOG.isTraceEnabled()) {
779      LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow)
780        + " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit
781        + " with caching=" + scan.getCaching());
782    }
783
784    int currentRow = 0;
785    try (Table metaTable = getMetaHTable(connection)) {
786      switch (metaReplicaMode) {
787        case LOAD_BALANCE:
788          int numOfReplicas = metaTable.getDescriptor().getRegionReplication();
789          if (numOfReplicas > 1) {
790            int replicaId = ThreadLocalRandom.current().nextInt(numOfReplicas);
791
792            // When the replicaId is 0, do not set to Consistency.TIMELINE
793            if (replicaId > 0) {
794              scan.setReplicaId(replicaId);
795              scan.setConsistency(Consistency.TIMELINE);
796            }
797          }
798          break;
799        case HEDGED_READ:
800          scan.setConsistency(Consistency.TIMELINE);
801          break;
802        default:
803          // Do nothing
804      }
805      try (ResultScanner scanner = metaTable.getScanner(scan)) {
806        Result data;
807        while ((data = scanner.next()) != null) {
808          if (data.isEmpty()) continue;
809          // Break if visit returns false.
810          if (!visitor.visit(data)) break;
811          if (++currentRow >= rowUpperLimit) break;
812        }
813      }
814    }
815    if (visitor instanceof Closeable) {
816      try {
817        ((Closeable) visitor).close();
818      } catch (Throwable t) {
819        ExceptionUtil.rethrowIfInterrupt(t);
820        LOG.debug("Got exception in closing the meta scanner visitor", t);
821      }
822    }
823  }
824
  /** Returns the RegionInfo for the closest meta table region row to the passed <code>row</code>. */
826  @NonNull
827  private static RegionInfo getClosestRegionInfo(Connection connection,
828    @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException {
829    byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
830    Scan scan = getMetaScan(connection.getConfiguration(), 1);
831    scan.setReversed(true);
832    scan.withStartRow(searchRow);
833    try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) {
834      Result result = resultScanner.next();
835      if (result == null) {
        throw new TableNotFoundException("Cannot find row in META for table: " + tableName
837          + ", row=" + Bytes.toStringBinary(row));
838      }
839      RegionInfo regionInfo = getRegionInfo(result);
840      if (regionInfo == null) {
841        throw new IOException("RegionInfo was null or empty in Meta for " + tableName + ", row="
842          + Bytes.toStringBinary(row));
843      }
844      return regionInfo;
845    }
846  }
847
848  /**
849   * Returns the column family used for meta columns.
850   * @return HConstants.CATALOG_FAMILY.
851   */
852  public static byte[] getCatalogFamily() {
853    return HConstants.CATALOG_FAMILY;
854  }
855
856  /**
857   * Returns the column family used for table columns.
858   * @return HConstants.TABLE_FAMILY.
859   */
860  private static byte[] getTableFamily() {
861    return HConstants.TABLE_FAMILY;
862  }
863
864  /**
865   * Returns the column qualifier for serialized region info
866   * @return HConstants.REGIONINFO_QUALIFIER
867   */
868  public static byte[] getRegionInfoColumn() {
869    return HConstants.REGIONINFO_QUALIFIER;
870  }
871
872  /**
873   * Returns the column qualifier for serialized table state
874   * @return HConstants.TABLE_STATE_QUALIFIER
875   */
876  private static byte[] getTableStateColumn() {
877    return HConstants.TABLE_STATE_QUALIFIER;
878  }
879
880  /**
881   * Returns the column qualifier for serialized region state
882   * @return HConstants.STATE_QUALIFIER
883   */
884  private static byte[] getRegionStateColumn() {
885    return HConstants.STATE_QUALIFIER;
886  }
887
888  /**
889   * Returns the column qualifier for serialized region state
890   * @param replicaId the replicaId of the region
891   * @return a byte[] for state qualifier
892   */
893  public static byte[] getRegionStateColumn(int replicaId) {
894    return replicaId == 0
895      ? HConstants.STATE_QUALIFIER
896      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
897        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
898  }
899
900  /**
   * Returns the column qualifier for the server-name column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for the server name column qualifier
904   */
905  public static byte[] getServerNameColumn(int replicaId) {
906    return replicaId == 0
907      ? HConstants.SERVERNAME_QUALIFIER
908      : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
909        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
910  }
911
912  /**
913   * Returns the column qualifier for server column for replicaId
914   * @param replicaId the replicaId of the region
915   * @return a byte[] for server column qualifier
916   */
917  public static byte[] getServerColumn(int replicaId) {
918    return replicaId == 0
919      ? HConstants.SERVER_QUALIFIER
920      : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
921        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
922  }
923
924  /**
925   * Returns the column qualifier for server start code column for replicaId
926   * @param replicaId the replicaId of the region
927   * @return a byte[] for server start code column qualifier
928   */
929  public static byte[] getStartCodeColumn(int replicaId) {
930    return replicaId == 0
931      ? HConstants.STARTCODE_QUALIFIER
932      : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
933        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
934  }
935
936  /**
937   * Returns the column qualifier for seqNum column for replicaId
938   * @param replicaId the replicaId of the region
939   * @return a byte[] for seqNum column qualifier
940   */
941  public static byte[] getSeqNumColumn(int replicaId) {
942    return replicaId == 0
943      ? HConstants.SEQNUM_QUALIFIER
944      : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
945        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
946  }
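
  // Illustrative qualifier names produced by the helpers above (the default replica uses the bare
  // qualifier; other replicas get a '_' plus the replicaId formatted per RegionInfo.REPLICA_ID_FORMAT):
  //   getServerColumn(0)    -> "server"                 getServerColumn(1) -> "server_0001"
  //   getStartCodeColumn(1) -> "serverstartcode_0001"
  //   getSeqNumColumn(1)    -> "seqnumDuringOpen_0001"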
947
948  /**
949   * Parses the replicaId from the server column qualifier. See top of the class javadoc for the
950   * actual meta layout
951   * @param serverColumn the column qualifier
952   * @return an int for the replicaId
953   */
954  static int parseReplicaIdFromServerColumn(byte[] serverColumn) {
955    String serverStr = Bytes.toString(serverColumn);
956
957    Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr);
958    if (matcher.matches() && matcher.groupCount() > 0) {
959      String group = matcher.group(1);
960      if (group != null && group.length() > 0) {
961        return Integer.parseInt(group.substring(1), 16);
962      } else {
963        return 0;
964      }
965    }
966    return -1;
967  }
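
  // Illustrative behaviour, following SERVER_COLUMN_PATTERN above (inputs are hypothetical):
  //   parseReplicaIdFromServerColumn(Bytes.toBytes("server"))      -> 0   (default replica)
  //   parseReplicaIdFromServerColumn(Bytes.toBytes("server_0001")) -> 1   (hex-encoded suffix)
  //   parseReplicaIdFromServerColumn(Bytes.toBytes("regioninfo"))  -> -1  (not a server column)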
968
969  /**
970   * Returns a {@link ServerName} from catalog table {@link Result}.
971   * @param r Result to pull from
972   * @return A ServerName instance or null if necessary fields not found or empty.
973   */
974  @Nullable
975  @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only
976  public static ServerName getServerName(final Result r, final int replicaId) {
977    byte[] serverColumn = getServerColumn(replicaId);
978    Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn);
979    if (cell == null || cell.getValueLength() == 0) return null;
980    String hostAndPort =
981      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
982    byte[] startcodeColumn = getStartCodeColumn(replicaId);
983    cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn);
984    if (cell == null || cell.getValueLength() == 0) return null;
985    try {
986      return ServerName.valueOf(hostAndPort,
987        Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
988    } catch (IllegalArgumentException e) {
989      LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e);
990      return null;
991    }
992  }
993
994  /**
   * Returns the {@link ServerName} from the catalog table {@link Result} that identifies the
   * server the region is transitioning on. It should be the same as
   * {@link MetaTableAccessor#getServerName(Result,int)} if the region is in the OPEN state.
   * @param r Result to pull the transitioning server name from
   * @return A ServerName instance, or the result of
   *         {@link MetaTableAccessor#getServerName(Result,int)} if the necessary fields are not
   *         found or are empty.
1001   */
1002  @Nullable
1003  public static ServerName getTargetServerName(final Result r, final int replicaId) {
1004    final Cell cell =
1005      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId));
1006    if (cell == null || cell.getValueLength() == 0) {
1007      RegionLocations locations = MetaTableAccessor.getRegionLocations(r);
1008      if (locations != null) {
1009        HRegionLocation location = locations.getRegionLocation(replicaId);
1010        if (location != null) {
1011          return location.getServerName();
1012        }
1013      }
1014      return null;
1015    }
1016    return ServerName.parseServerName(
1017      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
1018  }
1019
1020  /**
1021   * The latest seqnum that the server writing to meta observed when opening the region. E.g. the
1022   * seqNum when the result of {@link #getServerName(Result, int)} was written.
1023   * @param r Result to pull the seqNum from
1024   * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written.
1025   */
1026  private static long getSeqNumDuringOpen(final Result r, final int replicaId) {
1027    Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId));
1028    if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM;
1029    return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
1030  }
1031
1032  /**
1033   * Returns the daughter regions by reading the corresponding columns of the catalog table Result.
1034   * @param data a Result object from the catalog table scan
1035   * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent
1036   */
1037  public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) {
1038    RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER);
1039    RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER);
1040    return new PairOfSameType<>(splitA, splitB);
1041  }
1042
1043  /**
   * Returns a {@link RegionLocations} instance extracted from the result.
   * @return a {@link RegionLocations} instance containing all locations for the region range, or
   *         null if we can't deserialize the result.
1047   */
1048  @Nullable
1049  public static RegionLocations getRegionLocations(final Result r) {
1050    if (r == null) return null;
1051    RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn());
1052    if (regionInfo == null) return null;
1053
1054    List<HRegionLocation> locations = new ArrayList<>(1);
1055    NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap();
1056
1057    locations.add(getRegionLocation(r, regionInfo, 0));
1058
1059    NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily());
1060    if (infoMap == null) return new RegionLocations(locations);
1061
1062    // iterate until all serverName columns are seen
1063    int replicaId = 0;
1064    byte[] serverColumn = getServerColumn(replicaId);
1065    SortedMap<byte[], byte[]> serverMap;
1066    serverMap = infoMap.tailMap(serverColumn, false);
1067
1068    if (serverMap.isEmpty()) return new RegionLocations(locations);
1069
1070    for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) {
1071      replicaId = parseReplicaIdFromServerColumn(entry.getKey());
1072      if (replicaId < 0) {
1073        break;
1074      }
1075      HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
      // In case the region replica is newly created, its location might be null. We usually do not
      // keep HRLs with a null ServerName in the RegionLocations object; they are handled as null HRLs.
1078      if (location.getServerName() == null) {
1079        locations.add(null);
1080      } else {
1081        locations.add(location);
1082      }
1083    }
1084
1085    return new RegionLocations(locations);
1086  }
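
  // A brief sketch (hypothetical, error handling omitted): resolve all replica locations of a
  // region from its single meta row.
  //   Result r = MetaTableAccessor.getRegionResult(connection, regionInfo);
  //   RegionLocations locs = MetaTableAccessor.getRegionLocations(r);
  //   HRegionLocation defaultLoc = locs == null ? null : locs.getRegionLocation();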
1087
1088  /**
1089   * Returns the HRegionLocation parsed from the given meta row Result for the given regionInfo and
1090   * replicaId. The regionInfo can be the default region info for the replica.
1091   * @param r          the meta row result
1092   * @param regionInfo RegionInfo for default replica
1093   * @param replicaId  the replicaId for the HRegionLocation
1094   * @return HRegionLocation parsed from the given meta row Result for the given replicaId
1095   */
1096  private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo,
1097    final int replicaId) {
1098    ServerName serverName = getServerName(r, replicaId);
1099    long seqNum = getSeqNumDuringOpen(r, replicaId);
1100    RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId);
1101    return new HRegionLocation(replicaInfo, serverName, seqNum);
1102  }
1103
1104  /**
1105   * Returns RegionInfo object from the column
1106   * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog table Result.
1107   * @param data a Result object from the catalog table scan
1108   * @return RegionInfo or null
1109   */
1110  public static RegionInfo getRegionInfo(Result data) {
1111    return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER);
1112  }
1113
1114  /**
1115   * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and
1116   * <code>qualifier</code> of the catalog table result.
1117   * @param r         a Result object from the catalog table scan
   * @param qualifier Column qualifier
   * @return A RegionInfo instance or null.
1120   */
1121  @Nullable
1122  public static RegionInfo getRegionInfo(final Result r, byte[] qualifier) {
1123    Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier);
1124    if (cell == null) return null;
1125    return RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
1126      cell.getValueLength());
1127  }
1128
1129  /**
1130   * Fetch table state for given table from META table
1131   * @param conn      connection to use
1132   * @param tableName table to fetch state for
1133   */
1134  @Nullable
1135  public static TableState getTableState(Connection conn, TableName tableName) throws IOException {
1136    if (tableName.equals(TableName.META_TABLE_NAME)) {
1137      return new TableState(tableName, TableState.State.ENABLED);
1138    }
    // Close the meta Table when done; getMetaHTable callers are expected to close it.
    try (Table metaHTable = getMetaHTable(conn)) {
      Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn());
      Result result = metaHTable.get(get);
      return getTableState(result);
    }
1143  }
1144
1145  /**
1146   * Fetch table states from META table
1147   * @param conn connection to use
1148   * @return map {tableName -&gt; state}
1149   */
1150  public static Map<TableName, TableState> getTableStates(Connection conn) throws IOException {
1151    final Map<TableName, TableState> states = new LinkedHashMap<>();
1152    Visitor collector = r -> {
1153      TableState state = getTableState(r);
1154      if (state != null) {
1155        states.put(state.getTableName(), state);
1156      }
1157      return true;
1158    };
1159    fullScanTables(conn, collector);
1160    return states;
1161  }
1162
1163  /**
   * Updates table state in META. Do not use; for internal use only.
1165   * @param conn      connection to use
1166   * @param tableName table to look for
1167   */
1168  public static void updateTableState(Connection conn, TableName tableName, TableState.State actual)
1169    throws IOException {
1170    updateTableState(conn, new TableState(tableName, actual));
1171  }
1172
1173  /**
   * Decode table state from a META Result. The Result should contain a cell from HConstants.TABLE_FAMILY.
1175   * @return null if not found
1176   */
1177  @Nullable
1178  public static TableState getTableState(Result r) throws IOException {
1179    Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn());
1180    if (cell == null) {
1181      return null;
1182    }
1183    try {
1184      return TableState.parseFrom(TableName.valueOf(r.getRow()),
1185        Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(),
1186          cell.getValueOffset() + cell.getValueLength()));
1187    } catch (DeserializationException e) {
1188      throw new IOException(e);
1189    }
1190  }
1191
1192  /**
1193   * Implementations 'visit' a catalog table row.
1194   */
1195  public interface Visitor {
1196    /**
1197     * Visit the catalog table row.
1198     * @param r A row from catalog table
1199     * @return True if we are to proceed scanning the table, else false if we are to stop now.
1200     */
1201    boolean visit(final Result r) throws IOException;
1202  }
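
  // A small hypothetical sketch of a Visitor supplied as a lambda: count the catalog rows of one
  // table while scanning (returning true keeps the scan going).
  //   AtomicInteger rows = new AtomicInteger();
  //   MetaTableAccessor.scanMetaForTableRegions(connection,
  //     r -> { rows.incrementAndGet(); return true; }, tableName);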
1203
1204  /**
1205   * Implementations 'visit' a catalog table row but with close() at the end.
1206   */
1207  public interface CloseableVisitor extends Visitor, Closeable {
1208  }
1209
1210  /**
1211   * A {@link Visitor} that collects content out of passed {@link Result}.
1212   */
1213  static abstract class CollectingVisitor<T> implements Visitor {
1214    final List<T> results = new ArrayList<>();
1215
1216    @Override
1217    public boolean visit(Result r) throws IOException {
1218      if (r != null && !r.isEmpty()) {
1219        add(r);
1220      }
1221      return true;
1222    }
1223
1224    abstract void add(Result r);
1225
    /** Returns collected results; wait until visits complete to collect all possible results. */
1227    List<T> getResults() {
1228      return this.results;
1229    }
1230  }
1231
1232  /**
   * Collects all returned results.
1234   */
1235  static class CollectAllVisitor extends CollectingVisitor<Result> {
1236    @Override
1237    void add(Result r) {
1238      this.results.add(r);
1239    }
1240  }
1241
1242  /**
1243   * A Visitor that skips offline regions and split parents
1244   */
1245  public static abstract class DefaultVisitorBase implements Visitor {
1246
1247    DefaultVisitorBase() {
1248      super();
1249    }
1250
1251    public abstract boolean visitInternal(Result rowResult) throws IOException;
1252
1253    @Override
1254    public boolean visit(Result rowResult) throws IOException {
1255      RegionInfo info = getRegionInfo(rowResult);
1256      if (info == null) {
1257        return true;
1258      }
1259
1260      // skip over offline and split regions
1261      if (!(info.isOffline() || info.isSplit())) {
1262        return visitInternal(rowResult);
1263      }
1264      return true;
1265    }
1266  }
1267
1268  /**
1269   * A Visitor for a table. Provides a consistent view of the table's hbase:meta entries during
1270   * concurrent splits (see HBASE-5986 for details). This class does not guarantee ordered traversal
1271   * of meta entries, and can block until the hbase:meta entries for daughters are available during
1272   * splits.
1273   */
1274  public static abstract class TableVisitorBase extends DefaultVisitorBase {
1275    private TableName tableName;
1276
1277    public TableVisitorBase(TableName tableName) {
1278      super();
1279      this.tableName = tableName;
1280    }
1281
1282    @Override
1283    public final boolean visit(Result rowResult) throws IOException {
1284      RegionInfo info = getRegionInfo(rowResult);
1285      if (info == null) {
1286        return true;
1287      }
1288      if (!info.getTable().equals(tableName)) {
1289        return false;
1290      }
1291      return super.visit(rowResult);
1292    }
1293  }
1294
1295  ////////////////////////
1296  // Editing operations //
1297  ////////////////////////
1298
1299  /**
1300   * Generates and returns a {@link Put} containing the {@link RegionInfo} for the catalog table.
1301   * @throws IllegalArgumentException when the provided RegionInfo is not the default replica.
1302   */
1303  public static Put makePutFromRegionInfo(RegionInfo regionInfo) throws IOException {
1304    return makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime());
1305  }
1306
1307  /**
1308   * Generates and returns a {@link Put} containing the {@link RegionInfo} for the catalog table.
1309   * @throws IllegalArgumentException when the provided RegionInfo is not the default replica.
1310   */
1311  public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
1312    return addRegionInfo(new Put(getMetaKeyForRegion(regionInfo), ts), regionInfo);
1313  }
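
  // A hedged usage sketch (internal use; regionInfo must be the default replica or an
  // IllegalArgumentException is thrown, per the javadoc above):
  //   Put put = MetaTableAccessor.makePutFromRegionInfo(regionInfo);
  //   MetaTableAccessor.putsToMetaTable(connection, Collections.singletonList(put));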
1314
1315  /**
1316   * Generates and returns a Delete containing the region info for the catalog table
1317   */
1318  public static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) {
1319    if (regionInfo == null) {
1320      throw new IllegalArgumentException("Can't make a delete for null region");
1321    }
1322    if (regionInfo.getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) {
1323      throw new IllegalArgumentException(
1324        "Can't make delete for a replica region. Operate on the primary");
1325    }
1326    Delete delete = new Delete(getMetaKeyForRegion(regionInfo));
1327    delete.addFamily(getCatalogFamily(), ts);
1328    return delete;
1329  }
1330
1331  /**
1332   * Adds split daughters to the Put
1333   */
1334  private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB)
1335    throws IOException {
1336    if (splitA != null) {
1337      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1338        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITA_QUALIFIER)
1339        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1340        .setValue(RegionInfo.toByteArray(splitA)).build());
1341    }
1342    if (splitB != null) {
1343      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1344        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITB_QUALIFIER)
1345        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1346        .setValue(RegionInfo.toByteArray(splitB)).build());
1347    }
1348    return put;
1349  }
1350
1351  /**
1352   * Put the passed <code>p</code> to the <code>hbase:meta</code> table.
1353   * @param connection connection we're using
1354   * @param p          Put to add to hbase:meta
1355   */
1356  private static void putToMetaTable(Connection connection, Put p) throws IOException {
1357    try (Table table = getMetaHTable(connection)) {
1358      put(table, p);
1359    }
1360  }
1361
1362  /**
1363   * @param t Table to use
   * @param p Put to apply
1365   */
1366  private static void put(Table t, Put p) throws IOException {
1367    debugLogMutation(p);
1368    t.put(p);
1369  }
1370
1371  /**
1372   * Put the passed <code>ps</code> to the <code>hbase:meta</code> table.
1373   * @param connection connection we're using
   * @param ps         Puts to add to hbase:meta
1375   */
1376  public static void putsToMetaTable(final Connection connection, final List<Put> ps)
1377    throws IOException {
1378    if (ps.isEmpty()) {
1379      return;
1380    }
1381    try (Table t = getMetaHTable(connection)) {
1382      debugLogMutations(ps);
1383      // the implementation for putting a single Put is much simpler so here we do a check first.
1384      if (ps.size() == 1) {
1385        t.put(ps.get(0));
1386      } else {
1387        t.put(ps);
1388      }
1389    }
1390  }
1391
1392  /**
1393   * Delete the passed <code>d</code> from the <code>hbase:meta</code> table.
1394   * @param connection connection we're using
1395   * @param d          Delete to add to hbase:meta
1396   */
1397  private static void deleteFromMetaTable(final Connection connection, final Delete d)
1398    throws IOException {
1399    List<Delete> dels = new ArrayList<>(1);
1400    dels.add(d);
1401    deleteFromMetaTable(connection, dels);
1402  }
1403
1404  /**
1405   * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table.
1406   * @param connection connection we're using
   * @param deletes    Deletes to add to hbase:meta. This list should support #remove.
1408   */
1409  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
1410    throws IOException {
1411    try (Table t = getMetaHTable(connection)) {
1412      debugLogMutations(deletes);
1413      t.delete(deletes);
1414    }
1415  }
1416
1417  /**
1418   * Set the column value corresponding to this {@code replicaId}'s {@link RegionState} to the
1419   * provided {@code state}. Mutates the provided {@link Put}.
1420   */
1421  private static Put addRegionStateToPut(Put put, int replicaId, RegionState.State state)
1422    throws IOException {
1423    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1424      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getRegionStateColumn(replicaId))
1425      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name()))
1426      .build());
1427    return put;
1428  }
1429
1430  /**
1431   * Update state column in hbase:meta.
1432   */
1433  public static void updateRegionState(Connection connection, RegionInfo ri,
1434    RegionState.State state) throws IOException {
1435    final Put put = makePutFromRegionInfo(ri);
1436    addRegionStateToPut(put, ri.getReplicaId(), state);
1437    putsToMetaTable(connection, Collections.singletonList(put));
1438  }
1439
1440  /**
   * Adds daughter region infos to the hbase:meta row for the specified parent region. Note that
   * this does not add the daughters as separate rows, but records information about the daughters
   * in the same row as the parent. Use
1444   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
1445   * you want to do that.
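   * <p>
   * For illustration, a sketch assuming the caller already has {@code connection}, the
   * {@code parent} region and its two daughters:
   *
   * <pre>
   * // Records info:splitA and info:splitB on the parent's existing row only.
   * addSplitsToParent(connection, parent, daughterA, daughterB);
   * </pre>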
1446   * @param connection connection we're using
1447   * @param regionInfo RegionInfo of parent region
1448   * @param splitA     first split daughter of the parent regionInfo
1449   * @param splitB     second split daughter of the parent regionInfo
1450   * @throws IOException if problem connecting or updating meta
1451   */
1452  public static void addSplitsToParent(Connection connection, RegionInfo regionInfo,
1453    RegionInfo splitA, RegionInfo splitB) throws IOException {
1454    try (Table meta = getMetaHTable(connection)) {
1455      Put put = makePutFromRegionInfo(regionInfo);
1456      addDaughtersToPut(put, splitA, splitB);
1457      meta.put(put);
1458      debugLogMutation(put);
      LOG.debug("Added daughters to region {}", regionInfo.getRegionNameAsString());
1460    }
1461  }
1462
1463  /**
   * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
   * does not add its daughters as separate rows, but adds information about the daughters in the
   * same row as the parent. Use
1467   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
1468   * you want to do that.
1469   * @param connection connection we're using
1470   * @param regionInfo region information
1471   * @throws IOException if problem connecting or updating meta
1472   */
1473  public static void addRegionToMeta(Connection connection, RegionInfo regionInfo)
1474    throws IOException {
1475    addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1);
1476  }
1477
1478  /**
1479   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is
1480   * CLOSED.
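   * <p>
   * A minimal usage sketch, assuming {@code connection} and a {@code regionInfos} list built by
   * the caller; a region replication of 1 means no extra replicas:
   *
   * <pre>
   * addRegionsToMeta(connection, regionInfos, 1);
   * </pre>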
1481   * @param connection  connection we're using
   * @param regionInfos region information list
   * @param regionReplication the number of replicas configured for the table; used to add empty
   *                          location columns for the non-default replicas
1483   * @throws IOException if problem connecting or updating meta
1484   */
1485  public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
1486    int regionReplication) throws IOException {
1487    addRegionsToMeta(connection, regionInfos, regionReplication,
1488      EnvironmentEdgeManager.currentTime());
1489  }
1490
1491  /**
1492   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is
1493   * CLOSED.
1494   * @param connection  connection we're using
1495   * @param regionInfos region information list
1496   * @param ts          desired timestamp
1497   * @throws IOException if problem connecting or updating meta
1498   */
1499  private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
1500    int regionReplication, long ts) throws IOException {
1501    List<Put> puts = new ArrayList<>();
1502    for (RegionInfo regionInfo : regionInfos) {
1503      if (RegionReplicaUtil.isDefaultReplica(regionInfo)) {
1504        Put put = makePutFromRegionInfo(regionInfo, ts);
1505        // New regions are added with initial state of CLOSED.
1506        addRegionStateToPut(put, regionInfo.getReplicaId(), RegionState.State.CLOSED);
1507        // Add empty locations for region replicas so that number of replicas can be cached
1508        // whenever the primary region is looked up from meta
1509        for (int i = 1; i < regionReplication; i++) {
1510          addEmptyLocation(put, i);
1511        }
1512        puts.add(put);
1513      }
1514    }
1515    putsToMetaTable(connection, puts);
1516    LOG.info("Added {} regions to meta.", puts.size());
1517  }
1518
1519  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
    int limit = 10000; // Arbitrary limit. No room in our 'merge0000' qualifier format for more.
1521    int max = mergeRegions.size();
1522    if (max > limit) {
1523      // Should never happen!!!!! But just in case.
1524      throw new RuntimeException(
1525        "Can't merge " + max + " regions in one go; " + limit + " is upper-limit.");
1526    }
1527    int counter = 0;
1528    for (RegionInfo ri : mergeRegions) {
1529      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
1530      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1531        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier))
1532        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1533        .setValue(RegionInfo.toByteArray(ri)).build());
1534    }
1535    return put;
1536  }
1537
1538  /**
1539   * Merge regions into one in an atomic operation. Deletes the merging regions in hbase:meta and
1540   * adds the merged region.
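   * <p>
   * A usage sketch, assuming the caller already has {@code connection}, the {@code mergedRegion},
   * the two parents {@code parentA} and {@code parentB}, and the hosting {@code serverName};
   * sequence ids of -1 mean serial replication is not in use:
   *
   * <pre>
   * Map&lt;RegionInfo, Long&gt; parentSeqNum = new HashMap&lt;&gt;();
   * parentSeqNum.put(parentA, -1L);
   * parentSeqNum.put(parentB, -1L);
   * mergeRegions(connection, mergedRegion, parentSeqNum, serverName, 1);
   * </pre>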
1541   * @param connection   connection we're using
1542   * @param mergedRegion the merged region
   * @param parentSeqNum map from each parent region being merged to its next open sequence id,
   *                     used by serial replication. Use -1 if not needed by this table.
   * @param sn           the location of the region
   * @param regionReplication the number of replicas configured for the table
1546   */
1547  public static void mergeRegions(Connection connection, RegionInfo mergedRegion,
1548    Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) throws IOException {
1549    try (Table meta = getMetaHTable(connection)) {
1550      long time = HConstants.LATEST_TIMESTAMP;
1551      List<Mutation> mutations = new ArrayList<>();
1552      List<RegionInfo> replicationParents = new ArrayList<>();
1553      for (Map.Entry<RegionInfo, Long> e : parentSeqNum.entrySet()) {
1554        RegionInfo ri = e.getKey();
1555        long seqNum = e.getValue();
1556        // Deletes for merging regions
1557        mutations.add(makeDeleteFromRegionInfo(ri, time));
1558        if (seqNum > 0) {
1559          mutations.add(makePutForReplicationBarrier(ri, seqNum, time));
1560          replicationParents.add(ri);
1561        }
1562      }
1563      // Put for parent
1564      Put putOfMerged = makePutFromRegionInfo(mergedRegion, time);
1565      putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet());
      // Set initial state to CLOSED.
      // NOTE: If the initial state is not set to CLOSED, the merged region gets added with the
      // default OFFLINE state. If the Master is restarted after this step, its startup sequence
      // tries to assign this offline region, which is followed by re-assignment of the merged
      // region from the resumed {@link MergeTableRegionsProcedure}.
1571      addRegionStateToPut(putOfMerged, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED);
1572      mutations.add(putOfMerged);
      // The merged region is new, so openSeqNum = 1 is fine. ServerName may be null if a crash
      // happened after the merge but before we got here; that means the in-memory locations of the
      // offlined, now-closed merging regions are lost. Should be ok; we assign the merged region
      // later.
1577      if (sn != null) {
1578        addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId());
1579      }
1580
1581      // Add empty locations for region replicas of the merged region so that number of replicas
1582      // can be cached whenever the primary region is looked up from meta
1583      for (int i = 1; i < regionReplication; i++) {
1584        addEmptyLocation(putOfMerged, i);
1585      }
1586      // add parent reference for serial replication
1587      if (!replicationParents.isEmpty()) {
1588        addReplicationParent(putOfMerged, replicationParents);
1589      }
1590      byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER);
1591      multiMutate(meta, tableRow, mutations);
1592    }
1593  }
1594
1595  /**
1596   * Splits the region into two in an atomic operation. Offlines the parent region with the
1597   * information that it is split into two, and also adds the daughter regions. Does not add the
1598   * location information to the daughter regions since they are not open yet.
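   * <p>
   * A usage sketch, assuming the caller supplies {@code connection}, the {@code parent} region,
   * daughters {@code daughterA} and {@code daughterB}, and the hosting {@code serverName}; -1
   * disables the serial-replication bookkeeping:
   *
   * <pre>
   * splitRegion(connection, parent, -1, daughterA, daughterB, serverName, 1);
   * </pre>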
1599   * @param connection       connection we're using
1600   * @param parent           the parent region which is split
1601   * @param parentOpenSeqNum the next open sequence id for parent region, used by serial
1602   *                         replication. -1 if not necessary.
1603   * @param splitA           Split daughter region A
1604   * @param splitB           Split daughter region B
   * @param sn               the location of the region
   * @param regionReplication the number of replicas configured for the table
1606   */
1607  public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum,
1608    RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) throws IOException {
1609    try (Table meta = getMetaHTable(connection)) {
1610      long time = EnvironmentEdgeManager.currentTime();
1611      // Put for parent
1612      Put putParent = makePutFromRegionInfo(
1613        RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time);
1614      addDaughtersToPut(putParent, splitA, splitB);
1615
1616      // Puts for daughters
1617      Put putA = makePutFromRegionInfo(splitA, time);
1618      Put putB = makePutFromRegionInfo(splitB, time);
1619      if (parentOpenSeqNum > 0) {
1620        addReplicationBarrier(putParent, parentOpenSeqNum);
1621        addReplicationParent(putA, Collections.singletonList(parent));
1622        addReplicationParent(putB, Collections.singletonList(parent));
1623      }
      // Set initial state to CLOSED.
      // NOTE: If the initial state is not set to CLOSED, the daughter regions get added with the
      // default OFFLINE state. If the Master is restarted after this step, its startup sequence
      // tries to assign these offline regions, which is followed by re-assignment of the daughter
      // regions from the resumed {@link SplitTableRegionProcedure}.
1629      addRegionStateToPut(putA, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED);
1630      addRegionStateToPut(putB, RegionInfo.DEFAULT_REPLICA_ID, RegionState.State.CLOSED);
1631
1632      addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine.
1633      addSequenceNum(putB, 1, splitB.getReplicaId());
1634
1635      // Add empty locations for region replicas of daughters so that number of replicas can be
1636      // cached whenever the primary region is looked up from meta
1637      for (int i = 1; i < regionReplication; i++) {
1638        addEmptyLocation(putA, i);
1639        addEmptyLocation(putB, i);
1640      }
1641
1642      byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
1643      multiMutate(meta, tableRow, putParent, putA, putB);
1644    }
1645  }
1646
1647  /**
1648   * Update state of the table in meta.
1649   * @param connection what we use for update
1650   * @param state      new state
1651   */
1652  private static void updateTableState(Connection connection, TableState state) throws IOException {
1653    Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime());
1654    putToMetaTable(connection, put);
1655    LOG.info("Updated {} in hbase:meta", state);
1656  }
1657
1658  /**
   * Constructs a {@link Put} for the given table state.
1660   * @param state new state
1661   */
1662  public static Put makePutFromTableState(TableState state, long ts) {
1663    Put put = new Put(state.getTableName().getName(), ts);
1664    put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray());
1665    return put;
1666  }
1667
1668  /**
1669   * Remove state for table from meta
1670   * @param connection to use for deletion
1671   * @param table      to delete state for
1672   */
1673  public static void deleteTableState(Connection connection, TableName table) throws IOException {
1674    long time = EnvironmentEdgeManager.currentTime();
1675    Delete delete = new Delete(table.getName());
1676    delete.addColumns(getTableFamily(), getTableStateColumn(), time);
1677    deleteFromMetaTable(connection, delete);
1678    LOG.info("Deleted table " + table + " state from META");
1679  }
1680
1681  private static void multiMutate(Table table, byte[] row, Mutation... mutations)
1682    throws IOException {
1683    multiMutate(table, row, Arrays.asList(mutations));
1684  }
1685
1686  /**
1687   * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge
1688   * and split as these want to make atomic mutations across multiple rows.
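   * <p>
   * A sketch of how the merge/split paths in this class use it; {@code metaTable}, {@code row} and
   * the mutations are assumed to have been built by the caller, and all mutations must target rows
   * hosted by the same hbase:meta region:
   *
   * <pre>
   * List&lt;Mutation&gt; mutations = Arrays.asList(deleteOfParent, putOfDaughterA, putOfDaughterB);
   * multiMutate(metaTable, row, mutations);
   * </pre>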
   * @throws IOException if the mutations fail; even if the underlying failure is a
   *                     RuntimeException, it is wrapped in an IOException.
1690   */
1691  static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations)
1692    throws IOException {
1693    debugLogMutations(mutations);
1694    Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> {
1695      MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
1696      for (Mutation mutation : mutations) {
1697        if (mutation instanceof Put) {
1698          builder.addMutationRequest(
1699            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
1700        } else if (mutation instanceof Delete) {
1701          builder.addMutationRequest(
1702            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
1703        } else {
1704          throw new DoNotRetryIOException(
1705            "multi in MetaEditor doesn't support " + mutation.getClass().getName());
1706        }
1707      }
1708      ServerRpcController controller = new ServerRpcController();
1709      CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback =
1710        new CoprocessorRpcUtils.BlockingRpcCallback<>();
1711      instance.mutateRows(controller, builder.build(), rpcCallback);
1712      MutateRowsResponse resp = rpcCallback.get();
1713      if (controller.failedOnException()) {
1714        throw controller.getFailedOn();
1715      }
1716      return resp;
1717    };
1718    try {
1719      table.coprocessorService(MultiRowMutationService.class, row, row, callable);
1720    } catch (Throwable e) {
      // Throw if an IOE, else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g.
      // a RejectedExecutionException because the hosting executor is shutting down).
1723      // This is old behavior worth reexamining. Procedures doing merge or split
1724      // currently don't handle RuntimeExceptions coming up out of meta table edits.
1725      // Would have to work on this at least. See HBASE-23904.
1726      Throwables.throwIfInstanceOf(e, IOException.class);
1727      throw new IOException(e);
1728    }
1729  }
1730
1731  /**
1732   * Updates the location of the specified region in hbase:meta to be the specified server hostname
1733   * and startcode.
1734   * <p>
1735   * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes
1736   * edits to that region.
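   * <p>
   * A minimal usage sketch, assuming {@code connection}, {@code regionInfo}, the hosting
   * {@code serverName} and the {@code openSeqNum} obtained when the region opened are supplied by
   * the caller:
   *
   * <pre>
   * updateRegionLocation(connection, regionInfo, serverName, openSeqNum,
   *   EnvironmentEdgeManager.currentTime());
   * </pre>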
1737   * @param connection       connection we're using
1738   * @param regionInfo       region to update location of
1739   * @param openSeqNum       the latest sequence number obtained when the region was open
1740   * @param sn               Server name
1741   * @param masterSystemTime wall clock time from master if passed in the open region RPC
1742   */
1743  public static void updateRegionLocation(Connection connection, RegionInfo regionInfo,
1744    ServerName sn, long openSeqNum, long masterSystemTime) throws IOException {
1745    updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime);
1746  }
1747
1748  /**
1749   * Updates the location of the specified region to be the specified server.
1750   * <p>
1751   * Connects to the specified server which should be hosting the specified catalog region name to
1752   * perform the edit.
1753   * @param connection       connection we're using
1754   * @param regionInfo       region to update location of
1755   * @param sn               Server name
1756   * @param openSeqNum       the latest sequence number obtained when the region was open
1757   * @param masterSystemTime wall clock time from master if passed in the open region RPC
1758   * @throws IOException In particular could throw {@link java.net.ConnectException} if the server
1759   *                     is down on other end.
1760   */
1761  private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn,
1762    long openSeqNum, long masterSystemTime) throws IOException {
1763    // region replicas are kept in the primary region's row
1764    Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime);
1765    addRegionInfo(put, regionInfo);
1766    addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
1767    putToMetaTable(connection, put);
    LOG.info("Updated row {} with server={}", regionInfo.getRegionNameAsString(), sn);
1769  }
1770
1771  /**
1772   * Deletes the specified region from META.
1773   * @param connection connection we're using
1774   * @param regionInfo region to be deleted from META
1775   */
1776  public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo)
1777    throws IOException {
1778    Delete delete = new Delete(regionInfo.getRegionName());
1779    delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP);
1780    deleteFromMetaTable(connection, delete);
1781    LOG.info("Deleted " + regionInfo.getRegionNameAsString());
1782  }
1783
1784  /**
1785   * Deletes the specified regions from META.
1786   * @param connection  connection we're using
1787   * @param regionsInfo list of regions to be deleted from META
1788   */
1789  public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo)
1790    throws IOException {
1791    deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime());
1792  }
1793
1794  /**
1795   * Deletes the specified regions from META.
1796   * @param connection  connection we're using
1797   * @param regionsInfo list of regions to be deleted from META
1798   */
1799  private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo,
1800    long ts) throws IOException {
1801    List<Delete> deletes = new ArrayList<>(regionsInfo.size());
1802    for (RegionInfo hri : regionsInfo) {
1803      Delete e = new Delete(hri.getRegionName());
1804      e.addFamily(getCatalogFamily(), ts);
1805      deletes.add(e);
1806    }
1807    deleteFromMetaTable(connection, deletes);
1808    LOG.info("Deleted {} regions from META", regionsInfo.size());
1809    LOG.debug("Deleted regions: {}", regionsInfo);
1810  }
1811
1812  /**
1813   * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and
1814   * adds new ones. Regions added back have state CLOSED.
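   * <p>
   * A minimal usage sketch, assuming {@code connection} and a {@code regionInfos} list from the
   * caller, with no extra read replicas:
   *
   * <pre>
   * overwriteRegions(connection, regionInfos, 1);
   * </pre>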
1815   * @param connection  connection we're using
1816   * @param regionInfos list of regions to be added to META
1817   */
1818  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
1819    int regionReplication) throws IOException {
1820    // use master time for delete marker and the Put
1821    long now = EnvironmentEdgeManager.currentTime();
1822    deleteRegionInfos(connection, regionInfos, now);
    // Why now + 1 for the puts (formerly a sleep)? This is the easiest way to ensure that the
    // preceding deletes do not eclipse the following puts if both end up with the same timestamp
    // from the server. See HBASE-9906 and HBASE-9879. Once either HBASE-9879 or HBASE-8770 is
    // fixed, or HBASE-9905 is fixed and meta uses seqIds, this workaround is not needed.
    //
    // HBASE-13875 uses the master timestamp for the mutations, so the 20ms sleep is not needed.
1829    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
    LOG.info("Overwrote {} regions in META", regionInfos.size());
    LOG.debug("Overwrote regions: {}", regionInfos);
1832  }
1833
1834  /**
   * Deletes merge qualifiers for the specified merged region.
1836   * @param connection  connection we're using
1837   * @param mergeRegion the merged region
1838   */
1839  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
1840    throws IOException {
1841    Delete delete = new Delete(mergeRegion.getRegionName());
1842    // NOTE: We are doing a new hbase:meta read here.
1843    Cell[] cells = getRegionResult(connection, mergeRegion).rawCells();
1844    if (cells == null || cells.length == 0) {
1845      return;
1846    }
1847    List<byte[]> qualifiers = new ArrayList<>();
1848    for (Cell cell : cells) {
1849      if (!isMergeQualifierPrefix(cell)) {
1850        continue;
1851      }
1852      byte[] qualifier = CellUtil.cloneQualifier(cell);
1853      qualifiers.add(qualifier);
1854      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
1855    }
1856
    // There is a race condition where a GCMultipleMergedRegionsProcedure can be scheduled while
    // the previous GCMultipleMergedRegionsProcedure for the same region is still running; in that
    // case the second procedure could delete the merged region by accident!
1860    if (qualifiers.isEmpty()) {
      LOG.info("No merge qualifiers for region " + mergeRegion.getRegionNameAsString()
        + " in meta table; they have already been cleaned up. Skipping.");
1863      return;
1864    }
1865
1866    deleteFromMetaTable(connection, delete);
1867    LOG.info(
1868      "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers "
1869        + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
1870  }
1871
1872  public static Put addRegionInfo(final Put p, final RegionInfo hri) throws IOException {
1873    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
1874      .setFamily(getCatalogFamily()).setQualifier(HConstants.REGIONINFO_QUALIFIER)
1875      .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put)
      // Serialize the default replica HRI, otherwise a scan of hbase:meta shows an
      // info:regioninfo value whose encoded name and region name differ from those of the
      // hbase:meta row.
1879      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
1880      .build());
1881    return p;
1882  }
1883
1884  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
1885    throws IOException {
1886    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
1887    return p
1888      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1889        .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp())
1890        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getAddress().toString())).build())
1891      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1892        .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp())
1893        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getStartcode())).build())
1894      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1895        .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
1896        .setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)).build());
1897  }
1898
1899  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
1900    for (byte b : regionName) {
1901      if (b == ESCAPE_BYTE) {
1902        out.write(ESCAPE_BYTE);
1903      }
1904      out.write(b);
1905    }
1906  }
1907
1908  public static byte[] getParentsBytes(List<RegionInfo> parents) {
1909    ByteArrayOutputStream bos = new ByteArrayOutputStream();
1910    Iterator<RegionInfo> iter = parents.iterator();
1911    writeRegionName(bos, iter.next().getRegionName());
1912    while (iter.hasNext()) {
1913      bos.write(ESCAPE_BYTE);
1914      bos.write(SEPARATED_BYTE);
1915      writeRegionName(bos, iter.next().getRegionName());
1916    }
1917    return bos.toByteArray();
1918  }
1919
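  // Format note for the value produced by getParentsBytes above (and stored by
  // addReplicationParent below): each region name is written with every ESCAPE_BYTE doubled, and
  // consecutive names are joined by the two-byte sequence ESCAPE_BYTE, SEPARATED_BYTE.
  // parseParentsBytes reverses that framing. For example, writing E for ESCAPE_BYTE and S for
  // SEPARATED_BYTE, the names "ab" and "cEd" serialize to: a b E S c E E d.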
1920  private static List<byte[]> parseParentsBytes(byte[] bytes) {
1921    List<byte[]> parents = new ArrayList<>();
1922    ByteArrayOutputStream bos = new ByteArrayOutputStream();
1923    for (int i = 0; i < bytes.length; i++) {
1924      if (bytes[i] == ESCAPE_BYTE) {
1925        i++;
1926        if (bytes[i] == SEPARATED_BYTE) {
1927          parents.add(bos.toByteArray());
1928          bos.reset();
1929          continue;
1930        }
1931        // fall through to append the byte
1932      }
1933      bos.write(bytes[i]);
1934    }
1935    if (bos.size() > 0) {
1936      parents.add(bos.toByteArray());
1937    }
1938    return parents;
1939  }
1940
1941  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
1942    byte[] value = getParentsBytes(parents);
1943    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1944      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER)
1945      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(value).build());
1946  }
1947
1948  public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts)
1949    throws IOException {
1950    Put put = new Put(regionInfo.getRegionName(), ts);
1951    addReplicationBarrier(put, openSeqNum);
1952    return put;
1953  }
1954
1955  /**
1956   * See class comment on SerialReplicationChecker
1957   */
1958  public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
1959    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1960      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(HConstants.SEQNUM_QUALIFIER)
1961      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum))
1962      .build());
1963  }
1964
1965  public static Put addEmptyLocation(Put p, int replicaId) throws IOException {
1966    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
1967    return p
1968      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1969        .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp())
1970        .setType(Cell.Type.Put).build())
1971      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1972        .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp())
1973        .setType(Cell.Type.Put).build())
1974      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1975        .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
1976        .setType(Cell.Type.Put).build());
1977  }
1978
1979  public static final class ReplicationBarrierResult {
1980    private final long[] barriers;
1981    private final RegionState.State state;
1982    private final List<byte[]> parentRegionNames;
1983
1984    ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) {
1985      this.barriers = barriers;
1986      this.state = state;
1987      this.parentRegionNames = parentRegionNames;
1988    }
1989
1990    public long[] getBarriers() {
1991      return barriers;
1992    }
1993
1994    public RegionState.State getState() {
1995      return state;
1996    }
1997
1998    public List<byte[]> getParentRegionNames() {
1999      return parentRegionNames;
2000    }
2001
2002    @Override
2003    public String toString() {
2004      return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + state
2005        + ", parentRegionNames="
2006        + parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", "))
2007        + "]";
2008    }
2009  }
2010
2011  private static long getReplicationBarrier(Cell c) {
2012    return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength());
2013  }
2014
2015  public static long[] getReplicationBarriers(Result result) {
2016    return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
2017      .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray();
2018  }
2019
2020  private static ReplicationBarrierResult getReplicationBarrierResult(Result result) {
2021    long[] barriers = getReplicationBarriers(result);
2022    byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn());
2023    RegionState.State state =
2024      stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null;
2025    byte[] parentRegionsBytes =
2026      result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER);
2027    List<byte[]> parentRegionNames =
2028      parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList();
2029    return new ReplicationBarrierResult(barriers, state, parentRegionNames);
2030  }
2031
2032  public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn,
2033    TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException {
2034    byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
2035    byte[] metaStopKey =
2036      RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false);
2037    Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey)
2038      .addColumn(getCatalogFamily(), getRegionStateColumn())
2039      .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true)
2040      .setCaching(10);
2041    try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) {
2042      for (Result result;;) {
2043        result = scanner.next();
2044        if (result == null) {
2045          return new ReplicationBarrierResult(new long[0], null, Collections.emptyList());
2046        }
2047        byte[] regionName = result.getRow();
2048        // TODO: we may look up a region which has already been split or merged so we need to check
2049        // whether the encoded name matches. Need to find a way to quit earlier when there is no
2050        // record for the given region, for now it will scan to the end of the table.
2051        if (
2052          !Bytes.equals(encodedRegionName, Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))
2053        ) {
2054          continue;
2055        }
2056        return getReplicationBarrierResult(result);
2057      }
2058    }
2059  }
2060
2061  public static long[] getReplicationBarrier(Connection conn, byte[] regionName)
2062    throws IOException {
2063    try (Table table = getMetaHTable(conn)) {
2064      Result result = table.get(new Get(regionName)
2065        .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
2066        .readAllVersions());
2067      return getReplicationBarriers(result);
2068    }
2069  }
2070
2071  public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
2072    TableName tableName) throws IOException {
2073    List<Pair<String, Long>> list = new ArrayList<>();
2074    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
2075      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
2076        byte[] value =
2077          r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
2078        if (value == null) {
2079          return true;
2080        }
2081        long lastBarrier = Bytes.toLong(value);
2082        String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
2083        list.add(Pair.newPair(encodedRegionName, lastBarrier));
2084        return true;
2085      });
2086    return list;
2087  }
2088
2089  public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn,
2090    TableName tableName) throws IOException {
2091    List<String> list = new ArrayList<>();
2092    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
2093      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION,
2094      new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> {
2095        list.add(RegionInfo.encodeRegionName(r.getRow()));
2096        return true;
2097      }, CatalogReplicaMode.NONE);
2098    return list;
2099  }
2100
2101  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
2102    if (!METALOG.isDebugEnabled()) {
2103      return;
2104    }
2105    // Logging each mutation in separate line makes it easier to see diff between them visually
2106    // because of common starting indentation.
2107    for (Mutation mutation : mutations) {
2108      debugLogMutation(mutation);
2109    }
2110  }
2111
2112  private static void debugLogMutation(Mutation p) throws IOException {
2113    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
2114  }
2115
2116  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
2117    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
2118      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getSeqNumColumn(replicaId))
2119      .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum))
2120      .build());
2121  }
2122}