001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapred;
019
020import java.io.DataInput;
021import java.io.DataOutput;
022import java.io.IOException;
023import java.util.List;
024import org.apache.hadoop.fs.Path;
025import org.apache.hadoop.hbase.HRegionInfo;
026import org.apache.hadoop.hbase.HTableDescriptor;
027import org.apache.hadoop.hbase.client.Result;
028import org.apache.hadoop.hbase.client.Scan;
029import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
030import org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormatImpl;
031import org.apache.hadoop.hbase.util.RegionSplitter;
032import org.apache.hadoop.mapred.InputFormat;
033import org.apache.hadoop.mapred.InputSplit;
034import org.apache.hadoop.mapred.JobConf;
035import org.apache.hadoop.mapred.RecordReader;
036import org.apache.hadoop.mapred.Reporter;
037import org.apache.yetus.audience.InterfaceAudience;
038
039/**
040 * TableSnapshotInputFormat allows a MapReduce job to run over a table snapshot. Further
041 * documentation available on {@link org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat}.
042 * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
043 */
044@InterfaceAudience.Public
045public class TableSnapshotInputFormat implements InputFormat<ImmutableBytesWritable, Result> {
046
047  public static class TableSnapshotRegionSplit implements InputSplit {
048    private TableSnapshotInputFormatImpl.InputSplit delegate;
049
050    // constructor for mapreduce framework / Writable
051    public TableSnapshotRegionSplit() {
052      this.delegate = new TableSnapshotInputFormatImpl.InputSplit();
053    }
054
055    public TableSnapshotRegionSplit(TableSnapshotInputFormatImpl.InputSplit delegate) {
056      this.delegate = delegate;
057    }
058
059    public TableSnapshotRegionSplit(HTableDescriptor htd, HRegionInfo regionInfo,
060      List<String> locations, Scan scan, Path restoreDir) {
061      this.delegate =
062        new TableSnapshotInputFormatImpl.InputSplit(htd, regionInfo, locations, scan, restoreDir);
063    }
064
065    @Override
066    public long getLength() throws IOException {
067      return delegate.getLength();
068    }
069
070    @Override
071    public String[] getLocations() throws IOException {
072      return delegate.getLocations();
073    }
074
075    @Override
076    public void write(DataOutput out) throws IOException {
077      delegate.write(out);
078    }
079
080    @Override
081    public void readFields(DataInput in) throws IOException {
082      delegate.readFields(in);
083    }
084  }
085
086  static class TableSnapshotRecordReader implements RecordReader<ImmutableBytesWritable, Result> {
087
088    private TableSnapshotInputFormatImpl.RecordReader delegate;
089
090    public TableSnapshotRecordReader(TableSnapshotRegionSplit split, JobConf job)
091      throws IOException {
092      delegate = new TableSnapshotInputFormatImpl.RecordReader();
093      delegate.initialize(split.delegate, job);
094    }
095
096    @Override
097    public boolean next(ImmutableBytesWritable key, Result value) throws IOException {
098      if (!delegate.nextKeyValue()) {
099        return false;
100      }
101      ImmutableBytesWritable currentKey = delegate.getCurrentKey();
102      key.set(currentKey.get(), currentKey.getOffset(), currentKey.getLength());
103      value.copyFrom(delegate.getCurrentValue());
104      return true;
105    }
106
107    @Override
108    public ImmutableBytesWritable createKey() {
109      return new ImmutableBytesWritable();
110    }
111
112    @Override
113    public Result createValue() {
114      return new Result();
115    }
116
117    @Override
118    public long getPos() throws IOException {
119      return delegate.getPos();
120    }
121
122    @Override
123    public void close() throws IOException {
124      delegate.close();
125    }
126
127    @Override
128    public float getProgress() throws IOException {
129      return delegate.getProgress();
130    }
131  }
132
133  @Override
134  public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
135    List<TableSnapshotInputFormatImpl.InputSplit> splits =
136      TableSnapshotInputFormatImpl.getSplits(job);
137    InputSplit[] results = new InputSplit[splits.size()];
138    for (int i = 0; i < splits.size(); i++) {
139      results[i] = new TableSnapshotRegionSplit(splits.get(i));
140    }
141    return results;
142  }
143
144  @Override
145  public RecordReader<ImmutableBytesWritable, Result> getRecordReader(InputSplit split, JobConf job,
146    Reporter reporter) throws IOException {
147    return new TableSnapshotRecordReader((TableSnapshotRegionSplit) split, job);
148  }
149
150  /**
151   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
152   * @param job          the job to configure
153   * @param snapshotName the name of the snapshot to read from
154   * @param restoreDir   a temporary directory to restore the snapshot into. Current user should
155   *                     have write permissions to this directory, and this should not be a
156   *                     subdirectory of rootdir. After the job is finished, restoreDir can be
157   *                     deleted.
158   * @throws IOException if an error occurs
159   */
160  public static void setInput(JobConf job, String snapshotName, Path restoreDir)
161    throws IOException {
162    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir);
163  }
164
165  /**
166   * Configures the job to use TableSnapshotInputFormat to read from a snapshot.
167   * @param job                the job to configure
168   * @param snapshotName       the name of the snapshot to read from
169   * @param restoreDir         a temporary directory to restore the snapshot into. Current user
170   *                           should have write permissions to this directory, and this should not
171   *                           be a subdirectory of rootdir. After the job is finished, restoreDir
172   *                           can be deleted.
173   * @param splitAlgo          split algorithm to generate splits from region
174   * @param numSplitsPerRegion how many input splits to generate per one region
175   * @throws IOException if an error occurs
176   */
177  public static void setInput(JobConf job, String snapshotName, Path restoreDir,
178    RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException {
179    TableSnapshotInputFormatImpl.setInput(job, snapshotName, restoreDir, splitAlgo,
180      numSplitsPerRegion);
181  }
182}