001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client.example;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.concurrent.Callable;
024import java.util.concurrent.ExecutorService;
025import java.util.concurrent.Executors;
026import java.util.concurrent.Future;
027import java.util.concurrent.LinkedBlockingQueue;
028import java.util.concurrent.ThreadFactory;
029import java.util.concurrent.ThreadLocalRandom;
030import java.util.concurrent.ThreadPoolExecutor;
031import java.util.concurrent.TimeUnit;
032import org.apache.hadoop.conf.Configured;
033import org.apache.hadoop.hbase.Cell;
034import org.apache.hadoop.hbase.CellBuilderFactory;
035import org.apache.hadoop.hbase.CellBuilderType;
036import org.apache.hadoop.hbase.TableName;
037import org.apache.hadoop.hbase.client.Connection;
038import org.apache.hadoop.hbase.client.ConnectionFactory;
039import org.apache.hadoop.hbase.client.Put;
040import org.apache.hadoop.hbase.client.RegionLocator;
041import org.apache.hadoop.hbase.client.Result;
042import org.apache.hadoop.hbase.client.ResultScanner;
043import org.apache.hadoop.hbase.client.Scan;
044import org.apache.hadoop.hbase.client.Table;
045import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
046import org.apache.hadoop.hbase.util.Bytes;
047import org.apache.hadoop.util.Tool;
048import org.apache.hadoop.util.ToolRunner;
049import org.apache.yetus.audience.InterfaceAudience;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
054
055/**
056 * Example on how to use HBase's {@link Connection} and {@link Table} in a multi-threaded
057 * environment. Each table is a light weight object that is created and thrown away. Connections are
058 * heavy weight objects that hold on to zookeeper connections, async processes, and other state.
059 *
060 * <pre>
061 * Usage:
062 * bin/hbase org.apache.hadoop.hbase.client.example.MultiThreadedClientExample testTableName 500000
063 * </pre>
064 * <p>
065 * The table should already be created before running the command. This example expects one column
066 * family named d.
067 * </p>
068 * <p>
069 * This is meant to show different operations that are likely to be done in a real world
070 * application. These operations are:
071 * </p>
072 * <ul>
073 * <li>30% of all operations performed are batch writes. 30 puts are created and sent out at a time.
074 * The response for all puts is waited on.</li>
075 * <li>20% of all operations are single writes. A single put is sent out and the response is waited
076 * for.</li>
077 * <li>50% of all operations are scans. These scans start at a random place and scan up to 100 rows.
078 * </li>
079 * </ul>
080 */
081@InterfaceAudience.Private
082public class MultiThreadedClientExample extends Configured implements Tool {
083  private static final Logger LOG = LoggerFactory.getLogger(MultiThreadedClientExample.class);
084  private static final int DEFAULT_NUM_OPERATIONS = 500000;
085
086  /**
087   * The name of the column family. d for default.
088   */
089  private static final byte[] FAMILY = Bytes.toBytes("d");
090
091  /**
092   * For the example we're just using one qualifier.
093   */
094  private static final byte[] QUAL = Bytes.toBytes("test");
095
096  private final ExecutorService internalPool;
097
098  private final int threads;
099
100  public MultiThreadedClientExample() throws IOException {
101    // Base number of threads.
102    // This represents the number of threads you application has
103    // that can be interacting with an hbase client.
104    this.threads = Runtime.getRuntime().availableProcessors() * 4;
105
106    // Daemon threads are great for things that get shut down.
107    ThreadFactory threadFactory =
108      new ThreadFactoryBuilder().setDaemon(true).setNameFormat("internal-pol-%d").build();
109
110    this.internalPool = Executors.newFixedThreadPool(threads, threadFactory);
111  }
112
113  @Override
114  public int run(String[] args) throws Exception {
115
116    if (args.length < 1 || args.length > 2) {
117      System.out.println("Usage: " + this.getClass().getName() + " tableName [num_operations]");
118      return -1;
119    }
120
121    final TableName tableName = TableName.valueOf(args[0]);
122    int numOperations = DEFAULT_NUM_OPERATIONS;
123
124    // the second arg is the number of operations to send.
125    if (args.length == 2) {
126      numOperations = Integer.parseInt(args[1]);
127    }
128
129    // Threads for the client only.
130    //
131    // We don't want to mix hbase and business logic.
132    //
133    ThreadPoolExecutor service = new ThreadPoolExecutor(threads * 2, threads * 2, 60L,
134      TimeUnit.SECONDS, new LinkedBlockingQueue<>());
135
136    // Create two different connections showing how it's possible to
137    // separate different types of requests onto different connections
138    final Connection writeConnection = ConnectionFactory.createConnection(getConf(), service);
139    final Connection readConnection = ConnectionFactory.createConnection(getConf(), service);
140
141    // At this point the entire cache for the region locations is full.
142    // Only do this if the number of regions in a table is easy to fit into memory.
143    //
144    // If you are interacting with more than 25k regions on a client then it's probably not good
145    // to do this at all.
146    warmUpConnectionCache(readConnection, tableName);
147    warmUpConnectionCache(writeConnection, tableName);
148
149    List<Future<Boolean>> futures = new ArrayList<>(numOperations);
150    for (int i = 0; i < numOperations; i++) {
151      double r = ThreadLocalRandom.current().nextDouble();
152      Future<Boolean> f;
153
154      // For the sake of generating some synthetic load this queues
155      // some different callables.
156      // These callables are meant to represent real work done by your application.
157      if (r < .30) {
158        f = internalPool.submit(new WriteExampleCallable(writeConnection, tableName));
159      } else if (r < .50) {
160        f = internalPool.submit(new SingleWriteExampleCallable(writeConnection, tableName));
161      } else {
162        f = internalPool.submit(new ReadExampleCallable(writeConnection, tableName));
163      }
164      futures.add(f);
165    }
166
167    // Wait a long time for all the reads/writes to complete
168    for (Future<Boolean> f : futures) {
169      f.get(10, TimeUnit.MINUTES);
170    }
171
172    // Clean up after our selves for cleanliness
173    internalPool.shutdownNow();
174    service.shutdownNow();
175    return 0;
176  }
177
178  private void warmUpConnectionCache(Connection connection, TableName tn) throws IOException {
179    try (RegionLocator locator = connection.getRegionLocator(tn)) {
180      LOG.info("Warmed up region location cache for " + tn + " got "
181        + locator.getAllRegionLocations().size());
182    }
183  }
184
185  /**
186   * Class that will show how to send batches of puts at the same time.
187   */
188  public static class WriteExampleCallable implements Callable<Boolean> {
189    private final Connection connection;
190    private final TableName tableName;
191
192    public WriteExampleCallable(Connection connection, TableName tableName) {
193      this.connection = connection;
194      this.tableName = tableName;
195    }
196
197    @Override
198    public Boolean call() throws Exception {
199
200      // Table implements Closable so we use the try with resource structure here.
201      // https://docs.oracle.com/javase/tutorial/essential/exceptions/tryResourceClose.html
202      try (Table t = connection.getTable(tableName)) {
203        byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
204        int rows = 30;
205
206        // Array to put the batch
207        ArrayList<Put> puts = new ArrayList<>(rows);
208        for (int i = 0; i < 30; i++) {
209          byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
210          Put p = new Put(rk);
211          p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(rk).setFamily(FAMILY)
212            .setQualifier(QUAL).setTimestamp(p.getTimestamp()).setType(Cell.Type.Put)
213            .setValue(value).build());
214          puts.add(p);
215        }
216
217        // now that we've assembled the batch it's time to push it to hbase.
218        t.put(puts);
219      }
220      return true;
221    }
222  }
223
224  /**
225   * Class to show how to send a single put.
226   */
227  public static class SingleWriteExampleCallable implements Callable<Boolean> {
228    private final Connection connection;
229    private final TableName tableName;
230
231    public SingleWriteExampleCallable(Connection connection, TableName tableName) {
232      this.connection = connection;
233      this.tableName = tableName;
234    }
235
236    @Override
237    public Boolean call() throws Exception {
238      try (Table t = connection.getTable(tableName)) {
239
240        byte[] value = Bytes.toBytes(Double.toString(ThreadLocalRandom.current().nextDouble()));
241        byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
242        Put p = new Put(rk);
243        p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(rk).setFamily(FAMILY)
244          .setQualifier(QUAL).setTimestamp(p.getTimestamp()).setType(Cell.Type.Put).setValue(value)
245          .build());
246        t.put(p);
247      }
248      return true;
249    }
250  }
251
252  /**
253   * Class to show how to scan some rows starting at a random location.
254   */
255  public static class ReadExampleCallable implements Callable<Boolean> {
256    private final Connection connection;
257    private final TableName tableName;
258
259    public ReadExampleCallable(Connection connection, TableName tableName) {
260      this.connection = connection;
261      this.tableName = tableName;
262    }
263
264    @Override
265    public Boolean call() throws Exception {
266
267      // total length in bytes of all read rows.
268      int result = 0;
269
270      // Number of rows the scan will read before being considered done.
271      int toRead = 100;
272      try (Table t = connection.getTable(tableName)) {
273        byte[] rk = Bytes.toBytes(ThreadLocalRandom.current().nextLong());
274        Scan s = new Scan(rk);
275
276        // This filter will keep the values from being sent accross the wire.
277        // This is good for counting or other scans that are checking for
278        // existence and don't rely on the value.
279        s.setFilter(new KeyOnlyFilter());
280
281        // Don't go back to the server for every single row.
282        // We know these rows are small. So ask for 20 at a time.
283        // This would be application specific.
284        //
285        // The goal is to reduce round trips but asking for too
286        // many rows can lead to GC problems on client and server sides.
287        s.setCaching(20);
288
289        // Don't use the cache. While this is a silly test program it's still good to be
290        // explicit that scans normally don't use the block cache.
291        s.setCacheBlocks(false);
292
293        // Open up the scanner and close it automatically when done.
294        try (ResultScanner rs = t.getScanner(s)) {
295
296          // Now go through rows.
297          for (Result r : rs) {
298            // Keep track of things size to simulate doing some real work.
299            result += r.getRow().length;
300            toRead -= 1;
301
302            // Most online applications won't be
303            // reading the entire table so this break
304            // simulates small to medium size scans,
305            // without needing to know an end row.
306            if (toRead <= 0) {
307              break;
308            }
309          }
310        }
311      }
312      return result > 0;
313    }
314  }
315
316  public static void main(String[] args) throws Exception {
317    ToolRunner.run(new MultiThreadedClientExample(), args);
318  }
319}