/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableMap;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MapReduceTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Test Map/Reduce job over HBase tables. The map/reduce process we're testing on our tables is
 * simple - take every row in the table, reverse the value of a particular cell, and write it back
 * to the table.
 */
@Category({ MapReduceTests.class, LargeTests.class })
public class TestMultithreadedTableMapper {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestMultithreadedTableMapper.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestMultithreadedTableMapper.class);
  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
  static final TableName MULTI_REGION_TABLE_NAME = TableName.valueOf("mrtest");
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final byte[] OUTPUT_FAMILY = Bytes.toBytes("text");
  static final int NUMBER_OF_THREADS = 10;

  @BeforeClass
  public static void beforeClass() throws Exception {
    // Up the handlers; this test needs more than usual.
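    // MultithreadedTableMapper runs NUMBER_OF_THREADS mapper threads inside each map task,
    // so the region server sees a heavier parallel RPC load than a single-threaded job.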
    UTIL.getConfiguration().setInt(HConstants.REGION_SERVER_HIGH_PRIORITY_HANDLER_COUNT, 10);
    UTIL.startMiniCluster();
    Table table = UTIL.createMultiRegionTable(MULTI_REGION_TABLE_NAME,
      new byte[][] { INPUT_FAMILY, OUTPUT_FAMILY });
    UTIL.loadTable(table, INPUT_FAMILY, false);
    UTIL.waitUntilAllRegionsAssigned(MULTI_REGION_TABLE_NAME);
  }

  @AfterClass
  public static void afterClass() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  /**
   * Pass the given key and the processed record on to reduce.
   */
  public static class ProcessContentsMapper extends TableMapper<ImmutableBytesWritable, Put> {

    /**
     * Pass the key, and the reversed value, to reduce.
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
      throws IOException, InterruptedException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf = value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException(
          "Wrong input columns. Missing: '" + Bytes.toString(INPUT_FAMILY) + "'.");
      }
      // Get the original value and reverse it
      String originalValue = Bytes.toString(value.getValue(INPUT_FAMILY, INPUT_FAMILY));
      StringBuilder newValue = new StringBuilder(originalValue);
      newValue.reverse();
      // Now set the value to be collected
      Put outval = new Put(key.get());
      outval.addColumn(OUTPUT_FAMILY, null, Bytes.toBytes(newValue.toString()));
      context.write(key, outval);
    }
  }

  /**
   * Test MultithreadedTableMapper map/reduce against a multi-region table.
   */
  @Test
  public void testMultithreadedTableMapper()
    throws IOException, InterruptedException, ClassNotFoundException {
    runTestOnTable(UTIL.getConnection().getTable(MULTI_REGION_TABLE_NAME));
  }

  private void runTestOnTable(Table table)
    throws IOException, InterruptedException, ClassNotFoundException {
    Job job = null;
    try {
      LOG.info("Before map/reduce startup");
      job = Job.getInstance(table.getConfiguration(), "process column contents");
      job.setNumReduceTasks(1);
      Scan scan = new Scan();
      scan.addFamily(INPUT_FAMILY);
      TableMapReduceUtil.initTableMapperJob(table.getName(), scan, MultithreadedTableMapper.class,
        ImmutableBytesWritable.class, Put.class, job);
      MultithreadedTableMapper.setMapperClass(job, ProcessContentsMapper.class);
      MultithreadedTableMapper.setNumberOfThreads(job, NUMBER_OF_THREADS);
      TableMapReduceUtil.initTableReducerJob(table.getName().getNameAsString(),
        IdentityTableReducer.class, job);
      FileOutputFormat.setOutputPath(job, new Path("test"));
      LOG.info("Started " + table.getName());
      assertTrue(job.waitForCompletion(true));
      LOG.info("After map/reduce completion");
      // verify map-reduce results
      verify(table.getName());
    } finally {
      table.close();
      if (job != null) {
        FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
      }
    }
  }

  private void verify(TableName tableName) throws IOException {
    Table table = UTIL.getConnection().getTable(tableName);
    boolean verified = false;
    long pause = UTIL.getConfiguration().getLong("hbase.client.pause", 5 * 1000);
    int numRetries = UTIL.getConfiguration().getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 5);
    for (int i = 0; i < numRetries; i++) {
      try {
        LOG.info("Verification attempt #" + i);
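        // verifyAttempt throws NullPointerException when a cell value is missing; the
        // catch block below treats that as a retryable race with in-flight updates.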
        verifyAttempt(table);
        verified = true;
        break;
      } catch (NullPointerException e) {
        // If here, a cell was empty. Presume it's because updates came in
        // after the scanner had been opened. Wait a while and retry.
        LOG.debug("Verification attempt failed: " + e.getMessage());
      }
      try {
        Thread.sleep(pause);
      } catch (InterruptedException e) {
        // continue
      }
    }
    assertTrue(verified);
    table.close();
  }

  /**
   * Looks at every value of the map/reduce output and verifies that indeed the values have been
   * reversed.
   * @param table Table to scan.
   * @throws NullPointerException if we failed to find a cell value
   */
  private void verifyAttempt(final Table table) throws IOException, NullPointerException {
    Scan scan = new Scan();
    scan.addFamily(INPUT_FAMILY);
    scan.addFamily(OUTPUT_FAMILY);
    ResultScanner scanner = table.getScanner(scan);
    try {
      Iterator<Result> itr = scanner.iterator();
      assertTrue(itr.hasNext());
      while (itr.hasNext()) {
        Result r = itr.next();
        // Each row should carry at most one input cell and one output cell.
        if (r.size() > 2) {
          throw new IOException("Too many results, expected 2 got " + r.size());
        }
        byte[] firstValue = null;
        byte[] secondValue = null;
        int count = 0;
        for (Cell kv : r.listCells()) {
          if (count == 0) {
            firstValue = CellUtil.cloneValue(kv);
          } else if (count == 1) {
            secondValue = CellUtil.cloneValue(kv);
          } else if (count == 2) {
            break;
          }
          count++;
        }
        if (firstValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) + ": first value is null");
        }
        String first = Bytes.toString(firstValue);
        if (secondValue == null) {
          throw new NullPointerException(Bytes.toString(r.getRow()) + ": second value is null");
        }
        // Reverse the second value and compare it with the first.
        byte[] secondReversed = new byte[secondValue.length];
        for (int i = 0, j = secondValue.length - 1; j >= 0; j--, i++) {
          secondReversed[i] = secondValue[j];
        }
        String second = Bytes.toString(secondReversed);
        if (first.compareTo(second) != 0) {
          if (LOG.isDebugEnabled()) {
            LOG.debug(
              "second key is not the reverse of first. row=" + Bytes.toStringBinary(r.getRow())
                + ", first value=" + first + ", second value=" + second);
          }
          fail();
        }
      }
    } finally {
      scanner.close();
    }
  }

}