/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.StartTestingClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.GnuParser;
import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;

/**
 * Test performance improvement of joined scanners optimization:
 * https://issues.apache.org/jira/browse/HBASE-5416
 * <p>
 * The table used here has a small "essential" column family holding a Y/N flag and a "joined"
 * column family holding a large value. Scans filter on the flag and are run alternately with
 * {@link Scan#setLoadColumnFamiliesOnDemand(boolean)} off ("Slow") and on ("Joined"); the elapsed
 * time of each scan is logged rather than asserted.
 */
@Category({ RegionServerTests.class, LargeTests.class })
public class TestJoinedScanners {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestJoinedScanners.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestJoinedScanners.class);

  private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();

  private static final byte[] cf_essential = Bytes.toBytes("essential");
  private static final byte[] cf_joined = Bytes.toBytes("joined");
  private static final byte[] col_name = Bytes.toBytes("a");
  private static final byte[] flag_yes = Bytes.toBytes("Y");
  private static final byte[] flag_no = Bytes.toBytes("N");

  // Tunables; the defaults below can be overridden from the command line (see main()).
  private static DataBlockEncoding blockEncoding = DataBlockEncoding.FAST_DIFF;
  // Percentage (0-100) of rows flagged "Y" in the essential family.
  private static int selectionRatio = 30;
  // Size in bytes of the value written to the joined family.
  private static int valueWidth = 128 * 1024;

  @Rule
  public TestName name = new TestName();

  /**
   * Starts a mini cluster with three region servers and three named data node hosts.
   */
  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    final int DEFAULT_BLOCK_SIZE = 1024 * 1024;
    TEST_UTIL.getConfiguration().setLong("dfs.blocksize", DEFAULT_BLOCK_SIZE);
    TEST_UTIL.getConfiguration().setInt("dfs.replication", 1);
    TEST_UTIL.getConfiguration().setLong("hbase.hregion.max.filesize", 322122547200L);

    String[] dataNodeHosts = new String[] { "host1", "host2", "host3" };
    int regionServersCount = 3;
    StartTestingClusterOption option = StartTestingClusterOption.builder()
      .numRegionServers(regionServersCount).dataNodeHosts(dataNodeHosts).build();
    TEST_UTIL.startMiniCluster(option);
  }

  /**
   * Shuts the mini cluster down.
   */
  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Creates a two-family table, fills it with 1000 rows and then runs ten filtered scans,
   * alternating between the "slow" and the "joined" mode. Timings are only logged.
   */
  @Test
  public void testJoinedScanners() throws Exception {
    byte[][] families = { cf_essential, cf_joined };

    // The TestName rule is only populated when JUnit drives the test; when this method is
    // invoked from main() the method name is null, so fall back to a fixed table name.
    String methodName = name.getMethodName();
    final TableName tableName =
      TableName.valueOf(methodName != null ? methodName : "testJoinedScanners");
    TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
    for (byte[] family : families) {
      ColumnFamilyDescriptor familyDescriptor = ColumnFamilyDescriptorBuilder.newBuilder(family)
        .setDataBlockEncoding(blockEncoding).build();
      builder.setColumnFamily(familyDescriptor);
    }
    TableDescriptor tableDescriptor = builder.build();
    TEST_UTIL.getAdmin().createTable(tableDescriptor);

    // try-with-resources so the table is released even if loading or scanning throws.
    try (Table ht = TEST_UTIL.getConnection().getTable(tableName)) {
      long rowsToInsert = 1000;
      int insertBatch = 20;

      // Floating point division so that small value widths are not truncated to 0 MB.
      LOG.info("Make {} rows, total size = {} MB", rowsToInsert,
        rowsToInsert * valueWidth / (1024.0 * 1024.0));

      long time = System.nanoTime();
      Random rand = ThreadLocalRandom.current();
      byte[] valLarge = new byte[valueWidth];
      List<Put> puts = new ArrayList<>(insertBatch);
      for (long i = 0; i < rowsToInsert; i++) {
        Put put = new Put(Bytes.toBytes(Long.toString(i)));
        // nextInt(100) yields 0..99, so a strict '<' selects exactly selectionRatio percent
        // of the values (0 selects nothing, 100 selects everything).
        if (rand.nextInt(100) < selectionRatio) {
          put.addColumn(cf_essential, col_name, flag_yes);
        } else {
          put.addColumn(cf_essential, col_name, flag_no);
        }
        put.addColumn(cf_joined, col_name, valLarge);
        puts.add(put);
        if (puts.size() >= insertBatch) {
          ht.put(puts);
          puts.clear();
        }
      }
      // Flush the last, possibly incomplete, batch.
      if (!puts.isEmpty()) {
        ht.put(puts);
        puts.clear();
      }

      LOG.info("Data generated in {} seconds", (System.nanoTime() - time) / 1000000000.0);

      // Alternate slow / joined scans, starting with the slow one.
      boolean slow = true;
      for (int i = 0; i < 10; ++i) {
        runScanner(ht, slow);
        slow = !slow;
      }
    }
  }

  /**
   * Scans both column families of {@code table}, keeping only rows whose essential flag equals
   * "Y", and logs the elapsed time together with the number of rows returned.
   * @param table table to scan
   * @param slow  when {@code true}, on-demand column family loading is disabled for the scan;
   *              when {@code false} it is enabled
   */
  private void runScanner(Table table, boolean slow) throws Exception {
    long time = System.nanoTime();
    Scan scan = new Scan();
    scan.addColumn(cf_essential, col_name);
    scan.addColumn(cf_joined, col_name);

    SingleColumnValueFilter filter =
      new SingleColumnValueFilter(cf_essential, col_name, CompareOperator.EQUAL, flag_yes);
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    scan.setLoadColumnFamiliesOnDemand(!slow);

    long rowsCount = 0;
    double timeSec;
    // try-with-resources so the scanner is closed even when next() throws.
    try (ResultScanner scanner = table.getScanner(scan)) {
      for (Result res = scanner.next(); res != null; res = scanner.next()) {
        rowsCount++;
      }
      // Measure before the scanner is closed, as the time of interest is the scan itself.
      timeSec = (System.nanoTime() - time) / 1000000000.0;
    }
    // Each Result counted above is one row, so the count is reported as is.
    LOG.info("{} scanner finished in {} seconds, got {} rows", slow ? "Slow" : "Joined", timeSec,
      rowsCount);
  }

  private static final Options options = new Options();

  /**
   * Command line interface. Accepts {@code -e/--blockEncoding}, {@code -r/--selectionRatio} and
   * {@code -w/--valueWidth}; all are optional. Usage is printed when no argument is given and the
   * test then runs with the defaults.
   * <p>
   * Because the JUnit lifecycle does not run here, the mini cluster is started and stopped
   * explicitly around the test.
   * @param args command line arguments
   * @throws Exception if the arguments cannot be parsed, or cluster start-up or the test fails
   */
  public static void main(final String[] args) throws Exception {
    Option encodingOption =
      new Option("e", "blockEncoding", true, "Data block encoding; Default: FAST_DIFF");
    encodingOption.setRequired(false);
    options.addOption(encodingOption);

    Option ratioOption = new Option("r", "selectionRatio", true,
      "Ratio of selected rows using essential column family");
    ratioOption.setRequired(false);
    options.addOption(ratioOption);

    Option widthOption =
      new Option("w", "valueWidth", true, "Width of value for non-essential column family");
    widthOption.setRequired(false);
    options.addOption(widthOption);

    CommandLineParser parser = new GnuParser();
    CommandLine cmd = parser.parse(options, args);
    if (args.length < 1) {
      HelpFormatter formatter = new HelpFormatter();
      formatter.printHelp("TestJoinedScanners", options, true);
    }

    if (cmd.hasOption("e")) {
      blockEncoding = DataBlockEncoding.valueOf(cmd.getOptionValue("e"));
    }
    if (cmd.hasOption("r")) {
      selectionRatio = Integer.parseInt(cmd.getOptionValue("r"));
    }
    if (cmd.hasOption("w")) {
      valueWidth = Integer.parseInt(cmd.getOptionValue("w"));
    }

    // run the test; @BeforeClass/@AfterClass are not invoked outside JUnit, so do it by hand
    setUpBeforeClass();
    try {
      TestJoinedScanners test = new TestJoinedScanners();
      test.testJoinedScanners();
    } finally {
      tearDownAfterClass();
    }
  }

  /**
   * A reversed scan combined with on-demand column family loading is expected to be rejected
   * with a {@link DoNotRetryIOException}.
   */
  @Test(expected = DoNotRetryIOException.class)
  public void testWithReverseScan() throws Exception {
    // The connection handed out by TEST_UTIL is shared with the other test in this class and is
    // released on cluster shutdown, so it must not be closed here; only the Admin is ours.
    Connection con = TEST_UTIL.getConnection();
    try (Admin admin = con.getAdmin()) {
      TableName tableName = TableName.valueOf(name.getMethodName());

      TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf1"))
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("cf2")).build();
      admin.createTable(tableDescriptor);

      try (Table table = con.getTable(tableName)) {
        SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("cf1"),
          Bytes.toBytes("col"), CompareOperator.EQUAL, Bytes.toBytes("val"));
        filter.setFilterIfMissing(true);

        // Reverse scan with loading CFs on demand
        Scan scan = new Scan();
        scan.setFilter(filter);
        scan.setReversed(true);
        scan.setLoadColumnFamiliesOnDemand(true);

        try (ResultScanner scanner = table.getScanner(scan)) {
          // DoNotRetryIOException should occur
          scanner.next();
        }
      }
    }
  }
}