001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.compress.zstd; 019 020import static org.junit.Assert.assertNotNull; 021import static org.junit.Assert.assertTrue; 022 023import java.io.File; 024import java.io.FileOutputStream; 025import java.io.IOException; 026import java.util.Random; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.CommonConfigurationKeys; 029import org.apache.hadoop.hbase.HBaseClassTestRule; 030import org.apache.hadoop.hbase.io.compress.CompressionTestBase; 031import org.apache.hadoop.hbase.io.compress.DictionaryCache; 032import org.apache.hadoop.hbase.testclassification.SmallTests; 033import org.apache.hadoop.hbase.util.RandomDistribution; 034import org.junit.BeforeClass; 035import org.junit.ClassRule; 036import org.junit.Test; 037import org.junit.experimental.categories.Category; 038 039@Category(SmallTests.class) 040public class TestZstdDictionary extends CompressionTestBase { 041 042 @ClassRule 043 public static final HBaseClassTestRule CLASS_RULE = 044 HBaseClassTestRule.forClass(TestZstdDictionary.class); 045 046 private static final String DICTIONARY_PATH = DictionaryCache.RESOURCE_SCHEME + "zstd.test.dict"; 047 // zstd.test.data compressed with zstd.test.dict at level 3 with a default buffer size of 262144 048 // will produce a result of 359909 bytes 049 private static final int EXPECTED_COMPRESSED_SIZE = 359909; 050 051 private static byte[] TEST_DATA; 052 053 @BeforeClass 054 public static void setUp() throws Exception { 055 Configuration conf = new Configuration(); 056 TEST_DATA = DictionaryCache.loadFromResource(conf, 057 DictionaryCache.RESOURCE_SCHEME + "zstd.test.data", /* maxSize */ 1024 * 1024); 058 assertNotNull("Failed to load test data", TEST_DATA); 059 } 060 061 @Test 062 public void test() throws Exception { 063 Configuration conf = new Configuration(); 064 conf.setInt(CommonConfigurationKeys.IO_COMPRESSION_CODEC_ZSTD_LEVEL_KEY, 3); 065 conf.set(ZstdCodec.ZSTD_DICTIONARY_KEY, DICTIONARY_PATH); 066 ZstdCodec codec = new ZstdCodec(); 067 codec.setConf(conf); 068 codecTest(codec, new byte[][] { TEST_DATA }, EXPECTED_COMPRESSED_SIZE); 069 // Assert that the dictionary was actually loaded 070 assertTrue("Dictionary was not loaded by codec", DictionaryCache.contains(DICTIONARY_PATH)); 071 } 072 073 // 074 // For generating the test data in src/test/resources/ 075 // 076 077 public static void main(String[] args) throws IOException { 078 // Write 1000 1k blocks for training to the specified file 079 // Train with: 080 // zstd --train -B1024 -o <dictionary_file> <input_file> 081 if (args.length < 1) { 082 System.err.println("Usage: TestZstdCodec <outFile>"); 083 System.exit(-1); 084 } 085 final RandomDistribution.DiscreteRNG rng = 086 new RandomDistribution.Zipf(new Random(), 0, Byte.MAX_VALUE, 2); 087 final File outFile = new File(args[0]); 088 final byte[] buffer = new byte[1024]; 089 System.out.println("Generating " + outFile); 090 try (FileOutputStream os = new FileOutputStream(outFile)) { 091 for (int i = 0; i < 1000; i++) { 092 fill(rng, buffer); 093 os.write(buffer); 094 } 095 } 096 System.out.println("Done"); 097 } 098 099}