igzip: Add new functions for faster dictionary compression

Change-Id: Id55728fea286d144f8a11192ab02ccc8503d7b25
Signed-off-by: Greg Tucker <greg.b.tucker@intel.com>
This commit is contained in:
Greg Tucker 2020-10-20 09:56:09 -07:00
parent 438ecd8187
commit 19035917f4
4 changed files with 162 additions and 8 deletions

View File

@ -1241,6 +1241,94 @@ void isal_deflate_hash(struct isal_zstream *stream, uint8_t * dict, uint32_t dic
stream->internal_state.has_hist = IGZIP_HIST; stream->internal_state.has_hist = IGZIP_HIST;
} }
/*
 * Pre-process a dictionary into a reusable isal_dict.
 *
 * Records stream->level in dict, copies the dictionary bytes into
 * dict->history and builds dict->hashtable with the hash routine that matches
 * stream->level. If dict_len exceeds IGZIP_HIST_SIZE only the trailing
 * IGZIP_HIST_SIZE bytes of dict_data are kept.
 *
 * stream:    compression stream; only stream->level is read here.
 * dict:      output structure receiving the processed dictionary.
 * dict_data: raw dictionary bytes.
 * dict_len:  number of bytes in dict_data.
 *
 * Returns COMP_OK on success, or ISAL_INVALID_STATE when dict is NULL,
 * dict_len is 0, or dict->level is above ISAL_DEF_MAX_LEVEL.
 */
int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict,
			      uint8_t * dict_data, uint32_t dict_len)
{
	/*
	 * The NULL test must come first: the level test reads through dict, so
	 * evaluating it earlier would dereference a NULL pointer.
	 *
	 * NOTE(review): the level test inspects dict->level, which is overwritten
	 * with stream->level below - confirm whether stream->level was intended.
	 */
	if ((dict == NULL)
	    || (dict_len == 0)
	    || (dict->level > ISAL_DEF_MAX_LEVEL))
		return ISAL_INVALID_STATE;

	/* Keep only the most recent IGZIP_HIST_SIZE bytes of an oversized dictionary */
	if (dict_len > IGZIP_HIST_SIZE) {
		dict_data = dict_data + dict_len - IGZIP_HIST_SIZE;
		dict_len = IGZIP_HIST_SIZE;
	}

	dict->level = stream->level;
	dict->hist_size = dict_len;
	memcpy(dict->history, dict_data, dict_len);

	/* Fill every hash table byte with 0xFF before hashing the dictionary */
	memset(dict->hashtable, -1, sizeof(dict->hashtable));

	switch (stream->level) {
	case 3:
		dict->hash_size = IGZIP_LVL3_HASH_SIZE;
		isal_deflate_hash_lvl3(dict->hashtable, LVL3_HASH_MASK,
				       0, dict_data, dict_len);
		break;

	case 2:
		dict->hash_size = IGZIP_LVL2_HASH_SIZE;
		isal_deflate_hash_lvl2(dict->hashtable, LVL2_HASH_MASK,
				       0, dict_data, dict_len);
		break;

	case 1:
		dict->hash_size = IGZIP_LVL1_HASH_SIZE;
		isal_deflate_hash_lvl1(dict->hashtable, LVL1_HASH_MASK,
				       0, dict_data, dict_len);
		break;

	default:
		/* Any other level value is hashed with the level 0 routine */
		dict->hash_size = IGZIP_LVL0_HASH_SIZE;
		isal_deflate_hash_lvl0(dict->hashtable, LVL0_HASH_MASK,
				       0, dict_data, dict_len);
	}

	return COMP_OK;
}
/*
 * Load a pre-processed dictionary into a stream.
 *
 * Copies dict->history into the stream's internal buffer, marks the stream as
 * having its dictionary hash already set (IGZIP_DICT_HASH_SET) and copies
 * dict->hashtable into the hash table used by stream->level.
 *
 * stream: compression stream to receive the dictionary.
 * dict:   dictionary data previously filled in for the same level.
 *
 * Returns COMP_OK on success, ISAL_INVALID_STATE when the stream state or the
 * dictionary contents are not acceptable, or the non-zero result of
 * check_level_req().
 */
int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict)
{
	struct isal_zstate *state = &stream->internal_state;
	struct level_buf *level_buf = (struct level_buf *)stream->level_buf;
	void *table;
	size_t table_bytes;
	int ret;

	/* Stream must be waiting for a new header with matching byte counters */
	if (state->state != ZSTATE_NEW_HDR)
		return ISAL_INVALID_STATE;
	if (state->b_bytes_processed != state->b_bytes_valid)
		return ISAL_INVALID_STATE;

	/* Dictionary must have been prepared for this level and be within bounds */
	if (dict->level != stream->level)
		return ISAL_INVALID_STATE;
	if (dict->hist_size == 0 || dict->hist_size > IGZIP_HIST_SIZE)
		return ISAL_INVALID_STATE;
	if (dict->hash_size > IGZIP_LVL3_HASH_SIZE)
		return ISAL_INVALID_STATE;

	ret = check_level_req(stream);
	if (ret != 0)
		return ret;

	/* Install the history bytes as already processed, valid buffer content */
	memcpy(state->buffer, dict->history, dict->hist_size);
	state->b_bytes_valid = dict->hist_size;
	state->b_bytes_processed = dict->hist_size;
	state->has_hist = IGZIP_DICT_HASH_SET;

	/* Select the destination hash table that belongs to the stream's level */
	switch (stream->level) {
	case 3:
		table = level_buf->lvl3.hash_table;
		table_bytes = sizeof(level_buf->lvl3.hash_table);
		break;
	case 2:
		table = level_buf->lvl2.hash_table;
		table_bytes = sizeof(level_buf->lvl2.hash_table);
		break;
	case 1:
		table = level_buf->lvl1.hash_table;
		table_bytes = sizeof(level_buf->lvl1.hash_table);
		break;
	default:
		table = state->head;
		table_bytes = sizeof(state->head);
		break;
	}

	memcpy(table, dict->hashtable, table_bytes);

	return COMP_OK;
}
int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len) int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t * dict, uint32_t dict_len)
{ {
struct isal_zstate *state = &stream->internal_state; struct isal_zstate *state = &stream->internal_state;
@ -1465,6 +1553,9 @@ int isal_deflate(struct isal_zstream *stream)
set_dist_mask(stream); set_dist_mask(stream);
set_hash_mask(stream); set_hash_mask(stream);
isal_deflate_hash(stream, state->buffer, state->b_bytes_processed); isal_deflate_hash(stream, state->buffer, state->b_bytes_processed);
} else if (state->has_hist == IGZIP_DICT_HASH_SET) {
set_dist_mask(stream);
set_hash_mask(stream);
} }
in_size = stream->avail_in + buffered_size; in_size = stream->avail_in + buffered_size;

View File

@ -109,17 +109,21 @@ int usage(void)
void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size, void deflate_perf(struct isal_zstream *stream, uint8_t * inbuf, size_t infile_size,
size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level, size_t inbuf_size, uint8_t * outbuf, size_t outbuf_size, int level,
uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf, uint8_t * level_buf, int level_size, uint32_t hist_bits, uint8_t * dictbuf,
size_t dictfile_size, struct isal_hufftables *hufftables_custom) size_t dictfile_size, struct isal_dict *dict_str,
struct isal_hufftables *hufftables_custom)
{ {
int avail_in; int avail_in;
isal_deflate_init(stream); isal_deflate_init(stream);
if (dictbuf != NULL)
isal_deflate_set_dict(stream, dictbuf, dictfile_size);
stream->end_of_stream = 0;
stream->flush = NO_FLUSH;
stream->level = level; stream->level = level;
stream->level_buf = level_buf; stream->level_buf = level_buf;
stream->level_buf_size = level_size; stream->level_buf_size = level_size;
if (COMP_OK != isal_deflate_reset_dict(stream, dict_str))
if (dictbuf != NULL)
isal_deflate_set_dict(stream, dictbuf, dictfile_size);
stream->end_of_stream = 0;
stream->flush = NO_FLUSH;
stream->next_out = outbuf; stream->next_out = outbuf;
stream->avail_out = outbuf_size; stream->avail_out = outbuf_size;
stream->next_in = inbuf; stream->next_in = inbuf;
@ -285,16 +289,20 @@ int main(int argc, char *argv[])
exit(0); exit(0);
} }
struct isal_dict dict_str;
stream.level = level;
isal_deflate_process_dict(&stream, &dict_str, dictbuf, dictfile_size);
struct perf start; struct perf start;
if (time > 0) { if (time > 0) {
BENCHMARK(&start, time, BENCHMARK(&start, time,
deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf,
outbuf_size, level, level_buf, level_size, hist_bits, outbuf_size, level, level_buf, level_size, hist_bits,
dictbuf, dictfile_size, NULL)); dictbuf, dictfile_size, &dict_str, NULL));
} else { } else {
deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size, deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
level, level_buf, level_size, hist_bits, dictbuf, level, level_buf, level_size, hist_bits, dictbuf,
dictfile_size, NULL); dictfile_size, &dict_str, NULL);
} }
if (stream.avail_in != 0) { if (stream.avail_in != 0) {
fprintf(stderr, "Could not compress all of inbuf\n"); fprintf(stderr, "Could not compress all of inbuf\n");
@ -313,7 +321,7 @@ int main(int argc, char *argv[])
deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size, deflate_perf(&stream, inbuf, infile_size, inbuf_size, outbuf, outbuf_size,
level, level_buf, level_size, hist_bits, dictbuf, level, level_buf, level_size, hist_bits, dictbuf,
dictfile_size, &hufftables_custom); dictfile_size, &dict_str, &hufftables_custom);
printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size); printf(" ratio_custom=%3.1f%%", 100.0 * stream.total_out / infile_size);
} }

View File

@ -314,6 +314,7 @@ struct isal_mod_hist {
#define IGZIP_NO_HIST 0 #define IGZIP_NO_HIST 0
#define IGZIP_HIST 1 #define IGZIP_HIST 1
#define IGZIP_DICT_HIST 2 #define IGZIP_DICT_HIST 2
#define IGZIP_DICT_HASH_SET 3
/** @brief Holds Bit Buffer information*/ /** @brief Holds Bit Buffer information*/
struct BitBuf2 { struct BitBuf2 {
@ -685,6 +686,58 @@ void isal_deflate_stateless_init(struct isal_zstream *stream);
*/ */
int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len); int isal_deflate_set_dict(struct isal_zstream *stream, uint8_t *dict, uint32_t dict_len);
/** @brief Structure for holding processed dictionary information
 *
 * Filled in by isal_deflate_process_dict() and consumed by
 * isal_deflate_reset_dict().
 */
struct isal_dict {
uint32_t params; //!< NOTE(review): not accessed by the process/reset functions; purpose to be confirmed
uint32_t level; //!< Compression level the dictionary was processed for (copied from stream->level)
uint32_t hist_size; //!< Number of dictionary bytes stored in history
uint32_t hash_size; //!< Hash size constant of the level used to build hashtable
uint8_t history[ISAL_DEF_HIST_SIZE]; //!< Copy of the (possibly truncated) dictionary bytes
uint16_t hashtable[IGZIP_LVL3_HASH_SIZE]; //!< Hash table built over the dictionary for the stored level
};
/**
* @brief Process dictionary to reuse later
*
* Processes a dictionary so that the generated output can be reused to reset a
* new deflate stream more quickly than isal_deflate_set_dict() alone. This
* function is paired with isal_deflate_reset_dict() when using the same
* dictionary on multiple deflate objects. The stream.level must be set prior to
* calling this function to process the dictionary correctly. If the dictionary
* is longer than IGZIP_HIST_SIZE, only the last IGZIP_HIST_SIZE bytes will be
* used.
*
* @param stream Structure holding state information on the compression streams.
* @param dict_str: Structure to hold processed dictionary info to reuse later.
* @param dict: Array containing dictionary to use.
* @param dict_len: Length of dict.
* @returns COMP_OK,
* ISAL_INVALID_STATE (dictionary could not be processed)
*/
int isal_deflate_process_dict(struct isal_zstream *stream, struct isal_dict *dict_str,
uint8_t *dict, uint32_t dict_len);
/**
* @brief Reset compression dictionary to use
*
* Similar to isal_deflate_set_dict() but on pre-processed dictionary
* data. Pairing with isal_deflate_process_dict() can reduce the processing time
* on subsequent compression with dictionary especially on small files.
*
* Like isal_deflate_set_dict(), this function is to be called after
* isal_deflate_init, or after completing a SYNC_FLUSH or FULL_FLUSH and before
* the next call to isal_deflate. Changing compression level between dictionary
* process and reset will cause return of ISAL_INVALID_STATE.
*
* @param stream Structure holding state information on the compression streams.
* @param dict_str: Structure with pre-processed dictionary info.
* @returns COMP_OK,
* ISAL_INVALID_STATE or other (dictionary could not be reset)
*/
int isal_deflate_reset_dict(struct isal_zstream *stream, struct isal_dict *dict_str);
/** /**
* @brief Fast data (deflate) compression for storage applications. * @brief Fast data (deflate) compression for storage applications.
* *

View File

@ -113,3 +113,5 @@ isal_write_zlib_header @108
isal_zero_detect @109 isal_zero_detect @109
isal_gzip_header_init @110 isal_gzip_header_init @110
isal_adler32 @111 isal_adler32 @111
isal_deflate_process_dict @112
isal_deflate_reset_dict @113