Add a second multi-thread method
method 1 grouping: [parse + reconstruction] // [filtering + output] method 2 grouping: [parse] // [reconstruction+filtering + output] Depending on some heuristics (see VP8ThreadMethod()), we can pick one of the other when -mt flag (or option.use_threads) is selected. Conservatively, we always use method #2 for now until the heuristic is refined (so, timing should be the same the before this patch) + replace 'use_threads' by 'mt_method' + define MIN_WIDTH_FOR_THREADS constant + fix comment alignment Change-Id: I11a756dea9070d6e21b1a9481d357a1e8aa0663e
This commit is contained in:
parent
7d6f2da075
commit
8a2fa099cc
@ -181,7 +181,9 @@ static int FinishRow(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
const int is_first_row = (mb_y == 0);
|
const int is_first_row = (mb_y == 0);
|
||||||
const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
|
const int is_last_row = (mb_y >= dec->br_mb_y_ - 1);
|
||||||
|
|
||||||
ReconstructRow(dec, ctx);
|
if (dec->mt_method_ == 2) {
|
||||||
|
ReconstructRow(dec, ctx);
|
||||||
|
}
|
||||||
|
|
||||||
if (ctx->filter_row_) {
|
if (ctx->filter_row_) {
|
||||||
FilterRow(dec);
|
FilterRow(dec);
|
||||||
@ -263,10 +265,11 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
const int filter_row =
|
const int filter_row =
|
||||||
(dec->filter_type_ > 0) &&
|
(dec->filter_type_ > 0) &&
|
||||||
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
|
(dec->mb_y_ >= dec->tl_mb_y_) && (dec->mb_y_ <= dec->br_mb_y_);
|
||||||
if (!dec->use_threads_) {
|
if (dec->mt_method_ == 0) {
|
||||||
// ctx->id_ and ctx->f_info_ are already set
|
// ctx->id_ and ctx->f_info_ are already set
|
||||||
ctx->mb_y_ = dec->mb_y_;
|
ctx->mb_y_ = dec->mb_y_;
|
||||||
ctx->filter_row_ = filter_row;
|
ctx->filter_row_ = filter_row;
|
||||||
|
ReconstructRow(dec, ctx);
|
||||||
ok = FinishRow(dec, io);
|
ok = FinishRow(dec, io);
|
||||||
} else {
|
} else {
|
||||||
WebPWorker* const worker = &dec->worker_;
|
WebPWorker* const worker = &dec->worker_;
|
||||||
@ -278,17 +281,20 @@ int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
ctx->id_ = dec->cache_id_;
|
ctx->id_ = dec->cache_id_;
|
||||||
ctx->mb_y_ = dec->mb_y_;
|
ctx->mb_y_ = dec->mb_y_;
|
||||||
ctx->filter_row_ = filter_row;
|
ctx->filter_row_ = filter_row;
|
||||||
{
|
if (dec->mt_method_ == 2) { // swap macroblock data
|
||||||
VP8MBData* const tmp = ctx->mb_data_;
|
VP8MBData* const tmp = ctx->mb_data_;
|
||||||
ctx->mb_data_ = dec->mb_data_;
|
ctx->mb_data_ = dec->mb_data_;
|
||||||
dec->mb_data_ = tmp;
|
dec->mb_data_ = tmp;
|
||||||
|
} else {
|
||||||
|
// perform reconstruction directly in main thread
|
||||||
|
ReconstructRow(dec, ctx);
|
||||||
}
|
}
|
||||||
if (filter_row) { // just swap filter info
|
if (filter_row) { // swap filter info
|
||||||
VP8FInfo* const tmp = ctx->f_info_;
|
VP8FInfo* const tmp = ctx->f_info_;
|
||||||
ctx->f_info_ = dec->f_info_;
|
ctx->f_info_ = dec->f_info_;
|
||||||
dec->f_info_ = tmp;
|
dec->f_info_ = tmp;
|
||||||
}
|
}
|
||||||
WebPWorkerLaunch(worker);
|
WebPWorkerLaunch(worker); // (reconstruct)+filter in parallel
|
||||||
if (++dec->cache_id_ == dec->num_caches_) {
|
if (++dec->cache_id_ == dec->num_caches_) {
|
||||||
dec->cache_id_ = 0;
|
dec->cache_id_ = 0;
|
||||||
}
|
}
|
||||||
@ -357,7 +363,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
|
|
||||||
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
|
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
|
||||||
int ok = 1;
|
int ok = 1;
|
||||||
if (dec->use_threads_) {
|
if (dec->mt_method_ > 0) {
|
||||||
ok = WebPWorkerSync(&dec->worker_);
|
ok = WebPWorkerSync(&dec->worker_);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -397,7 +403,7 @@ int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io) {
|
|||||||
// Initialize multi/single-thread worker
|
// Initialize multi/single-thread worker
|
||||||
static int InitThreadContext(VP8Decoder* const dec) {
|
static int InitThreadContext(VP8Decoder* const dec) {
|
||||||
dec->cache_id_ = 0;
|
dec->cache_id_ = 0;
|
||||||
if (dec->use_threads_) {
|
if (dec->mt_method_ > 0) {
|
||||||
WebPWorker* const worker = &dec->worker_;
|
WebPWorker* const worker = &dec->worker_;
|
||||||
if (!WebPWorkerReset(worker)) {
|
if (!WebPWorkerReset(worker)) {
|
||||||
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
|
return VP8SetError(dec, VP8_STATUS_OUT_OF_MEMORY,
|
||||||
@ -414,6 +420,28 @@ static int InitThreadContext(VP8Decoder* const dec) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
|
||||||
|
const WebPHeaderStructure* const headers,
|
||||||
|
int width, int height) {
|
||||||
|
if (options == NULL || options->use_threads == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
(void)headers;
|
||||||
|
(void)width;
|
||||||
|
(void)height;
|
||||||
|
assert(!headers->is_lossless);
|
||||||
|
#if defined(WEBP_USE_THREAD)
|
||||||
|
if (width < MIN_WIDTH_FOR_THREADS) return 0;
|
||||||
|
// TODO(skal): tune the heuristic further
|
||||||
|
#if 0
|
||||||
|
if (height < 2 * width) return 2;
|
||||||
|
#endif
|
||||||
|
return 2;
|
||||||
|
#else // !WEBP_USE_THREAD
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#undef MT_CACHE_LINES
|
#undef MT_CACHE_LINES
|
||||||
#undef ST_CACHE_LINES
|
#undef ST_CACHE_LINES
|
||||||
|
|
||||||
@ -429,11 +457,11 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
|
const size_t mb_info_size = (mb_w + 1) * sizeof(VP8MB);
|
||||||
const size_t f_info_size =
|
const size_t f_info_size =
|
||||||
(dec->filter_type_ > 0) ?
|
(dec->filter_type_ > 0) ?
|
||||||
mb_w * (dec->use_threads_ ? 2 : 1) * sizeof(VP8FInfo)
|
mb_w * (dec->mt_method_ > 0 ? 2 : 1) * sizeof(VP8FInfo)
|
||||||
: 0;
|
: 0;
|
||||||
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
|
const size_t yuv_size = YUV_SIZE * sizeof(*dec->yuv_b_);
|
||||||
const size_t mb_data_size =
|
const size_t mb_data_size =
|
||||||
(dec->use_threads_ ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
|
(dec->mt_method_ == 2 ? 2 : 1) * mb_w * sizeof(*dec->mb_data_);
|
||||||
const size_t cache_height = (16 * num_caches
|
const size_t cache_height = (16 * num_caches
|
||||||
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
|
+ kFilterExtraRows[dec->filter_type_]) * 3 / 2;
|
||||||
const size_t cache_size = top_size * cache_height;
|
const size_t cache_size = top_size * cache_height;
|
||||||
@ -473,7 +501,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
mem += f_info_size;
|
mem += f_info_size;
|
||||||
dec->thread_ctx_.id_ = 0;
|
dec->thread_ctx_.id_ = 0;
|
||||||
dec->thread_ctx_.f_info_ = dec->f_info_;
|
dec->thread_ctx_.f_info_ = dec->f_info_;
|
||||||
if (dec->use_threads_) {
|
if (dec->mt_method_ > 0) {
|
||||||
// secondary cache line. The deblocking process need to make use of the
|
// secondary cache line. The deblocking process need to make use of the
|
||||||
// filtering strength from previous macroblock row, while the new ones
|
// filtering strength from previous macroblock row, while the new ones
|
||||||
// are being decoded in parallel. We'll just swap the pointers.
|
// are being decoded in parallel. We'll just swap the pointers.
|
||||||
@ -487,7 +515,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
|
|||||||
|
|
||||||
dec->mb_data_ = (VP8MBData*)mem;
|
dec->mb_data_ = (VP8MBData*)mem;
|
||||||
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
|
dec->thread_ctx_.mb_data_ = (VP8MBData*)mem;
|
||||||
if (dec->use_threads_) {
|
if (dec->mt_method_ == 2) {
|
||||||
dec->thread_ctx_.mb_data_ += mb_w;
|
dec->thread_ctx_.mb_data_ += mb_w;
|
||||||
}
|
}
|
||||||
mem += mb_data_size;
|
mem += mb_data_size;
|
||||||
|
@ -329,12 +329,6 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
|
|||||||
return VP8_STATUS_OUT_OF_MEMORY;
|
return VP8_STATUS_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
idec->dec_ = dec;
|
idec->dec_ = dec;
|
||||||
#ifdef WEBP_USE_THREAD
|
|
||||||
dec->use_threads_ = (idec->params_.options != NULL) &&
|
|
||||||
(idec->params_.options->use_threads > 0);
|
|
||||||
#else
|
|
||||||
dec->use_threads_ = 0;
|
|
||||||
#endif
|
|
||||||
dec->alpha_data_ = headers.alpha_data;
|
dec->alpha_data_ = headers.alpha_data;
|
||||||
dec->alpha_data_size_ = headers.alpha_data_size;
|
dec->alpha_data_size_ = headers.alpha_data_size;
|
||||||
ChangeState(idec, STATE_VP8_HEADER, headers.offset);
|
ChangeState(idec, STATE_VP8_HEADER, headers.offset);
|
||||||
@ -352,13 +346,14 @@ static VP8StatusCode DecodeWebPHeaders(WebPIDecoder* const idec) {
|
|||||||
static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
|
static VP8StatusCode DecodeVP8FrameHeader(WebPIDecoder* const idec) {
|
||||||
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
|
const uint8_t* data = idec->mem_.buf_ + idec->mem_.start_;
|
||||||
const size_t curr_size = MemDataSize(&idec->mem_);
|
const size_t curr_size = MemDataSize(&idec->mem_);
|
||||||
|
int width, height;
|
||||||
uint32_t bits;
|
uint32_t bits;
|
||||||
|
|
||||||
if (curr_size < VP8_FRAME_HEADER_SIZE) {
|
if (curr_size < VP8_FRAME_HEADER_SIZE) {
|
||||||
// Not enough data bytes to extract VP8 Frame Header.
|
// Not enough data bytes to extract VP8 Frame Header.
|
||||||
return VP8_STATUS_SUSPENDED;
|
return VP8_STATUS_SUSPENDED;
|
||||||
}
|
}
|
||||||
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, NULL, NULL)) {
|
if (!VP8GetInfo(data, curr_size, idec->chunk_size_, &width, &height)) {
|
||||||
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
return IDecError(idec, VP8_STATUS_BITSTREAM_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -425,7 +420,9 @@ static VP8StatusCode DecodePartition0(WebPIDecoder* const idec) {
|
|||||||
if (dec->status_ != VP8_STATUS_OK) {
|
if (dec->status_ != VP8_STATUS_OK) {
|
||||||
return IDecError(idec, dec->status_);
|
return IDecError(idec, dec->status_);
|
||||||
}
|
}
|
||||||
|
// This change must be done before calling VP8InitFrame()
|
||||||
|
dec->mt_method_ = VP8GetThreadMethod(params->options, NULL,
|
||||||
|
io->width, io->height);
|
||||||
if (!CopyParts0Data(idec)) {
|
if (!CopyParts0Data(idec)) {
|
||||||
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
|
return IDecError(idec, VP8_STATUS_OUT_OF_MEMORY);
|
||||||
}
|
}
|
||||||
|
@ -662,8 +662,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
|
|||||||
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
|
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (dec->use_threads_ && !WebPWorkerSync(&dec->worker_)) {
|
if (dec->mt_method_ > 0) {
|
||||||
return 0;
|
if (!WebPWorkerSync(&dec->worker_)) return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish
|
// Finish
|
||||||
@ -728,7 +728,7 @@ void VP8Clear(VP8Decoder* const dec) {
|
|||||||
if (dec == NULL) {
|
if (dec == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (dec->use_threads_) {
|
if (dec->mt_method_ > 0) {
|
||||||
WebPWorkerEnd(&dec->worker_);
|
WebPWorkerEnd(&dec->worker_);
|
||||||
}
|
}
|
||||||
ALPHDelete(dec->alph_dec_);
|
ALPHDelete(dec->alph_dec_);
|
||||||
|
@ -100,6 +100,9 @@ enum { MB_FEATURE_TREE_PROBS = 3,
|
|||||||
#define U_OFF (Y_OFF + BPS * 16 + BPS)
|
#define U_OFF (Y_OFF + BPS * 16 + BPS)
|
||||||
#define V_OFF (U_OFF + 16)
|
#define V_OFF (U_OFF + 16)
|
||||||
|
|
||||||
|
// minimal width under which lossy multi-threading is always disabled
|
||||||
|
#define MIN_WIDTH_FOR_THREADS 512
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Headers
|
// Headers
|
||||||
|
|
||||||
@ -229,7 +232,8 @@ struct VP8Decoder {
|
|||||||
|
|
||||||
// Worker
|
// Worker
|
||||||
WebPWorker worker_;
|
WebPWorker worker_;
|
||||||
int use_threads_; // use multi-thread
|
int mt_method_; // multi-thread method: 0=off, 1=[parse+recon][filter]
|
||||||
|
// 2=[parse][recon+filter]
|
||||||
int cache_id_; // current cache row
|
int cache_id_; // current cache row
|
||||||
int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
|
int num_caches_; // number of cached rows of 16 pixels (1, 2 or 3)
|
||||||
VP8ThreadContext thread_ctx_; // Thread context
|
VP8ThreadContext thread_ctx_; // Thread context
|
||||||
@ -288,8 +292,8 @@ struct VP8Decoder {
|
|||||||
size_t mem_size_;
|
size_t mem_size_;
|
||||||
|
|
||||||
// Per macroblock non-persistent infos.
|
// Per macroblock non-persistent infos.
|
||||||
int mb_x_, mb_y_; // current position, in macroblock units
|
int mb_x_, mb_y_; // current position, in macroblock units
|
||||||
VP8MBData* mb_data_; // parsed reconstruction data
|
VP8MBData* mb_data_; // parsed reconstruction data
|
||||||
|
|
||||||
// Filtering side-info
|
// Filtering side-info
|
||||||
int filter_type_; // 0=off, 1=simple, 2=complex
|
int filter_type_; // 0=off, 1=simple, 2=complex
|
||||||
@ -333,6 +337,11 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
|
|||||||
// Must always be called in pair with VP8EnterCritical().
|
// Must always be called in pair with VP8EnterCritical().
|
||||||
// Returns false in case of error.
|
// Returns false in case of error.
|
||||||
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
|
||||||
|
// Return the multi-threading method to use (0=off), depending
|
||||||
|
// on options and bitstream size. Only for lossy decoding.
|
||||||
|
int VP8GetThreadMethod(const WebPDecoderOptions* const options,
|
||||||
|
const WebPHeaderStructure* const headers,
|
||||||
|
int width, int height);
|
||||||
// Process the last decoded row (filtering + output)
|
// Process the last decoded row (filtering + output)
|
||||||
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
|
||||||
// To be called at the start of a new scanline, to initialize predictors.
|
// To be called at the start of a new scanline, to initialize predictors.
|
||||||
|
@ -452,11 +452,6 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
|
|||||||
if (dec == NULL) {
|
if (dec == NULL) {
|
||||||
return VP8_STATUS_OUT_OF_MEMORY;
|
return VP8_STATUS_OUT_OF_MEMORY;
|
||||||
}
|
}
|
||||||
#ifdef WEBP_USE_THREAD
|
|
||||||
dec->use_threads_ = params->options && (params->options->use_threads > 0);
|
|
||||||
#else
|
|
||||||
dec->use_threads_ = 0;
|
|
||||||
#endif
|
|
||||||
dec->alpha_data_ = headers.alpha_data;
|
dec->alpha_data_ = headers.alpha_data;
|
||||||
dec->alpha_data_size_ = headers.alpha_data_size;
|
dec->alpha_data_size_ = headers.alpha_data_size;
|
||||||
|
|
||||||
@ -468,6 +463,9 @@ static VP8StatusCode DecodeInto(const uint8_t* const data, size_t data_size,
|
|||||||
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
|
status = WebPAllocateDecBuffer(io.width, io.height, params->options,
|
||||||
params->output);
|
params->output);
|
||||||
if (status == VP8_STATUS_OK) { // Decode
|
if (status == VP8_STATUS_OK) { // Decode
|
||||||
|
// This change must be done before calling VP8Decode()
|
||||||
|
dec->mt_method_ = VP8GetThreadMethod(params->options, &headers,
|
||||||
|
io.width, io.height);
|
||||||
if (!VP8Decode(dec, &io)) {
|
if (!VP8Decode(dec, &io)) {
|
||||||
status = dec->status_;
|
status = dec->status_;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user