merge with Itseez/opencv

Ozan Tonkal 2013-09-15 16:32:56 +02:00
commit 3165b526f2
2634 changed files with 198260 additions and 141747 deletions

80
.gitattributes vendored

@ -1,42 +1,58 @@
.git* export-ignore
* text=auto whitespace=trailing-space,space-before-tab,-indent-with-non-tab,tab-in-indent,tabwidth=4
*.py text
*.cpp text
*.hpp text
*.cxx text
*.hxx text
*.mm text
*.c text
*.h text
*.i text
*.js text
*.java text
*.scala text
*.cu text
*.cl text
*.css_t text
*.qrc text
*.qss text
*.S text
*.rst text
*.tex text
*.sty text
.git* text export-ignore
*.aidl text
*.mk text
*.aidl text
*.appxmanifest text
*.bib text
*.c text
*.cl text
*.conf text
*.cpp text
*.css_t text
*.cu text
*.cxx text
*.def text
*.filelist text
*.h text
*.hpp text
*.htm text
*.html text
*.hxx text
*.i text
*.idl text
*.java text
*.js text
*.mk text
*.mm text
*.plist text
*.properties text
*.py text
*.qrc text
*.qss text
*.S text
*.sbt text
*.scala text
*.sty text
*.tex text
*.txt text
*.xaml text
# reST underlines/overlines can look like conflict markers
*.rst text conflict-marker-size=80
*.cmake text whitespace=tabwidth=2
*.cmakein text whitespace=tabwidth=2
*.in text whitespace=tabwidth=2
CMakeLists.txt text whitespace=tabwidth=2
*.png binary
*.jepg binary
*.jpg binary
*.avi binary
*.bmp binary
*.exr binary
*.ico binary
*.jpeg binary
*.jpg binary
*.png binary
*.a binary
*.so binary
@ -47,6 +63,7 @@ CMakeLists.txt text whitespace=tabwidth=2
*.pbxproj binary
*.vec binary
*.doc binary
*.dia binary
*.xml -text whitespace=cr-at-eol
*.yml -text whitespace=cr-at-eol
@ -55,9 +72,12 @@ CMakeLists.txt text whitespace=tabwidth=2
.cproject -text whitespace=cr-at-eol merge=union
org.eclipse.jdt.core.prefs -text whitespace=cr-at-eol merge=union
*.vcproj text eol=crlf merge=union
*.bat text eol=crlf
*.cmd text eol=crlf
*.cmd.tmpl text eol=crlf
*.dsp text eol=crlf -whitespace
*.sln text eol=crlf -whitespace
*.vcproj text eol=crlf -whitespace merge=union
*.vcxproj text eol=crlf -whitespace merge=union
*.sh text eol=lf
*.sh text eol=lf

3
.gitignore vendored

@ -2,7 +2,10 @@
.DS_Store
refman.rst
OpenCV4Tegra/
tegra/
*.user
.sw[a-z]
.*.swp
tags
Thumbs.db
*.autosave

1
3rdparty/.gitattributes vendored Normal file

@ -0,0 +1 @@
* -whitespace

View File

@ -1,4 +1,4 @@
set(NEW_FFMPEG 1)
set(HAVE_FFMPEG 1)
set(HAVE_FFMPEG_CODEC 1)
set(HAVE_FFMPEG_FORMAT 1)
set(HAVE_FFMPEG_UTIL 1)
@ -8,4 +8,4 @@ set(HAVE_GENTOO_FFMPEG 1)
set(ALIASOF_libavcodec_VERSION 53.61.100)
set(ALIASOF_libavformat_VERSION 53.32.100)
set(ALIASOF_libavutil_VERSION 51.35.100)
set(ALIASOF_libswscale_VERSION 2.1.100)
set(ALIASOF_libswscale_VERSION 2.1.100)

View File

@ -1,2 +1,2 @@
set path=c:\dev\msys32\bin;%path% & gcc -Wall -shared -o opencv_ffmpeg.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat -lavcodec -lavdevice -lswscale -lavutil -lwsock32
set path=c:\dev\msys64\bin;%path% & gcc -m64 -Wall -shared -o opencv_ffmpeg_64.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat64 -lavcodec64 -lavdevice64 -lswscale64 -lavutil64 -lavcore64 -lwsock32 -lws2_32
set path=c:\dev\msys64\bin;%path% & gcc -m64 -Wall -shared -o opencv_ffmpeg_64.dll -O2 -x c++ -I../include -I../include/ffmpeg_ -I../../modules/highgui/src ffopencv.c -L../lib -lavformat64 -lavcodec64 -lavdevice64 -lswscale64 -lavutil64 -lavcore64 -lwsock32 -lws2_32

View File

@ -40,5 +40,3 @@ How to update opencv_ffmpeg.dll and opencv_ffmpeg_64.dll when a new version of F
8. Then, go to <opencv>\3rdparty\ffmpeg, edit make.bat
(change paths to the actual paths to your msys32 and msys64 distributions) and then run make.bat

Binary files changed (contents not shown), among them the vendored executable files 3rdparty/lib/mips/libnative_camera_r4.3.0.so and 3rdparty/lib/x86/libnative_camera_r4.3.0.so.

View File

@ -23,7 +23,9 @@ if(WIN32 AND NOT MINGW)
add_definitions(-DJAS_WIN_MSVC_BUILD)
endif(WIN32 AND NOT MINGW)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes -Wmissing-declarations -Wunused -Wshadow -Wsign-compare)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-implicit-function-declaration -Wno-uninitialized -Wmissing-prototypes
-Wno-unused-but-set-parameter -Wmissing-declarations -Wunused -Wshadow
-Wsign-compare -Wstrict-overflow)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang
ocv_warnings_disable(CMAKE_C_FLAGS /wd4013 /wd4018 /wd4101 /wd4244 /wd4267 /wd4715) # vs2005
@ -47,4 +49,3 @@ endif()
if(NOT BUILD_SHARED_LIBS)
install(TARGETS ${JASPER_LIBRARY} ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT main)
endif()

View File

@ -9,7 +9,7 @@ else()
project(${PNG_LIBRARY})
endif()
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" ${ZLIB_INCLUDE_DIR})
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" ${ZLIB_INCLUDE_DIRS})
file(GLOB lib_srcs *.c)
file(GLOB lib_hdrs *.h)
@ -29,8 +29,12 @@ if(MSVC)
add_definitions(-D_CRT_SECURE_NO_DEPRECATE)
endif(MSVC)
if (HAVE_WINRT)
add_definitions(-DHAVE_WINRT)
endif()
add_library(${PNG_LIBRARY} STATIC ${lib_srcs} ${lib_hdrs})
target_link_libraries(${PNG_LIBRARY} ${ZLIB_LIBRARY})
target_link_libraries(${PNG_LIBRARY} ${ZLIB_LIBRARIES})
if(UNIX)
if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)

22
3rdparty/libpng/opencv-libpng.patch vendored Normal file

@ -0,0 +1,22 @@
diff --git a/3rdparty/libpng/pngpriv.h b/3rdparty/libpng/pngpriv.h
index 07b2b0b..e7824b8 100644
--- a/3rdparty/libpng/pngpriv.h
+++ b/3rdparty/libpng/pngpriv.h
@@ -360,7 +360,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
/* Memory model/platform independent fns */
#ifndef PNG_ABORT
-# ifdef _WINDOWS_
+# if defined(_WINDOWS_) && !defined(HAVE_WINRT)
# define PNG_ABORT() ExitProcess(0)
# else
# define PNG_ABORT() abort()
@@ -378,7 +378,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
# define png_memcpy _fmemcpy
# define png_memset _fmemset
#else
-# ifdef _WINDOWS_ /* Favor Windows over C runtime fns */
+# if defined(_WINDOWS_) && !defined(HAVE_WINRT) /* Favor Windows over C runtime fns */
# define CVT_PTR(ptr) (ptr)
# define CVT_PTR_NOCHECK(ptr) (ptr)
# define png_strlen lstrlenA

View File

@ -360,7 +360,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
/* Memory model/platform independent fns */
#ifndef PNG_ABORT
# ifdef _WINDOWS_
# if defined(_WINDOWS_) && !defined(HAVE_WINRT)
# define PNG_ABORT() ExitProcess(0)
# else
# define PNG_ABORT() abort()
@ -378,7 +378,7 @@ typedef PNG_CONST png_uint_16p FAR * png_const_uint_16pp;
# define png_memcpy _fmemcpy
# define png_memset _fmemset
#else
# ifdef _WINDOWS_ /* Favor Windows over C runtime fns */
# if defined(_WINDOWS_) && !defined(HAVE_WINRT) /* Favor Windows over C runtime fns */
# define CVT_PTR(ptr) (ptr)
# define CVT_PTR_NOCHECK(ptr) (ptr)
# define png_strlen lstrlenA

View File

@ -17,14 +17,14 @@ check_include_file(string.h HAVE_STRING_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(unistd.h HAVE_UNISTD_H)
if(WIN32)
if(WIN32 AND NOT HAVE_WINRT)
set(USE_WIN32_FILEIO 1)
endif()
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/tif_config.h.cmakein"
"${CMAKE_CURRENT_BINARY_DIR}/tif_config.h" @ONLY)
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIR})
ocv_include_directories("${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" ${ZLIB_INCLUDE_DIRS})
set(lib_srcs
tif_aux.c
@ -79,14 +79,12 @@ set(lib_srcs
"${CMAKE_CURRENT_BINARY_DIR}/tif_config.h"
)
if(UNIX)
if(WIN32 AND NOT HAVE_WINRT)
list(APPEND lib_srcs tif_win32.c)
else()
list(APPEND lib_srcs tif_unix.c)
endif()
if(WIN32)
list(APPEND lib_srcs tif_win32.c)
endif(WIN32)
ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare
-Wcast-align -Wshadow -Wno-maybe-uninitialized -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast)
ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang

View File

@ -168,4 +168,3 @@
/* Support Deflate compression */
#define ZIP_SUPPORT 1

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Alpha-plane decompression.
@ -13,27 +15,17 @@
#include "./vp8i.h"
#include "./vp8li.h"
#include "../utils/filters.h"
#include "../utils/quant_levels.h"
#include "../utils/quant_levels_dec.h"
#include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
// TODO(skal): move to dsp/ ?
static void CopyPlane(const uint8_t* src, int src_stride,
uint8_t* dst, int dst_stride, int width, int height) {
while (height-- > 0) {
memcpy(dst, src, width);
src += src_stride;
dst += dst_stride;
}
}
//------------------------------------------------------------------------------
// Decodes the compressed data 'data' of size 'data_size' into the 'output'.
// The 'output' buffer should be pre-allocated and must be of the same
// dimension 'height'x'stride', as that of the image.
// dimension 'height'x'width', as that of the image.
//
// Returns 1 on successfully decoding the compressed alpha and
// 0 if either:
@ -41,17 +33,16 @@ static void CopyPlane(const uint8_t* src, int src_stride,
// error returned by appropriate compression method.
static int DecodeAlpha(const uint8_t* data, size_t data_size,
int width, int height, int stride, uint8_t* output) {
uint8_t* decoded_data = NULL;
const size_t decoded_size = height * width;
uint8_t* unfiltered_data = NULL;
int width, int height, uint8_t* output) {
WEBP_FILTER_TYPE filter;
int pre_processing;
int rsrv;
int ok = 0;
int method;
const uint8_t* const alpha_data = data + ALPHA_HEADER_LEN;
const size_t alpha_data_size = data_size - ALPHA_HEADER_LEN;
assert(width > 0 && height > 0 && stride >= width);
assert(width > 0 && height > 0);
assert(data != NULL && output != NULL);
if (data_size <= ALPHA_HEADER_LEN) {
@ -71,44 +62,26 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
}
if (method == ALPHA_NO_COMPRESSION) {
ok = (data_size >= decoded_size);
decoded_data = (uint8_t*)data + ALPHA_HEADER_LEN;
const size_t alpha_decoded_size = height * width;
ok = (alpha_data_size >= alpha_decoded_size);
if (ok) memcpy(output, alpha_data, alpha_decoded_size);
} else {
decoded_data = (uint8_t*)malloc(decoded_size);
if (decoded_data == NULL) return 0;
ok = VP8LDecodeAlphaImageStream(width, height,
data + ALPHA_HEADER_LEN,
data_size - ALPHA_HEADER_LEN,
decoded_data);
ok = VP8LDecodeAlphaImageStream(width, height, alpha_data, alpha_data_size,
output);
}
if (ok) {
WebPFilterFunc unfilter_func = WebPUnfilters[filter];
WebPUnfilterFunc unfilter_func = WebPUnfilters[filter];
if (unfilter_func != NULL) {
unfiltered_data = (uint8_t*)malloc(decoded_size);
if (unfiltered_data == NULL) {
ok = 0;
goto Error;
}
// TODO(vikas): Implement on-the-fly decoding & filter mechanism to decode
// and apply filter per image-row.
unfilter_func(decoded_data, width, height, 1, width, unfiltered_data);
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(unfiltered_data, width, output, stride, width, height);
free(unfiltered_data);
} else {
// Construct raw_data (height x stride) from alpha data (height x width).
CopyPlane(decoded_data, width, output, stride, width, height);
unfilter_func(width, height, width, output);
}
if (pre_processing == ALPHA_PREPROCESSED_LEVELS) {
ok = DequantizeLevels(decoded_data, width, height);
ok = DequantizeLevels(output, width, height);
}
}
Error:
if (method != ALPHA_NO_COMPRESSION) {
free(decoded_data);
}
return ok;
}
@ -116,23 +89,25 @@ static int DecodeAlpha(const uint8_t* data, size_t data_size,
const uint8_t* VP8DecompressAlphaRows(VP8Decoder* const dec,
int row, int num_rows) {
const int stride = dec->pic_hdr_.width_;
const int width = dec->pic_hdr_.width_;
const int height = dec->pic_hdr_.height_;
if (row < 0 || num_rows < 0 || row + num_rows > dec->pic_hdr_.height_) {
if (row < 0 || num_rows < 0 || row + num_rows > height) {
return NULL; // sanity check.
}
if (row == 0) {
// Decode everything during the first call.
assert(!dec->is_alpha_decoded_);
if (!DecodeAlpha(dec->alpha_data_, (size_t)dec->alpha_data_size_,
dec->pic_hdr_.width_, dec->pic_hdr_.height_, stride,
dec->alpha_plane_)) {
width, height, dec->alpha_plane_)) {
return NULL; // Error.
}
dec->is_alpha_decoded_ = 1;
}
// Return a pointer to the current decoded row.
return dec->alpha_plane_ + row * stride;
return dec->alpha_plane_ + row * width;
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Everything about WebPDecBuffer

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Low-level API for VP8 decoder

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Frame-reconstruction function. Memory allocation.
@ -97,53 +99,50 @@ static void FilterRow(const VP8Decoder* const dec) {
}
//------------------------------------------------------------------------------
// Precompute the filtering strength for each segment and each i4x4/i16x16 mode.
void VP8StoreBlock(VP8Decoder* const dec) {
static void PrecomputeFilterStrengths(VP8Decoder* const dec) {
if (dec->filter_type_ > 0) {
VP8FInfo* const info = dec->f_info_ + dec->mb_x_;
const int skip = dec->mb_info_[dec->mb_x_].skip_;
int level = dec->filter_levels_[dec->segment_];
if (dec->filter_hdr_.use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += dec->filter_hdr_.ref_lf_delta_[0];
if (dec->is_i4x4_) {
level += dec->filter_hdr_.mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
if (dec->filter_hdr_.sharpness_ > 0) {
if (dec->filter_hdr_.sharpness_ > 4) {
level >>= 2;
int s;
const VP8FilterHeader* const hdr = &dec->filter_hdr_;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int i4x4;
// First, compute the initial level
int base_level;
if (dec->segment_hdr_.use_segment_) {
base_level = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
base_level += hdr->level_;
}
} else {
level >>= 1;
base_level = hdr->level_;
}
if (level > 9 - dec->filter_hdr_.sharpness_) {
level = 9 - dec->filter_hdr_.sharpness_;
}
}
for (i4x4 = 0; i4x4 <= 1; ++i4x4) {
VP8FInfo* const info = &dec->fstrengths_[s][i4x4];
int level = base_level;
if (hdr->use_lf_delta_) {
// TODO(skal): only CURRENT is handled for now.
level += hdr->ref_lf_delta_[0];
if (i4x4) {
level += hdr->mode_lf_delta_[0];
}
}
level = (level < 0) ? 0 : (level > 63) ? 63 : level;
info->f_level_ = level;
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = (!skip || dec->is_i4x4_);
}
{
// Transfer samples to row cache
int y;
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const ydst = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const udst = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const vdst = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (y = 0; y < 16; ++y) {
memcpy(ydst + y * dec->cache_y_stride_,
dec->yuv_b_ + Y_OFF + y * BPS, 16);
}
for (y = 0; y < 8; ++y) {
memcpy(udst + y * dec->cache_uv_stride_,
dec->yuv_b_ + U_OFF + y * BPS, 8);
memcpy(vdst + y * dec->cache_uv_stride_,
dec->yuv_b_ + V_OFF + y * BPS, 8);
if (hdr->sharpness_ > 0) {
if (hdr->sharpness_ > 4) {
level >>= 2;
} else {
level >>= 1;
}
if (level > 9 - hdr->sharpness_) {
level = 9 - hdr->sharpness_;
}
}
info->f_ilevel_ = (level < 1) ? 1 : level;
info->f_inner_ = 0;
}
}
}
}
@ -339,6 +338,7 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io) {
dec->br_mb_y_ = dec->mb_h_;
}
}
PrecomputeFilterStrengths(dec);
return VP8_STATUS_OK;
}
@ -496,6 +496,7 @@ static int AllocateMemory(VP8Decoder* const dec) {
// alpha plane
dec->alpha_plane_ = alpha_size ? (uint8_t*)mem : NULL;
mem += alpha_size;
assert(mem <= (uint8_t*)dec->mem_ + dec->mem_size_);
// note: left-info is initialized once for all.
memset(dec->mb_info_ - 1, 0, mb_info_size);
@ -551,6 +552,7 @@ static WEBP_INLINE void Copy32b(uint8_t* dst, uint8_t* src) {
}
void VP8ReconstructBlock(VP8Decoder* const dec) {
int j;
uint8_t* const y_dst = dec->yuv_b_ + Y_OFF;
uint8_t* const u_dst = dec->yuv_b_ + U_OFF;
uint8_t* const v_dst = dec->yuv_b_ + V_OFF;
@ -558,7 +560,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
// Rotate in the left samples from previously decoded block. We move four
// pixels at a time for alignment reason, and because of in-loop filter.
if (dec->mb_x_ > 0) {
int j;
for (j = -1; j < 16; ++j) {
Copy32b(&y_dst[j * BPS - 4], &y_dst[j * BPS + 12]);
}
@ -567,7 +568,6 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
Copy32b(&v_dst[j * BPS - 4], &v_dst[j * BPS + 4]);
}
} else {
int j;
for (j = 0; j < 16; ++j) {
y_dst[j * BPS - 1] = 129;
}
@ -670,6 +670,21 @@ void VP8ReconstructBlock(VP8Decoder* const dec) {
}
}
}
// Transfer reconstructed samples from yuv_b_ cache to final destination.
{
const int y_offset = dec->cache_id_ * 16 * dec->cache_y_stride_;
const int uv_offset = dec->cache_id_ * 8 * dec->cache_uv_stride_;
uint8_t* const y_out = dec->cache_y_ + dec->mb_x_ * 16 + y_offset;
uint8_t* const u_out = dec->cache_u_ + dec->mb_x_ * 8 + uv_offset;
uint8_t* const v_out = dec->cache_v_ + dec->mb_x_ * 8 + uv_offset;
for (j = 0; j < 16; ++j) {
memcpy(y_out + j * dec->cache_y_stride_, y_dst + j * BPS, 16);
}
for (j = 0; j < 8; ++j) {
memcpy(u_out + j * dec->cache_uv_stride_, u_dst + j * BPS, 8);
memcpy(v_out + j * dec->cache_uv_stride_, v_dst + j * BPS, 8);
}
}
}
//------------------------------------------------------------------------------

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Incremental decoding
@ -97,6 +99,23 @@ static WEBP_INLINE size_t MemDataSize(const MemBuffer* mem) {
return (mem->end_ - mem->start_);
}
// Check if we need to preserve the compressed alpha data, as it may not have
// been decoded yet.
static int NeedCompressedAlpha(const WebPIDecoder* const idec) {
if (idec->state_ == STATE_PRE_VP8) {
// We haven't parsed the headers yet, so we don't know whether the image is
// lossy or lossless. This also means that we haven't parsed the ALPH chunk.
return 0;
}
if (idec->is_lossless_) {
return 0; // ALPH chunk is not present for lossless images.
} else {
const VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
assert(dec != NULL); // Must be true as idec->state_ != STATE_PRE_VP8.
return (dec->alpha_data_ != NULL) && !dec->is_alpha_decoded_;
}
}
static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const new_base = mem->buf_ + mem->start_;
@ -122,6 +141,7 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
}
assert(last_part >= 0);
dec->parts_[last_part].buf_end_ = mem->buf_ + mem->end_;
if (NeedCompressedAlpha(idec)) dec->alpha_data_ += offset;
} else { // Resize lossless bitreader
VP8LDecoder* const dec = (VP8LDecoder*)idec->dec_;
VP8LBitReaderSetBuffer(&dec->br_, new_base, MemDataSize(mem));
@ -133,8 +153,12 @@ static void DoRemap(WebPIDecoder* const idec, ptrdiff_t offset) {
// size if required and also updates VP8BitReader's if new memory is allocated.
static int AppendToMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
VP8Decoder* const dec = (VP8Decoder*)idec->dec_;
MemBuffer* const mem = &idec->mem_;
const uint8_t* const old_base = mem->buf_ + mem->start_;
const int need_compressed_alpha = NeedCompressedAlpha(idec);
const uint8_t* const old_start = mem->buf_ + mem->start_;
const uint8_t* const old_base =
need_compressed_alpha ? dec->alpha_data_ : old_start;
assert(mem->mode_ == MEM_MODE_APPEND);
if (data_size > MAX_CHUNK_PAYLOAD) {
// security safeguard: trying to allocate more than what the format
@ -143,7 +167,8 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
}
if (mem->end_ + data_size > mem->buf_size_) { // Need some free memory
const size_t current_size = MemDataSize(mem);
const size_t new_mem_start = old_start - old_base;
const size_t current_size = MemDataSize(mem) + new_mem_start;
const uint64_t new_size = (uint64_t)current_size + data_size;
const uint64_t extra_size = (new_size + CHUNK_SIZE - 1) & ~(CHUNK_SIZE - 1);
uint8_t* const new_buf =
@ -153,7 +178,7 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
free(mem->buf_);
mem->buf_ = new_buf;
mem->buf_size_ = (size_t)extra_size;
mem->start_ = 0;
mem->start_ = new_mem_start;
mem->end_ = current_size;
}
@ -161,14 +186,15 @@ static int AppendToMemBuffer(WebPIDecoder* const idec,
mem->end_ += data_size;
assert(mem->end_ <= mem->buf_size_);
DoRemap(idec, mem->buf_ + mem->start_ - old_base);
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
return 1;
}
static int RemapMemBuffer(WebPIDecoder* const idec,
const uint8_t* const data, size_t data_size) {
MemBuffer* const mem = &idec->mem_;
const uint8_t* const old_base = mem->buf_ + mem->start_;
const uint8_t* const old_buf = mem->buf_;
const uint8_t* const old_start = old_buf + mem->start_;
assert(mem->mode_ == MEM_MODE_MAP);
if (data_size < mem->buf_size_) return 0; // can't remap to a shorter buffer!
@ -176,7 +202,7 @@ static int RemapMemBuffer(WebPIDecoder* const idec,
mem->buf_ = (uint8_t*)data;
mem->end_ = mem->buf_size_ = data_size;
DoRemap(idec, mem->buf_ + mem->start_ - old_base);
DoRemap(idec, mem->buf_ + mem->start_ - old_start);
return 1;
}
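The AppendToMemBuffer/RemapMemBuffer changes above follow one rule: when the backing buffer moves, every raw pointer into it (here, the saved compressed-alpha pointer handled by DoRemap) must be shifted by the same offset. A minimal standalone sketch of that pattern, with hypothetical names unrelated to libwebp:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
  uint8_t* buf;      /* owned storage */
  size_t used;       /* bytes currently in use */
  uint8_t* marker;   /* optional pointer into buf (analogous to alpha_data_) */
} GrowBuf;

/* Move the contents into a larger allocation and re-base the interior pointer
 * by the offset it had in the old buffer. Returns 0 on allocation failure. */
static int GrowBufReserve(GrowBuf* const b, size_t new_capacity) {
  uint8_t* const new_buf = (uint8_t*)malloc(new_capacity);
  if (new_buf == NULL) return 0;
  memcpy(new_buf, b->buf, b->used);
  if (b->marker != NULL) {
    b->marker = new_buf + (b->marker - b->buf);   /* same offset, new base */
  }
  free(b->buf);
  b->buf = new_buf;
  return 1;
}

int main(void) {
  GrowBuf b;
  b.buf = (uint8_t*)malloc(4);
  if (b.buf == NULL) return 1;
  b.used = 4;
  memcpy(b.buf, "abcd", 4);
  b.marker = b.buf + 2;            /* points at 'c' */
  if (GrowBufReserve(&b, 1024)) {
    printf("%c\n", *b.marker);     /* still 'c' after the buffer moved */
  }
  free(b.buf);
  return 0;
}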
@ -425,9 +451,8 @@ static VP8StatusCode DecodeRemaining(WebPIDecoder* const idec) {
}
return VP8_STATUS_SUSPENDED;
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
// Release buffer only if there is only one partition
if (dec->num_parts_ == 1) {
@ -596,12 +621,22 @@ void WebPIDelete(WebPIDecoder* idec) {
WebPIDecoder* WebPINewRGB(WEBP_CSP_MODE mode, uint8_t* output_buffer,
size_t output_buffer_size, int output_stride) {
const int is_external_memory = (output_buffer != NULL);
WebPIDecoder* idec;
if (mode >= MODE_YUV) return NULL;
if (!is_external_memory) { // Overwrite parameters to sane values.
output_buffer_size = 0;
output_stride = 0;
} else { // A buffer was passed. Validate the other params.
if (output_stride == 0 || output_buffer_size == 0) {
return NULL; // invalid parameter.
}
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = mode;
idec->output_.is_external_memory = 1;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.RGBA.rgba = output_buffer;
idec->output_.u.RGBA.stride = output_stride;
idec->output_.u.RGBA.size = output_buffer_size;
@ -612,10 +647,30 @@ WebPIDecoder* WebPINewYUVA(uint8_t* luma, size_t luma_size, int luma_stride,
uint8_t* u, size_t u_size, int u_stride,
uint8_t* v, size_t v_size, int v_stride,
uint8_t* a, size_t a_size, int a_stride) {
WebPIDecoder* const idec = WebPINewDecoder(NULL);
const int is_external_memory = (luma != NULL);
WebPIDecoder* idec;
WEBP_CSP_MODE colorspace;
if (!is_external_memory) { // Overwrite parameters to sane values.
luma_size = u_size = v_size = a_size = 0;
luma_stride = u_stride = v_stride = a_stride = 0;
u = v = a = NULL;
colorspace = MODE_YUVA;
} else { // A luma buffer was passed. Validate the other parameters.
if (u == NULL || v == NULL) return NULL;
if (luma_size == 0 || u_size == 0 || v_size == 0) return NULL;
if (luma_stride == 0 || u_stride == 0 || v_stride == 0) return NULL;
if (a != NULL) {
if (a_size == 0 || a_stride == 0) return NULL;
}
colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
}
idec = WebPINewDecoder(NULL);
if (idec == NULL) return NULL;
idec->output_.colorspace = (a == NULL) ? MODE_YUV : MODE_YUVA;
idec->output_.is_external_memory = 1;
idec->output_.colorspace = colorspace;
idec->output_.is_external_memory = is_external_memory;
idec->output_.u.YUVA.y = luma;
idec->output_.u.YUVA.y_stride = luma_stride;
idec->output_.u.YUVA.y_size = luma_size;

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// functions for sample output.

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Enhancement layer (for YUV444/422)

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Quantizer initialization

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Coding trees and probas

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// main entry for the decoder
@ -236,20 +238,6 @@ static int ParseFilterHeader(VP8BitReader* br, VP8Decoder* const dec) {
}
}
dec->filter_type_ = (hdr->level_ == 0) ? 0 : hdr->simple_ ? 1 : 2;
if (dec->filter_type_ > 0) { // precompute filter levels per segment
if (dec->segment_hdr_.use_segment_) {
int s;
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
int strength = dec->segment_hdr_.filter_strength_[s];
if (!dec->segment_hdr_.absolute_delta_) {
strength += hdr->level_;
}
dec->filter_levels_[s] = strength;
}
} else {
dec->filter_levels_[0] = hdr->level_;
}
}
return !br->eof_;
}
@ -458,7 +446,7 @@ int VP8GetHeaders(VP8Decoder* const dec, VP8Io* const io) {
//------------------------------------------------------------------------------
// Residual decoding (Paragraph 13.2 / 13.3)
static const uint8_t kBands[16 + 1] = {
static const int kBands[16 + 1] = {
0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7,
0 // extra entry as sentinel
};
@ -474,6 +462,39 @@ static const uint8_t kZigzag[16] = {
};
typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; // for const-casting
typedef const uint8_t (*ProbaCtxArray)[NUM_PROBAS];
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v;
if (!VP8GetBit(br, p[3])) {
if (!VP8GetBit(br, p[4])) {
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
if (!VP8GetBit(br, p[7])) {
v = 5 + VP8GetBit(br, 159);
} else {
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
const int bit0 = VP8GetBit(br, p[9 + bit1]);
const int cat = 2 * bit1 + bit0;
v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab);
}
v += 3 + (8 << cat);
}
}
return v;
}
// Returns the position of the last non-zero coeff plus one
// (and 0 if there's no coeff at all)
@ -484,54 +505,26 @@ static int GetCoeffs(VP8BitReader* const br, ProbaArray prob,
if (!VP8GetBit(br, p[0])) { // first EOB is more a 'CBP' bit.
return 0;
}
while (1) {
++n;
for (; n < 16; ++n) {
const ProbaCtxArray p_ctx = prob[kBands[n + 1]];
if (!VP8GetBit(br, p[1])) {
p = prob[kBands[n]][0];
p = p_ctx[0];
} else { // non zero coeff
int v, j;
int v;
if (!VP8GetBit(br, p[2])) {
p = prob[kBands[n]][1];
v = 1;
p = p_ctx[1];
} else {
if (!VP8GetBit(br, p[3])) {
if (!VP8GetBit(br, p[4])) {
v = 2;
} else {
v = 3 + VP8GetBit(br, p[5]);
}
} else {
if (!VP8GetBit(br, p[6])) {
if (!VP8GetBit(br, p[7])) {
v = 5 + VP8GetBit(br, 159);
} else {
v = 7 + 2 * VP8GetBit(br, 165);
v += VP8GetBit(br, 145);
}
} else {
const uint8_t* tab;
const int bit1 = VP8GetBit(br, p[8]);
const int bit0 = VP8GetBit(br, p[9 + bit1]);
const int cat = 2 * bit1 + bit0;
v = 0;
for (tab = kCat3456[cat]; *tab; ++tab) {
v += v + VP8GetBit(br, *tab);
}
v += 3 + (8 << cat);
}
}
p = prob[kBands[n]][2];
v = GetLargeValue(br, p);
p = p_ctx[2];
}
j = kZigzag[n - 1];
out[j] = VP8GetSigned(br, v) * dq[j > 0];
if (n == 16 || !VP8GetBit(br, p[0])) { // EOB
return n;
out[kZigzag[n]] = VP8GetSigned(br, v) * dq[n > 0];
if (n < 15 && !VP8GetBit(br, p[0])) { // EOB
return n + 1;
}
}
if (n == 16) {
return 16;
}
}
return 16;
}
// Alias-safe way of converting 4bytes to 32bits.
@ -670,6 +663,12 @@ int VP8DecodeMB(VP8Decoder* const dec, VP8BitReader* const token_br) {
dec->non_zero_ac_ = 0;
}
if (dec->filter_type_ > 0) { // store filter info
VP8FInfo* const finfo = dec->f_info_ + dec->mb_x_;
*finfo = dec->fstrengths_[dec->segment_][dec->is_i4x4_];
finfo->f_inner_ = (!info->skip_ || dec->is_i4x4_);
}
return (!token_br->eof_);
}
@ -693,10 +692,8 @@ static int ParseFrame(VP8Decoder* const dec, VP8Io* io) {
return VP8SetError(dec, VP8_STATUS_NOT_ENOUGH_DATA,
"Premature end-of-file encountered.");
}
// Reconstruct and emit samples.
VP8ReconstructBlock(dec);
// Store data and save block's filtering params
VP8StoreBlock(dec);
}
if (!VP8ProcessRow(dec, io)) {
return VP8SetError(dec, VP8_STATUS_USER_ABORT, "Output aborted.");

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// VP8 decoder: internal header.
@ -27,7 +29,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 0
#define DEC_MIN_VERSION 2
#define DEC_MIN_VERSION 3
#define DEC_REV_VERSION 1
#define ONLY_KEYFRAME_CODE // to remove any code related to P-Frames
@ -157,7 +159,7 @@ typedef struct { // filter specs
} VP8FInfo;
typedef struct { // used for syntax-parsing
unsigned int nz_; // non-zero AC/DC coeffs
unsigned int nz_:24; // non-zero AC/DC coeffs (24bit)
unsigned int dc_nz_:1; // non-zero DC coeffs
unsigned int skip_:1; // block type
} VP8MB;
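The bit-field change above packs nz_, dc_nz_ and skip_ into a single 32-bit word. A standalone sketch with hypothetical struct names, assuming a typical compiler layout where the packed variant occupies 4 bytes and the original 8:

#include <stdio.h>

/* Hypothetical stand-ins for the two layouts above. */
typedef struct {
  unsigned int nz_;
  unsigned int dc_nz_:1;
  unsigned int skip_:1;
} WideMB;

typedef struct {
  unsigned int nz_:24;
  unsigned int dc_nz_:1;
  unsigned int skip_:1;
} PackedMB;

int main(void) {
  /* On common 32/64-bit ABIs this prints "8 4": the packed fields share one word. */
  printf("%u %u\n", (unsigned)sizeof(WideMB), (unsigned)sizeof(PackedMB));
  return 0;
}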
@ -269,13 +271,14 @@ struct VP8Decoder {
uint32_t non_zero_ac_;
// Filtering side-info
int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag
uint8_t filter_levels_[NUM_MB_SEGMENTS]; // precalculated per-segment
int filter_type_; // 0=off, 1=simple, 2=complex
int filter_row_; // per-row flag
VP8FInfo fstrengths_[NUM_MB_SEGMENTS][2]; // precalculated per-segment/type
// extensions
const uint8_t* alpha_data_; // compressed alpha data (if present)
size_t alpha_data_size_;
int is_alpha_decoded_; // true if alpha_data_ is decoded in alpha_plane_
uint8_t* alpha_plane_; // output. Persistent, contains the whole data.
int layer_colorspace_;
@ -312,8 +315,6 @@ VP8StatusCode VP8EnterCritical(VP8Decoder* const dec, VP8Io* const io);
int VP8ExitCritical(VP8Decoder* const dec, VP8Io* const io);
// Process the last decoded row (filtering + output)
int VP8ProcessRow(VP8Decoder* const dec, VP8Io* const io);
// Store a block, along with filtering params
void VP8StoreBlock(VP8Decoder* const dec);
// To be called at the start of a new scanline, to initialize predictors.
void VP8InitScanline(VP8Decoder* const dec);
// Decode one macroblock. Returns false if there is not enough data.

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// main entry for the decoder
@ -58,18 +60,18 @@ static const uint8_t kCodeLengthCodeOrder[NUM_CODE_LENGTH_CODES] = {
#define CODE_TO_PLANE_CODES 120
static const uint8_t code_to_plane_lut[CODE_TO_PLANE_CODES] = {
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
0x18, 0x07, 0x17, 0x19, 0x28, 0x06, 0x27, 0x29, 0x16, 0x1a,
0x26, 0x2a, 0x38, 0x05, 0x37, 0x39, 0x15, 0x1b, 0x36, 0x3a,
0x25, 0x2b, 0x48, 0x04, 0x47, 0x49, 0x14, 0x1c, 0x35, 0x3b,
0x46, 0x4a, 0x24, 0x2c, 0x58, 0x45, 0x4b, 0x34, 0x3c, 0x03,
0x57, 0x59, 0x13, 0x1d, 0x56, 0x5a, 0x23, 0x2d, 0x44, 0x4c,
0x55, 0x5b, 0x33, 0x3d, 0x68, 0x02, 0x67, 0x69, 0x12, 0x1e,
0x66, 0x6a, 0x22, 0x2e, 0x54, 0x5c, 0x43, 0x4d, 0x65, 0x6b,
0x32, 0x3e, 0x78, 0x01, 0x77, 0x79, 0x53, 0x5d, 0x11, 0x1f,
0x64, 0x6c, 0x42, 0x4e, 0x76, 0x7a, 0x21, 0x2f, 0x75, 0x7b,
0x31, 0x3f, 0x63, 0x6d, 0x52, 0x5e, 0x00, 0x74, 0x7c, 0x41,
0x4f, 0x10, 0x20, 0x62, 0x6e, 0x30, 0x73, 0x7d, 0x51, 0x5f,
0x40, 0x72, 0x7e, 0x61, 0x6f, 0x50, 0x71, 0x7f, 0x60, 0x70
};
static int DecodeImageStream(int xsize, int ysize,
@ -149,29 +151,20 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) {
//------------------------------------------------------------------------------
// Decodes the next Huffman code from bit-stream.
// FillBitWindow(br) needs to be called at minimum every second call
// to ReadSymbolUnsafe.
static int ReadSymbolUnsafe(const HuffmanTree* tree, VP8LBitReader* const br) {
const HuffmanTreeNode* node = tree->root_;
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBitUnsafe(br));
}
return node->symbol_;
}
// to ReadSymbol, in order to pre-fetch enough bits.
static WEBP_INLINE int ReadSymbol(const HuffmanTree* tree,
VP8LBitReader* const br) {
const int read_safe = (br->pos_ + 8 > br->len_);
if (!read_safe) {
return ReadSymbolUnsafe(tree, br);
} else {
const HuffmanTreeNode* node = tree->root_;
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, VP8LReadOneBit(br));
}
return node->symbol_;
const HuffmanTreeNode* node = tree->root_;
int num_bits = 0;
uint32_t bits = VP8LPrefetchBits(br);
assert(node != NULL);
while (!HuffmanTreeNodeIsLeaf(node)) {
node = HuffmanTreeNextNode(node, bits & 1);
bits >>= 1;
++num_bits;
}
VP8LDiscardBits(br, num_bits);
return node->symbol_;
}
static int ReadHuffmanCodeLengths(
@ -327,10 +320,10 @@ static int ReadHuffmanCodes(VP8LDecoder* const dec, int xsize, int ysize,
hdr->huffman_subsample_bits_ = huffman_precision;
for (i = 0; i < huffman_pixs; ++i) {
// The huffman data is stored in red and green bytes.
const int index = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = index;
if (index >= num_htree_groups) {
num_htree_groups = index + 1;
const int group = (huffman_image[i] >> 8) & 0xffff;
huffman_image[i] = group;
if (group >= num_htree_groups) {
num_htree_groups = group + 1;
}
}
}
@ -634,10 +627,24 @@ static void ApplyInverseTransforms(VP8LDecoder* const dec, int num_rows,
}
}
// Special method for paletted alpha data.
static void ApplyInverseTransformsAlpha(VP8LDecoder* const dec, int num_rows,
const uint8_t* const rows) {
const int start_row = dec->last_row_;
const int end_row = start_row + num_rows;
const uint8_t* rows_in = rows;
uint8_t* rows_out = (uint8_t*)dec->io_->opaque + dec->io_->width * start_row;
VP8LTransform* const transform = &dec->transforms_[0];
assert(dec->next_transform_ == 1);
assert(transform->type_ == COLOR_INDEXING_TRANSFORM);
VP8LColorIndexInverseTransformAlpha(transform, start_row, end_row, rows_in,
rows_out);
}
// Processes (transforms, scales & color-converts) the rows decoded after the
// last call.
static void ProcessRows(VP8LDecoder* const dec, int row) {
const uint32_t* const rows = dec->argb_ + dec->width_ * dec->last_row_;
const uint32_t* const rows = dec->pixels_ + dec->width_ * dec->last_row_;
const int num_rows = row - dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done.
@ -676,121 +683,135 @@ static void ProcessRows(VP8LDecoder* const dec, int row) {
assert(dec->last_row_ <= dec->height_);
}
static int DecodeImageData(VP8LDecoder* const dec,
uint32_t* const data, int width, int height,
ProcessRowsFunc process_func) {
int ok = 1;
int col = 0, row = 0;
VP8LBitReader* const br = &dec->br_;
VP8LMetadata* const hdr = &dec->hdr_;
HTreeGroup* htree_group = hdr->htree_groups_;
uint32_t* src = data;
uint32_t* last_cached = data;
uint32_t* const src_end = data + width * height;
const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES;
const int color_cache_limit = len_code_limit + hdr->color_cache_size_;
VP8LColorCache* const color_cache =
(hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL;
const int mask = hdr->huffman_mask_;
assert(htree_group != NULL);
while (!br->eos_ && src < src_end) {
int code;
// Only update when changing tile. Note we could use the following test:
// if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed
// but that's actually slower and requires storing the previous col/row
if ((col & mask) == 0) {
htree_group = GetHtreeGroupForPos(hdr, col, row);
}
VP8LFillBitWindow(br);
code = ReadSymbol(&htree_group->htrees_[GREEN], br);
if (code < NUM_LITERAL_CODES) { // Literal.
int red, green, blue, alpha;
red = ReadSymbol(&htree_group->htrees_[RED], br);
green = code;
VP8LFillBitWindow(br);
blue = ReadSymbol(&htree_group->htrees_[BLUE], br);
alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br);
*src = (alpha << 24) + (red << 16) + (green << 8) + blue;
AdvanceByOne:
++src;
++col;
if (col >= width) {
col = 0;
++row;
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
process_func(dec, row);
}
if (color_cache != NULL) {
while (last_cached < src) {
VP8LColorCacheInsert(color_cache, *last_cached++);
}
}
}
} else if (code < len_code_limit) { // Backward reference
int dist_code, dist;
const int length_sym = code - NUM_LITERAL_CODES;
const int length = GetCopyLength(length_sym, br);
const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br);
VP8LFillBitWindow(br);
dist_code = GetCopyDistance(dist_symbol, br);
dist = PlaneCodeToDistance(width, dist_code);
if (src - data < dist || src_end - src < length) {
ok = 0;
goto End;
}
{
int i;
for (i = 0; i < length; ++i) src[i] = src[i - dist];
src += length;
}
col += length;
while (col >= width) {
col -= width;
++row;
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) {
process_func(dec, row);
}
}
if (src < src_end) {
htree_group = GetHtreeGroupForPos(hdr, col, row);
if (color_cache != NULL) {
while (last_cached < src) {
VP8LColorCacheInsert(color_cache, *last_cached++);
}
}
}
} else if (code < color_cache_limit) { // Color cache.
const int key = code - len_code_limit;
assert(color_cache != NULL);
while (last_cached < src) {
VP8LColorCacheInsert(color_cache, *last_cached++);
}
*src = VP8LColorCacheLookup(color_cache, key);
goto AdvanceByOne;
} else { // Not reached.
ok = 0;
goto End;
}
ok = !br->error_;
if (!ok) goto End;
}
// Process the remaining rows corresponding to last row-block.
if (process_func != NULL) process_func(dec, row);
End:
if (br->error_ || !ok || (br->eos_ && src < src_end)) {
ok = 0;
dec->status_ = (!br->eos_) ?
VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED;
} else if (src == src_end) {
dec->state_ = READ_DATA;
}
return ok;
#define DECODE_DATA_FUNC(FUNC_NAME, TYPE, STORE_PIXEL) \
static int FUNC_NAME(VP8LDecoder* const dec, TYPE* const data, int width, \
int height, ProcessRowsFunc process_func) { \
int ok = 1; \
int col = 0, row = 0; \
VP8LBitReader* const br = &dec->br_; \
VP8LMetadata* const hdr = &dec->hdr_; \
HTreeGroup* htree_group = hdr->htree_groups_; \
TYPE* src = data; \
TYPE* last_cached = data; \
TYPE* const src_end = data + width * height; \
const int len_code_limit = NUM_LITERAL_CODES + NUM_LENGTH_CODES; \
const int color_cache_limit = len_code_limit + hdr->color_cache_size_; \
VP8LColorCache* const color_cache = \
(hdr->color_cache_size_ > 0) ? &hdr->color_cache_ : NULL; \
const int mask = hdr->huffman_mask_; \
assert(htree_group != NULL); \
while (!br->eos_ && src < src_end) { \
int code; \
/* Only update when changing tile. Note we could use this test: */ \
/* if "((((prev_col ^ col) | prev_row ^ row)) > mask)" -> tile changed */ \
/* but that's actually slower and needs storing the previous col/row. */ \
if ((col & mask) == 0) { \
htree_group = GetHtreeGroupForPos(hdr, col, row); \
} \
VP8LFillBitWindow(br); \
code = ReadSymbol(&htree_group->htrees_[GREEN], br); \
if (code < NUM_LITERAL_CODES) { /* Literal*/ \
int red, green, blue, alpha; \
red = ReadSymbol(&htree_group->htrees_[RED], br); \
green = code; \
VP8LFillBitWindow(br); \
blue = ReadSymbol(&htree_group->htrees_[BLUE], br); \
alpha = ReadSymbol(&htree_group->htrees_[ALPHA], br); \
*src = STORE_PIXEL(alpha, red, green, blue); \
AdvanceByOne: \
++src; \
++col; \
if (col >= width) { \
col = 0; \
++row; \
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { \
process_func(dec, row); \
} \
if (color_cache != NULL) { \
while (last_cached < src) { \
VP8LColorCacheInsert(color_cache, *last_cached++); \
} \
} \
} \
} else if (code < len_code_limit) { /* Backward reference */ \
int dist_code, dist; \
const int length_sym = code - NUM_LITERAL_CODES; \
const int length = GetCopyLength(length_sym, br); \
const int dist_symbol = ReadSymbol(&htree_group->htrees_[DIST], br); \
VP8LFillBitWindow(br); \
dist_code = GetCopyDistance(dist_symbol, br); \
dist = PlaneCodeToDistance(width, dist_code); \
if (src - data < dist || src_end - src < length) { \
ok = 0; \
goto End; \
} \
{ \
int i; \
for (i = 0; i < length; ++i) src[i] = src[i - dist]; \
src += length; \
} \
col += length; \
while (col >= width) { \
col -= width; \
++row; \
if ((process_func != NULL) && (row % NUM_ARGB_CACHE_ROWS == 0)) { \
process_func(dec, row); \
} \
} \
if (src < src_end) { \
htree_group = GetHtreeGroupForPos(hdr, col, row); \
if (color_cache != NULL) { \
while (last_cached < src) { \
VP8LColorCacheInsert(color_cache, *last_cached++); \
} \
} \
} \
} else if (code < color_cache_limit) { /* Color cache */ \
const int key = code - len_code_limit; \
assert(color_cache != NULL); \
while (last_cached < src) { \
VP8LColorCacheInsert(color_cache, *last_cached++); \
} \
*src = VP8LColorCacheLookup(color_cache, key); \
goto AdvanceByOne; \
} else { /* Not reached */ \
ok = 0; \
goto End; \
} \
ok = !br->error_; \
if (!ok) goto End; \
} \
/* Process the remaining rows corresponding to last row-block. */ \
if (process_func != NULL) process_func(dec, row); \
End: \
if (br->error_ || !ok || (br->eos_ && src < src_end)) { \
ok = 0; \
dec->status_ = \
(!br->eos_) ? VP8_STATUS_BITSTREAM_ERROR : VP8_STATUS_SUSPENDED; \
} else if (src == src_end) { \
dec->state_ = READ_DATA; \
} \
return ok; \
}
static WEBP_INLINE uint32_t GetARGBPixel(int alpha, int red, int green,
int blue) {
return (alpha << 24) | (red << 16) | (green << 8) | blue;
}
static WEBP_INLINE uint8_t GetAlphaPixel(int alpha, int red, int green,
int blue) {
(void)alpha;
(void)red;
(void)blue;
return green; // Alpha value is stored in green channel.
}
DECODE_DATA_FUNC(DecodeImageData, uint32_t, GetARGBPixel)
DECODE_DATA_FUNC(DecodeAlphaData, uint8_t, GetAlphaPixel)
#undef DECODE_DATA_FUNC
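DECODE_DATA_FUNC stamps out two decode loops that differ only in the pixel type and the pixel-assembly helper (uint32_t/GetARGBPixel versus uint8_t/GetAlphaPixel). A minimal self-contained sketch of the same macro pattern, with hypothetical names unrelated to libwebp:

#include <stdio.h>

/* Generate reduction functions that differ only in element type and combiner. */
#define DEFINE_SUM_FUNC(NAME, TYPE, COMBINE)          \
static TYPE NAME(const TYPE* v, int n) {              \
  TYPE acc = 0;                                       \
  int i;                                              \
  for (i = 0; i < n; ++i) acc = COMBINE(acc, v[i]);   \
  return acc;                                         \
}

#define ADD(a, b) ((a) + (b))
#define XOR(a, b) ((a) ^ (b))

DEFINE_SUM_FUNC(SumInt, int, ADD)
DEFINE_SUM_FUNC(XorByte, unsigned char, XOR)
#undef DEFINE_SUM_FUNC

int main(void) {
  const int a[] = {1, 2, 3};
  const unsigned char b[] = {0x0f, 0xf0};
  printf("%d 0x%02x\n", SumInt(a, 3), XorByte(b, 2));  /* prints: 6 0xff */
  return 0;
}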
// -----------------------------------------------------------------------------
// VP8LTransform
@ -912,8 +933,8 @@ void VP8LClear(VP8LDecoder* const dec) {
if (dec == NULL) return;
ClearMetadata(&dec->hdr_);
free(dec->argb_);
dec->argb_ = NULL;
free(dec->pixels_);
dec->pixels_ = NULL;
for (i = 0; i < dec->next_transform_; ++i) {
ClearTransform(&dec->transforms_[i]);
}
@ -1037,35 +1058,39 @@ static int DecodeImageStream(int xsize, int ysize,
}
//------------------------------------------------------------------------------
// Allocate dec->argb_ and dec->argb_cache_ using dec->width_ and dec->height_
static int AllocateARGBBuffers(VP8LDecoder* const dec, int final_width) {
// Allocate internal buffers dec->pixels_ and dec->argb_cache_.
static int AllocateInternalBuffers(VP8LDecoder* const dec, int final_width,
size_t bytes_per_pixel) {
const int argb_cache_needed = (bytes_per_pixel == sizeof(uint32_t));
const uint64_t num_pixels = (uint64_t)dec->width_ * dec->height_;
// Scratch buffer corresponding to top-prediction row for transforming the
// first row in the row-blocks.
const uint64_t cache_top_pixels = final_width;
// Scratch buffer for temporary BGRA storage.
const uint64_t cache_pixels = (uint64_t)final_width * NUM_ARGB_CACHE_ROWS;
// first row in the row-blocks. Not needed for paletted alpha.
const uint64_t cache_top_pixels =
argb_cache_needed ? (uint16_t)final_width : 0ULL;
// Scratch buffer for temporary BGRA storage. Not needed for paletted alpha.
const uint64_t cache_pixels =
argb_cache_needed ? (uint64_t)final_width * NUM_ARGB_CACHE_ROWS : 0ULL;
const uint64_t total_num_pixels =
num_pixels + cache_top_pixels + cache_pixels;
assert(dec->width_ <= final_width);
dec->argb_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, sizeof(*dec->argb_));
if (dec->argb_ == NULL) {
dec->pixels_ = (uint32_t*)WebPSafeMalloc(total_num_pixels, bytes_per_pixel);
if (dec->pixels_ == NULL) {
dec->argb_cache_ = NULL; // for sanity check
dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
return 0;
}
dec->argb_cache_ = dec->argb_ + num_pixels + cache_top_pixels;
dec->argb_cache_ =
argb_cache_needed ? dec->pixels_ + num_pixels + cache_top_pixels : NULL;
return 1;
}
//------------------------------------------------------------------------------
// Special row-processing that only stores the alpha data.
// Special row-processing that only stores the alpha data.
static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_;
const uint32_t* const in = dec->argb_ + dec->width_ * dec->last_row_;
const uint32_t* const in = dec->pixels_ + dec->width_ * dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransforms(dec, num_rows, in);
@ -1079,7 +1104,17 @@ static void ExtractAlphaRows(VP8LDecoder* const dec, int row) {
int i;
for (i = 0; i < cache_pixs; ++i) dst[i] = (src[i] >> 8) & 0xff;
}
dec->last_row_ = dec->last_out_row_ = row;
}
// Row-processing for the special case when alpha data contains only one
// transform: color indexing.
static void ExtractPalettedAlphaRows(VP8LDecoder* const dec, int row) {
const int num_rows = row - dec->last_row_;
const uint8_t* const in =
(uint8_t*)dec->pixels_ + dec->width_ * dec->last_row_;
if (num_rows <= 0) return; // Nothing to be done.
ApplyInverseTransformsAlpha(dec, num_rows, in);
dec->last_row_ = dec->last_out_row_ = row;
}
@ -1088,6 +1123,7 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
VP8Io io;
int ok = 0;
VP8LDecoder* const dec = VP8LNew();
size_t bytes_per_pixel = sizeof(uint32_t); // Default: BGRA mode.
if (dec == NULL) return 0;
dec->width_ = width;
@ -1106,13 +1142,25 @@ int VP8LDecodeAlphaImageStream(int width, int height, const uint8_t* const data,
dec->action_ = READ_HDR;
if (!DecodeImageStream(width, height, 1, dec, NULL)) goto Err;
// Allocate output (note that dec->width_ may have changed here).
if (!AllocateARGBBuffers(dec, width)) goto Err;
// Special case: if alpha data uses only the color indexing transform and
// doesn't use color cache (a frequent case), we will use DecodeAlphaData()
// method that only needs allocation of 1 byte per pixel (alpha channel).
if (dec->next_transform_ == 1 &&
dec->transforms_[0].type_ == COLOR_INDEXING_TRANSFORM &&
dec->hdr_.color_cache_size_ == 0) {
bytes_per_pixel = sizeof(uint8_t);
}
// Allocate internal buffers (note that dec->width_ may have changed here).
if (!AllocateInternalBuffers(dec, width, bytes_per_pixel)) goto Err;
// Decode (with special row processing).
dec->action_ = READ_DATA;
ok = DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
ExtractAlphaRows);
ok = (bytes_per_pixel == sizeof(uint8_t)) ?
DecodeAlphaData(dec, (uint8_t*)dec->pixels_, dec->width_, dec->height_,
ExtractPalettedAlphaRows) :
DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
ExtractAlphaRows);
Err:
VP8LDelete(dec);
@ -1146,12 +1194,13 @@ int VP8LDecodeHeader(VP8LDecoder* const dec, VP8Io* const io) {
return 1;
Error:
VP8LClear(dec);
assert(dec->status_ != VP8_STATUS_OK);
return 0;
VP8LClear(dec);
assert(dec->status_ != VP8_STATUS_OK);
return 0;
}
int VP8LDecodeImage(VP8LDecoder* const dec) {
const size_t bytes_per_pixel = sizeof(uint32_t);
VP8Io* io = NULL;
WebPDecParams* params = NULL;
@ -1171,13 +1220,13 @@ int VP8LDecodeImage(VP8LDecoder* const dec) {
goto Err;
}
if (!AllocateARGBBuffers(dec, io->width)) goto Err;
if (!AllocateInternalBuffers(dec, io->width, bytes_per_pixel)) goto Err;
if (io->use_scaling && !AllocateAndInitRescaler(dec, io)) goto Err;
// Decode.
dec->action_ = READ_DATA;
if (!DecodeImageData(dec, dec->argb_, dec->width_, dec->height_,
if (!DecodeImageData(dec, dec->pixels_, dec->width_, dec->height_,
ProcessRows)) {
goto Err;
}

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Lossless decoder: internal header.
@ -63,7 +65,8 @@ typedef struct {
const WebPDecBuffer *output_; // shortcut to io->opaque->output
uint32_t *argb_; // Internal data: always in BGRA color mode.
uint32_t *pixels_; // Internal data: either uint8_t* for alpha
// or uint32_t* for BGRA.
uint32_t *argb_cache_; // Scratch buffer for temporary BGRA storage.
VP8LBitReader br_;

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Main decoding functions for WEBP images.
@ -14,7 +16,7 @@
#include "./vp8i.h"
#include "./vp8li.h"
#include "./webpi.h"
#include "../webp/format_constants.h"
#include "../webp/mux_types.h" // ALPHA_FLAG
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -40,8 +42,8 @@ extern "C" {
// 20..23 VP8X flags bit-map corresponding to the chunk-types present.
// 24..26 Width of the Canvas Image.
// 27..29 Height of the Canvas Image.
// There can be extra chunks after the "VP8X" chunk (ICCP, TILE, FRM, VP8,
// META ...)
// There can be extra chunks after the "VP8X" chunk (ICCP, FRGM, ANMF, VP8,
// VP8L, XMP, EXIF ...)
// All sizes are in little-endian order.
// Note: chunk data size must be padded to multiple of 2 when written.
@ -192,6 +194,15 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
return VP8_STATUS_BITSTREAM_ERROR; // Not a valid chunk size.
}
// Start of a (possibly incomplete) VP8/VP8L chunk implies that we have
// parsed all the optional chunks.
// Note: This check must occur before the check 'buf_size < disk_chunk_size'
// below to allow incomplete VP8/VP8L chunks.
if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
!memcmp(buf, "VP8L", TAG_SIZE)) {
return VP8_STATUS_OK;
}
if (buf_size < disk_chunk_size) { // Insufficient data.
return VP8_STATUS_NOT_ENOUGH_DATA;
}
@ -199,9 +210,6 @@ static VP8StatusCode ParseOptionalChunks(const uint8_t** const data,
if (!memcmp(buf, "ALPH", TAG_SIZE)) { // A valid ALPH header.
*alpha_data = buf + CHUNK_HEADER_SIZE;
*alpha_size = chunk_size;
} else if (!memcmp(buf, "VP8 ", TAG_SIZE) ||
!memcmp(buf, "VP8L", TAG_SIZE)) { // A valid VP8/VP8L header.
return VP8_STATUS_OK; // Found.
}
// We have a full and valid chunk; skip it.
@ -276,6 +284,7 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
int* const width,
int* const height,
int* const has_alpha,
int* const has_animation,
WebPHeaderStructure* const headers) {
int found_riff = 0;
int found_vp8x = 0;
@ -308,7 +317,8 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
// necessary to send VP8X chunk to the decoder.
return VP8_STATUS_BITSTREAM_ERROR;
}
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG_BIT);
if (has_alpha != NULL) *has_alpha = !!(flags & ALPHA_FLAG);
if (has_animation != NULL) *has_animation = !!(flags & ANIMATION_FLAG);
if (found_vp8x && headers == NULL) {
return VP8_STATUS_OK; // Return features from VP8X header.
}
@ -370,10 +380,19 @@ static VP8StatusCode ParseHeadersInternal(const uint8_t* data,
}
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers) {
VP8StatusCode status;
int has_animation = 0;
assert(headers != NULL);
// fill out headers, ignore width/height/has_alpha.
return ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, headers);
status = ParseHeadersInternal(headers->data, headers->data_size,
NULL, NULL, NULL, &has_animation, headers);
if (status == VP8_STATUS_OK || status == VP8_STATUS_NOT_ENOUGH_DATA) {
// TODO(jzern): full support of animation frames will require API additions.
if (has_animation) {
status = VP8_STATUS_UNSUPPORTED_FEATURE;
}
}
return status;
}
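As context for the new has_animation plumbing, here is a minimal caller-side sketch (application code, not part of the library): it relies only on WebPGetFeatures() from webp/decode.h and on the has_animation field this change introduces; the buffer names are placeholders.
#include <stdint.h>
#include <stdio.h>
#include "webp/decode.h"
// 'bytes'/'size' are placeholders for a buffer holding a complete WebP file.
static void InspectWebP(const uint8_t* bytes, size_t size) {
  WebPBitstreamFeatures features;
  const VP8StatusCode status = WebPGetFeatures(bytes, size, &features);
  if (status != VP8_STATUS_OK) {
    printf("header parsing failed (status %d)\n", (int)status);
    return;
  }
  if (features.has_animation) {
    // Single-frame decoding of an animation now reports
    // VP8_STATUS_UNSUPPORTED_FEATURE, so hand the payload to the demuxer.
    printf("animated WebP, %dx%d canvas\n", features.width, features.height);
  } else {
    printf("still WebP, %dx%d, alpha=%d\n",
           features.width, features.height, features.has_alpha);
  }
}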
//------------------------------------------------------------------------------
@ -625,10 +644,11 @@ static VP8StatusCode GetFeatures(const uint8_t* const data, size_t data_size,
}
DefaultFeatures(features);
// Only parse enough of the data to retrieve width/height/has_alpha.
// Only parse enough of the data to retrieve the features.
return ParseHeadersInternal(data, data_size,
&features->width, &features->height,
&features->has_alpha, NULL);
&features->has_alpha, &features->has_animation,
NULL);
}
//------------------------------------------------------------------------------
@ -672,19 +692,13 @@ int WebPInitDecoderConfigInternal(WebPDecoderConfig* config,
VP8StatusCode WebPGetFeaturesInternal(const uint8_t* data, size_t data_size,
WebPBitstreamFeatures* features,
int version) {
VP8StatusCode status;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DECODER_ABI_VERSION)) {
return VP8_STATUS_INVALID_PARAM; // version mismatch
}
if (features == NULL) {
return VP8_STATUS_INVALID_PARAM;
}
status = GetFeatures(data, data_size, features);
if (status == VP8_STATUS_NOT_ENOUGH_DATA) {
return VP8_STATUS_BITSTREAM_ERROR; // Not-enough-data treated as error.
}
return status;
return GetFeatures(data, data_size, features);
}
VP8StatusCode WebPDecode(const uint8_t* data, size_t data_size,

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Internal header: WebP decoding parameters and custom IO on buffer
@ -61,10 +63,10 @@ typedef struct {
} WebPHeaderStructure;
// Skips over all valid chunks prior to the first VP8/VP8L frame header.
// Returns VP8_STATUS_OK on success,
// VP8_STATUS_BITSTREAM_ERROR if an invalid header/chunk is found, and
// VP8_STATUS_NOT_ENOUGH_DATA if case of insufficient data.
// In 'headers', compressed_size, offset, alpha_data, alpha_size and lossless
// Returns: VP8_STATUS_OK, VP8_STATUS_BITSTREAM_ERROR (invalid header/chunk),
// VP8_STATUS_NOT_ENOUGH_DATA (partial input) or VP8_STATUS_UNSUPPORTED_FEATURE
// in the case of non-decodable features (animation for instance).
// In 'headers', compressed_size, offset, alpha_data, alpha_size, and lossless
// fields are updated appropriately upon success.
VP8StatusCode WebPParseHeaders(WebPHeaderStructure* const headers);

View File

@ -1,26 +1,35 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP container demux.
//
#include "../webp/mux.h"
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "../webp/decode.h" // WebPGetInfo
#include "../utils/utils.h"
#include "../webp/decode.h" // WebPGetFeatures
#include "../webp/demux.h"
#include "../webp/format_constants.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
#define DMUX_MAJ_VERSION 0
#define DMUX_MIN_VERSION 1
#define DMUX_REV_VERSION 1
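An aside on MKFOURCC, since every chunk match in this file depends on it: the macro packs four characters into a uint32_t in little-endian byte order, which is exactly how ReadLE32() assembles a tag from the stream, so the two values compare equal regardless of host endianness. A quick stand-alone check (hypothetical test, not library code):
#include <assert.h>
#include <stdint.h>
#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
int main(void) {
  // Bytes in stream order, as ReadLE32() would consume them: 'A' 'N' 'M' 'F'.
  const uint8_t tag[4] = { 'A', 'N', 'M', 'F' };
  const uint32_t fourcc = (uint32_t)tag[0] | ((uint32_t)tag[1] << 8) |
                          ((uint32_t)tag[2] << 16) | ((uint32_t)tag[3] << 24);
  assert(fourcc == MKFOURCC('A', 'N', 'M', 'F'));  // matches the switch cases below
  return 0;
}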
typedef struct {
size_t start_; // start location of the data
@ -39,8 +48,9 @@ typedef struct Frame {
int x_offset_, y_offset_;
int width_, height_;
int duration_;
int is_tile_; // this is an image fragment from a 'TILE'.
int frame_num_; // the referent frame number for use in assembling tiles.
WebPMuxAnimDispose dispose_method_;
int is_fragment_; // this is a frame fragment (and not a full frame).
int frame_num_; // the referent frame number for use in assembling fragments.
int complete_; // img_components_ contains a full image.
ChunkData img_components_[2]; // 0=VP8{,L} 1=ALPH
struct Frame* next_;
@ -58,8 +68,10 @@ struct WebPDemuxer {
uint32_t feature_flags_;
int canvas_width_, canvas_height_;
int loop_count_;
uint32_t bgcolor_;
int num_frames_;
Frame* frames_;
Frame** frames_tail_;
Chunk* chunks_; // non-image chunks
};
@ -87,6 +99,12 @@ static const ChunkParser kMasterChunks[] = {
{ { '0', '0', '0', '0' }, NULL, NULL },
};
//------------------------------------------------------------------------------
int WebPGetDemuxVersion(void) {
return (DMUX_MAJ_VERSION << 16) | (DMUX_MIN_VERSION << 8) | DMUX_REV_VERSION;
}
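The packed value decomposes back into major/minor/revision on the caller side; a tiny usage sketch:
#include <stdio.h>
#include "webp/demux.h"  // WebPGetDemuxVersion()
int main(void) {
  const int version = WebPGetDemuxVersion();
  printf("demux %d.%d.%d\n",
         (version >> 16) & 0xff, (version >> 8) & 0xff, version & 0xff);
  return 0;
}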
// -----------------------------------------------------------------------------
// MemBuffer
@ -127,43 +145,30 @@ static WEBP_INLINE const uint8_t* GetBuffer(MemBuffer* const mem) {
return mem->buf_ + mem->start_;
}
static WEBP_INLINE uint8_t GetByte(MemBuffer* const mem) {
// Read from 'mem' and skip the read bytes.
static WEBP_INLINE uint8_t ReadByte(MemBuffer* const mem) {
const uint8_t byte = mem->buf_[mem->start_];
Skip(mem, 1);
return byte;
}
// Read 16, 24 or 32 bits stored in little-endian order.
static WEBP_INLINE int ReadLE16s(const uint8_t* const data) {
return (int)(data[0] << 0) | (data[1] << 8);
}
static WEBP_INLINE int ReadLE24s(const uint8_t* const data) {
return ReadLE16s(data) | (data[2] << 16);
}
static WEBP_INLINE uint32_t ReadLE32(const uint8_t* const data) {
return (uint32_t)ReadLE24s(data) | (data[3] << 24);
}
// In addition to reading, skip the read bytes.
static WEBP_INLINE int GetLE16s(MemBuffer* const mem) {
static WEBP_INLINE int ReadLE16s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = ReadLE16s(data);
const int val = GetLE16(data);
Skip(mem, 2);
return val;
}
static WEBP_INLINE int GetLE24s(MemBuffer* const mem) {
static WEBP_INLINE int ReadLE24s(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const int val = ReadLE24s(data);
const int val = GetLE24(data);
Skip(mem, 3);
return val;
}
static WEBP_INLINE uint32_t GetLE32(MemBuffer* const mem) {
static WEBP_INLINE uint32_t ReadLE32(MemBuffer* const mem) {
const uint8_t* const data = mem->buf_ + mem->start_;
const uint32_t val = ReadLE32(data);
const uint32_t val = GetLE32(data);
Skip(mem, 4);
return val;
}
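The renamed wrappers above now delegate the actual byte assembly to GetLE16/GetLE24/GetLE32 from ../utils/utils.h, which this diff does not show. Judging from the inline versions being removed here, they presumably reduce to the following sketch (names suffixed with '_' to mark them as stand-ins, not the real declarations):
#include <stdint.h>
// Sketch only: the real helpers live in ../utils/utils.h.
static uint32_t GetLE16_(const uint8_t* const data) {
  return (uint32_t)data[0] | ((uint32_t)data[1] << 8);
}
static uint32_t GetLE24_(const uint8_t* const data) {
  return GetLE16_(data) | ((uint32_t)data[2] << 16);
}
static uint32_t GetLE32_(const uint8_t* const data) {
  return GetLE24_(data) | ((uint32_t)data[3] << 24);
}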
@ -181,32 +186,34 @@ static void AddChunk(WebPDemuxer* const dmux, Chunk* const chunk) {
// Add a frame to the end of the list, ensuring the last frame is complete.
// Returns true on success, false otherwise.
static int AddFrame(WebPDemuxer* const dmux, Frame* const frame) {
const Frame* last_frame = NULL;
Frame** f = &dmux->frames_;
while (*f != NULL) {
last_frame = *f;
f = &(*f)->next_;
}
const Frame* const last_frame = *dmux->frames_tail_;
if (last_frame != NULL && !last_frame->complete_) return 0;
*f = frame;
*dmux->frames_tail_ = frame;
frame->next_ = NULL;
dmux->frames_tail_ = &frame->next_;
return 1;
}
// Store image bearing chunks to 'frame'.
static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
Frame* const frame) {
// If 'has_vp8l_alpha' is not NULL, it will be set to true if the frame is a
// lossless image with alpha.
static ParseStatus StoreFrame(int frame_num, uint32_t min_size,
MemBuffer* const mem, Frame* const frame,
int* const has_vp8l_alpha) {
int alpha_chunks = 0;
int image_chunks = 0;
int done = (MemDataSize(mem) < CHUNK_HEADER_SIZE);
int done = (MemDataSize(mem) < min_size);
ParseStatus status = PARSE_OK;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = 0; // Default.
if (done) return PARSE_NEED_MORE_DATA;
do {
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = GetLE32(mem);
const uint32_t payload_size = GetLE32(mem);
const uint32_t fourcc = ReadLE32(mem);
const uint32_t payload_size = ReadLE32(mem);
const uint32_t payload_size_padded = payload_size + (payload_size & 1);
const size_t payload_available = (payload_size_padded > MemDataSize(mem))
? MemDataSize(mem) : payload_size_padded;
@ -228,23 +235,30 @@ static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
goto Done;
}
break;
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'):
if (alpha_chunks > 0) return PARSE_ERROR; // VP8L has its own alpha
// fall through
case MKFOURCC('V', 'P', '8', ' '):
if (image_chunks == 0) {
int width = 0, height = 0;
// Extract the bitstream features, tolerating failures when the data
// is incomplete.
WebPBitstreamFeatures features;
const VP8StatusCode vp8_status =
WebPGetFeatures(mem->buf_ + chunk_start_offset, chunk_size,
&features);
if (status == PARSE_NEED_MORE_DATA &&
vp8_status == VP8_STATUS_NOT_ENOUGH_DATA) {
return PARSE_NEED_MORE_DATA;
} else if (vp8_status != VP8_STATUS_OK) {
// We have enough data, and yet WebPGetFeatures() failed.
return PARSE_ERROR;
}
++image_chunks;
frame->img_components_[0].offset_ = chunk_start_offset;
frame->img_components_[0].size_ = chunk_size;
// Extract the width and height from the bitstream, tolerating
// failures when the data is incomplete.
if (!WebPGetInfo(mem->buf_ + frame->img_components_[0].offset_,
frame->img_components_[0].size_, &width, &height) &&
status != PARSE_NEED_MORE_DATA) {
return PARSE_ERROR;
}
frame->width_ = width;
frame->height_ = height;
frame->width_ = features.width;
frame->height_ = features.height;
if (has_vp8l_alpha != NULL) *has_vp8l_alpha = features.has_alpha;
frame->frame_num_ = frame_num;
frame->complete_ = (status == PARSE_OK);
Skip(mem, payload_available);
@ -275,42 +289,44 @@ static ParseStatus StoreFrame(int frame_num, MemBuffer* const mem,
// Returns PARSE_OK on success with *frame pointing to the new Frame.
// Returns PARSE_NEED_MORE_DATA with insufficient data, PARSE_ERROR otherwise.
static ParseStatus NewFrame(const MemBuffer* const mem,
uint32_t min_size, uint32_t expected_size,
uint32_t actual_size, Frame** frame) {
uint32_t min_size, uint32_t actual_size,
Frame** frame) {
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
if (actual_size < expected_size) return PARSE_ERROR;
if (actual_size < min_size) return PARSE_ERROR;
if (MemDataSize(mem) < min_size) return PARSE_NEED_MORE_DATA;
*frame = (Frame*)calloc(1, sizeof(**frame));
return (*frame == NULL) ? PARSE_ERROR : PARSE_OK;
}
// Parse a 'FRM ' chunk and any image bearing chunks that immediately follow.
// Parse a 'ANMF' chunk and any image bearing chunks that immediately follow.
// 'frame_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseFrame(
static ParseStatus ParseAnimationFrame(
WebPDemuxer* const dmux, uint32_t frame_chunk_size) {
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const uint32_t min_size = frame_chunk_size + CHUNK_HEADER_SIZE;
const uint32_t anmf_payload_size = frame_chunk_size - ANMF_CHUNK_SIZE;
int added_frame = 0;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, min_size, FRAME_CHUNK_SIZE, frame_chunk_size, &frame);
NewFrame(mem, ANMF_CHUNK_SIZE, frame_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->x_offset_ = 2 * GetLE24s(mem);
frame->y_offset_ = 2 * GetLE24s(mem);
frame->width_ = 1 + GetLE24s(mem);
frame->height_ = 1 + GetLE24s(mem);
frame->duration_ = 1 + GetLE24s(mem);
Skip(mem, frame_chunk_size - FRAME_CHUNK_SIZE); // skip any trailing data.
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
frame->width_ = 1 + ReadLE24s(mem);
frame->height_ = 1 + ReadLE24s(mem);
frame->duration_ = ReadLE24s(mem);
frame->dispose_method_ = (WebPMuxAnimDispose)(ReadByte(mem) & 1);
if (frame->width_ * (uint64_t)frame->height_ >= MAX_IMAGE_AREA) {
free(frame);
return PARSE_ERROR;
}
// Store a (potentially partial) frame only if the animation flag is set
// and there is some data in 'frame'.
status = StoreFrame(dmux->num_frames_ + 1, mem, frame);
// Store a frame only if the animation flag is set and some data for
// this frame is available.
status = StoreFrame(dmux->num_frames_ + 1, anmf_payload_size, mem, frame,
NULL);
if (status != PARSE_ERROR && has_frames && frame->frame_num_ > 0) {
added_frame = AddFrame(dmux, frame);
if (added_frame) {
@ -324,38 +340,43 @@ static ParseStatus ParseFrame(
return status;
}
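To keep the field packing above easy to follow, here is a hedged sketch of the 16-byte ANMF header that ParseAnimationFrame() consumes. Offsets are inferred from the ReadLE24s()/ReadByte() sequence; GetLE24_() and the struct are illustrative stand-ins, and only the low dispose bit of the final flags byte is interpreted, exactly as above.
#include <stdint.h>
typedef struct {
  int x_offset, y_offset;       // stored as value / 2 in the bitstream
  int width, height;            // stored as value - 1
  int duration;                 // milliseconds
  int dispose_to_background;    // low bit of the flags byte
} AnmfHeaderSketch;
static int GetLE24_(const uint8_t* const p) {    // little-endian 24-bit read
  return p[0] | (p[1] << 8) | (p[2] << 16);
}
static void ParseAnmfHeaderSketch(const uint8_t p[16], AnmfHeaderSketch* const h) {
  h->x_offset = 2 * GetLE24_(p + 0);
  h->y_offset = 2 * GetLE24_(p + 3);
  h->width    = 1 + GetLE24_(p + 6);
  h->height   = 1 + GetLE24_(p + 9);
  h->duration = GetLE24_(p + 12);
  h->dispose_to_background = p[15] & 1;          // the bit ParseAnimationFrame keeps
}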
// Parse a 'TILE' chunk and any image bearing chunks that immediately follow.
// 'tile_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseTile(WebPDemuxer* const dmux,
uint32_t tile_chunk_size) {
const int has_tiles = !!(dmux->feature_flags_ & TILE_FLAG);
const uint32_t min_size = tile_chunk_size + CHUNK_HEADER_SIZE;
int added_tile = 0;
#ifdef WEBP_EXPERIMENTAL_FEATURES
// Parse a 'FRGM' chunk and any image bearing chunks that immediately follow.
// 'fragment_chunk_size' is the previously validated, padded chunk size.
static ParseStatus ParseFragment(WebPDemuxer* const dmux,
uint32_t fragment_chunk_size) {
const int frame_num = 1; // All fragments belong to the 1st (and only) frame.
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const uint32_t frgm_payload_size = fragment_chunk_size - FRGM_CHUNK_SIZE;
int added_fragment = 0;
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status =
NewFrame(mem, min_size, TILE_CHUNK_SIZE, tile_chunk_size, &frame);
NewFrame(mem, FRGM_CHUNK_SIZE, fragment_chunk_size, &frame);
if (status != PARSE_OK) return status;
frame->is_tile_ = 1;
frame->x_offset_ = 2 * GetLE24s(mem);
frame->y_offset_ = 2 * GetLE24s(mem);
Skip(mem, tile_chunk_size - TILE_CHUNK_SIZE); // skip any trailing data.
frame->is_fragment_ = 1;
frame->x_offset_ = 2 * ReadLE24s(mem);
frame->y_offset_ = 2 * ReadLE24s(mem);
// Store a (potentially partial) tile only if the tile flag is set
// and the tile contains some data.
status = StoreFrame(dmux->num_frames_, mem, frame);
if (status != PARSE_ERROR && has_tiles && frame->frame_num_ > 0) {
// Note num_frames_ is incremented only when all tiles have been consumed.
added_tile = AddFrame(dmux, frame);
if (!added_tile) status = PARSE_ERROR;
// Store a fragment only if the fragments flag is set and some data for
// this fragment is available.
status = StoreFrame(frame_num, frgm_payload_size, mem, frame, NULL);
if (status != PARSE_ERROR && has_fragments && frame->frame_num_ > 0) {
added_fragment = AddFrame(dmux, frame);
if (!added_fragment) {
status = PARSE_ERROR;
} else {
dmux->num_frames_ = 1;
}
}
if (!added_tile) free(frame);
if (!added_fragment) free(frame);
return status;
}
#endif // WEBP_EXPERIMENTAL_FEATURES
// General chunk storage starting with the header at 'start_offset' allowing
// General chunk storage, starting with the header at 'start_offset', allowing
// the user to request the payload via a fourcc string. 'size' includes the
// header and the unpadded payload size.
// Returns true on success, false otherwise.
@ -384,7 +405,7 @@ static int ReadHeader(MemBuffer* const mem) {
return 0;
}
riff_size = ReadLE32(GetBuffer(mem) + TAG_SIZE);
riff_size = GetLE32(GetBuffer(mem) + TAG_SIZE);
if (riff_size < CHUNK_HEADER_SIZE) return 0;
if (riff_size > MAX_CHUNK_PAYLOAD) return 0;
@ -403,6 +424,7 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
Frame* frame;
ParseStatus status;
int has_vp8l_alpha = 0; // Frame contains a lossless image with alpha.
if (dmux->frames_ != NULL) return PARSE_ERROR;
if (SizeIsInvalid(mem, min_size)) return PARSE_ERROR;
@ -411,7 +433,10 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
frame = (Frame*)calloc(1, sizeof(*frame));
if (frame == NULL) return PARSE_ERROR;
status = StoreFrame(1, &dmux->mem_, frame);
// For the single image case we allow parsing of a partial frame, but we need
// at least CHUNK_HEADER_SIZE for parsing.
status = StoreFrame(1, CHUNK_HEADER_SIZE, &dmux->mem_, frame,
&has_vp8l_alpha);
if (status != PARSE_ERROR) {
const int has_alpha = !!(dmux->feature_flags_ & ALPHA_FLAG);
// Clear any alpha when the alpha flag is missing.
@ -421,10 +446,12 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
}
// Use the frame width/height as the canvas values for non-vp8x files.
// Also, set ALPHA_FLAG if this is a lossless image with alpha.
if (!dmux->is_ext_format_ && frame->width_ > 0 && frame->height_ > 0) {
dmux->state_ = WEBP_DEMUX_PARSED_HEADER;
dmux->canvas_width_ = frame->width_;
dmux->canvas_height_ = frame->height_;
dmux->feature_flags_ |= has_vp8l_alpha ? ALPHA_FLAG : 0;
}
AddFrame(dmux, frame);
dmux->num_frames_ = 1;
@ -437,7 +464,7 @@ static ParseStatus ParseSingleImage(WebPDemuxer* const dmux) {
static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
MemBuffer* const mem = &dmux->mem_;
int loop_chunks = 0;
int anim_chunks = 0;
uint32_t vp8x_size;
ParseStatus status = PARSE_OK;
@ -445,17 +472,17 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
dmux->is_ext_format_ = 1;
Skip(mem, TAG_SIZE); // VP8X
vp8x_size = GetLE32(mem);
vp8x_size = ReadLE32(mem);
if (vp8x_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
if (vp8x_size < VP8X_CHUNK_SIZE) return PARSE_ERROR;
vp8x_size += vp8x_size & 1;
if (SizeIsInvalid(mem, vp8x_size)) return PARSE_ERROR;
if (MemDataSize(mem) < vp8x_size) return PARSE_NEED_MORE_DATA;
dmux->feature_flags_ = GetByte(mem);
dmux->feature_flags_ = ReadByte(mem);
Skip(mem, 3); // Reserved.
dmux->canvas_width_ = 1 + GetLE24s(mem);
dmux->canvas_height_ = 1 + GetLE24s(mem);
dmux->canvas_width_ = 1 + ReadLE24s(mem);
dmux->canvas_height_ = 1 + ReadLE24s(mem);
if (dmux->canvas_width_ * (uint64_t)dmux->canvas_height_ >= MAX_IMAGE_AREA) {
return PARSE_ERROR; // image final dimension is too large
}
@ -468,8 +495,8 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
do {
int store_chunk = 1;
const size_t chunk_start_offset = mem->start_;
const uint32_t fourcc = GetLE32(mem);
const uint32_t chunk_size = GetLE32(mem);
const uint32_t fourcc = ReadLE32(mem);
const uint32_t chunk_size = ReadLE32(mem);
const uint32_t chunk_size_padded = chunk_size + (chunk_size & 1);
if (chunk_size > MAX_CHUNK_PAYLOAD) return PARSE_ERROR;
@ -482,40 +509,50 @@ static ParseStatus ParseVP8X(WebPDemuxer* const dmux) {
case MKFOURCC('A', 'L', 'P', 'H'):
case MKFOURCC('V', 'P', '8', ' '):
case MKFOURCC('V', 'P', '8', 'L'): {
// check that this isn't an animation (all frames should be in an ANMF).
if (anim_chunks > 0) return PARSE_ERROR;
Rewind(mem, CHUNK_HEADER_SIZE);
status = ParseSingleImage(dmux);
break;
}
case MKFOURCC('L', 'O', 'O', 'P'): {
if (chunk_size_padded < LOOP_CHUNK_SIZE) return PARSE_ERROR;
case MKFOURCC('A', 'N', 'I', 'M'): {
if (chunk_size_padded < ANIM_CHUNK_SIZE) return PARSE_ERROR;
if (MemDataSize(mem) < chunk_size_padded) {
status = PARSE_NEED_MORE_DATA;
} else if (loop_chunks == 0) {
++loop_chunks;
dmux->loop_count_ = GetLE16s(mem);
Skip(mem, chunk_size_padded - LOOP_CHUNK_SIZE);
} else if (anim_chunks == 0) {
++anim_chunks;
dmux->bgcolor_ = ReadLE32(mem);
dmux->loop_count_ = ReadLE16s(mem);
Skip(mem, chunk_size_padded - ANIM_CHUNK_SIZE);
} else {
store_chunk = 0;
goto Skip;
}
break;
}
case MKFOURCC('F', 'R', 'M', ' '): {
status = ParseFrame(dmux, chunk_size_padded);
case MKFOURCC('A', 'N', 'M', 'F'): {
if (anim_chunks == 0) return PARSE_ERROR; // 'ANIM' precedes frames.
status = ParseAnimationFrame(dmux, chunk_size_padded);
break;
}
case MKFOURCC('T', 'I', 'L', 'E'): {
if (dmux->num_frames_ == 0) dmux->num_frames_ = 1;
status = ParseTile(dmux, chunk_size_padded);
#ifdef WEBP_EXPERIMENTAL_FEATURES
case MKFOURCC('F', 'R', 'G', 'M'): {
status = ParseFragment(dmux, chunk_size_padded);
break;
}
#endif
case MKFOURCC('I', 'C', 'C', 'P'): {
store_chunk = !!(dmux->feature_flags_ & ICCP_FLAG);
goto Skip;
}
case MKFOURCC('M', 'E', 'T', 'A'): {
store_chunk = !!(dmux->feature_flags_ & META_FLAG);
case MKFOURCC('X', 'M', 'P', ' '): {
store_chunk = !!(dmux->feature_flags_ & XMP_FLAG);
goto Skip;
}
case MKFOURCC('E', 'X', 'I', 'F'): {
store_chunk = !!(dmux->feature_flags_ & EXIF_FLAG);
goto Skip;
}
Skip:
@ -561,7 +598,7 @@ static int IsValidSimpleFormat(const WebPDemuxer* const dmux) {
}
static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
const int has_tiles = !!(dmux->feature_flags_ & TILE_FLAG);
const int has_fragments = !!(dmux->feature_flags_ & FRAGMENTS_FLAG);
const int has_frames = !!(dmux->feature_flags_ & ANIMATION_FLAG);
const Frame* f;
@ -573,15 +610,15 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
for (f = dmux->frames_; f != NULL; f = f->next_) {
const int cur_frame_set = f->frame_num_;
int frame_count = 0, tile_count = 0;
int frame_count = 0, fragment_count = 0;
// Check frame properties and if the image is composed of tiles that each
// fragment came from a 'TILE'.
// Check frame properties and, if the image is composed of fragments, that
// each piece came from a 'FRGM' chunk.
for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) {
const ChunkData* const image = f->img_components_;
const ChunkData* const alpha = f->img_components_ + 1;
if (!has_tiles && f->is_tile_) return 0;
if (!has_fragments && f->is_fragment_) return 0;
if (!has_frames && f->frame_num_ > 1) return 0;
if (f->x_offset_ < 0 || f->y_offset_ < 0) return 0;
if (f->complete_) {
@ -593,6 +630,9 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
if (f->width_ <= 0 || f->height_ <= 0) return 0;
} else {
// There shouldn't be a partial frame in a complete file.
if (dmux->state_ == WEBP_DEMUX_DONE) return 0;
// Ensure alpha precedes image bitstream.
if (alpha->size_ > 0 && image->size_ > 0 &&
alpha->offset_ > image->offset_) {
@ -602,11 +642,11 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
if (f->next_ != NULL) return 0;
}
tile_count += f->is_tile_;
fragment_count += f->is_fragment_;
++frame_count;
}
if (!has_tiles && frame_count > 1) return 0;
if (tile_count > 0 && frame_count != tile_count) return 0;
if (!has_fragments && frame_count > 1) return 0;
if (fragment_count > 0 && frame_count != fragment_count) return 0;
if (f == NULL) break;
}
return 1;
@ -618,8 +658,10 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) {
static void InitDemux(WebPDemuxer* const dmux, const MemBuffer* const mem) {
dmux->state_ = WEBP_DEMUX_PARSING_HEADER;
dmux->loop_count_ = 1;
dmux->bgcolor_ = 0xFFFFFFFF; // White background by default.
dmux->canvas_width_ = -1;
dmux->canvas_height_ = -1;
dmux->frames_tail_ = &dmux->frames_;
dmux->mem_ = *mem;
}
@ -632,9 +674,9 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
WebPDemuxer* dmux;
if (WEBP_ABI_IS_INCOMPATIBLE(version, WEBP_DEMUX_ABI_VERSION)) return NULL;
if (data == NULL || data->bytes_ == NULL || data->size_ == 0) return NULL;
if (data == NULL || data->bytes == NULL || data->size == 0) return NULL;
if (!InitMemBuffer(&mem, data->bytes_, data->size_)) return NULL;
if (!InitMemBuffer(&mem, data->bytes, data->size)) return NULL;
if (!ReadHeader(&mem)) return NULL;
partial = (mem.buf_size_ < mem.riff_end_);
@ -648,6 +690,7 @@ WebPDemuxer* WebPDemuxInternal(const WebPData* data, int allow_partial,
if (!memcmp(parser->id, GetBuffer(&dmux->mem_), TAG_SIZE)) {
status = parser->parse(dmux);
if (status == PARSE_OK) dmux->state_ = WEBP_DEMUX_DONE;
if (status == PARSE_NEED_MORE_DATA && !partial) status = PARSE_ERROR;
if (status != PARSE_ERROR && !parser->valid(dmux)) status = PARSE_ERROR;
break;
}
@ -685,10 +728,12 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
if (dmux == NULL) return 0;
switch (feature) {
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_FORMAT_FLAGS: return dmux->feature_flags_;
case WEBP_FF_CANVAS_WIDTH: return (uint32_t)dmux->canvas_width_;
case WEBP_FF_CANVAS_HEIGHT: return (uint32_t)dmux->canvas_height_;
case WEBP_FF_LOOP_COUNT: return (uint32_t)dmux->loop_count_;
case WEBP_FF_BACKGROUND_COLOR: return dmux->bgcolor_;
case WEBP_FF_FRAME_COUNT: return (uint32_t)dmux->num_frames_;
}
return 0;
}
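Putting the demux pieces together, a hedged usage sketch from the application side. WebPDemuxReleaseIterator() and WebPDemuxDelete() belong to webp/demux.h but are not part of this diff, so treat their use here as an assumption; everything else (iterator fields without the trailing underscore, the new WEBP_FF_FRAME_COUNT/WEBP_FF_BACKGROUND_COLOR features) mirrors the code above.
#include <stdint.h>
#include <stdio.h>
#include "webp/demux.h"
// 'bytes'/'size' are placeholders for a buffer holding a complete animated WebP.
static void DumpFrames(const uint8_t* bytes, size_t size) {
  WebPData data;
  WebPIterator iter;
  WebPDemuxer* dmux;
  data.bytes = bytes;                 // field names lost their trailing '_' in this change
  data.size = size;
  dmux = WebPDemux(&data);
  if (dmux == NULL) return;
  printf("canvas %ux%u, %u frame(s), loop=%u, bg=0x%08x\n",
         WebPDemuxGetI(dmux, WEBP_FF_CANVAS_WIDTH),
         WebPDemuxGetI(dmux, WEBP_FF_CANVAS_HEIGHT),
         WebPDemuxGetI(dmux, WEBP_FF_FRAME_COUNT),
         WebPDemuxGetI(dmux, WEBP_FF_LOOP_COUNT),
         WebPDemuxGetI(dmux, WEBP_FF_BACKGROUND_COLOR));
  if (WebPDemuxGetFrame(dmux, 1, &iter)) {
    do {
      printf("frame %d: %dx%d at (%d,%d), %d ms, %zu bytes\n",
             iter.frame_num, iter.width, iter.height,
             iter.x_offset, iter.y_offset, iter.duration,
             iter.fragment.size);
    } while (WebPDemuxNextFrame(&iter));
    WebPDemuxReleaseIterator(&iter);
  }
  WebPDemuxDelete(dmux);
}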
@ -696,7 +741,8 @@ uint32_t WebPDemuxGetI(const WebPDemuxer* dmux, WebPFormatFeature feature) {
// -----------------------------------------------------------------------------
// Frame iteration
// Find the first 'frame_num' frame. There may be multiple in a tiled frame.
// Find the first 'frame_num' frame. There may be multiple such frames in a
// fragmented frame.
static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
const Frame* f;
for (f = dmux->frames_; f != NULL; f = f->next_) {
@ -705,19 +751,19 @@ static const Frame* GetFrame(const WebPDemuxer* const dmux, int frame_num) {
return f;
}
// Returns tile 'tile_num' and the total count.
static const Frame* GetTile(
const Frame* const frame_set, int tile_num, int* const count) {
// Returns fragment 'fragment_num' and the total count.
static const Frame* GetFragment(
const Frame* const frame_set, int fragment_num, int* const count) {
const int this_frame = frame_set->frame_num_;
const Frame* f = frame_set;
const Frame* tile = NULL;
const Frame* fragment = NULL;
int total;
for (total = 0; f != NULL && f->frame_num_ == this_frame; f = f->next_) {
if (++total == tile_num) tile = f;
if (++total == fragment_num) fragment = f;
}
*count = total;
return tile;
return fragment;
}
static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
@ -747,27 +793,31 @@ static const uint8_t* GetFramePayload(const uint8_t* const mem_buf,
// Create a whole 'frame' from VP8 (+ alpha) or lossless.
static int SynthesizeFrame(const WebPDemuxer* const dmux,
const Frame* const first_frame,
int tile_num, WebPIterator* const iter) {
int fragment_num, WebPIterator* const iter) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
int num_tiles;
int num_fragments;
size_t payload_size = 0;
const Frame* const tile = GetTile(first_frame, tile_num, &num_tiles);
const uint8_t* const payload = GetFramePayload(mem_buf, tile, &payload_size);
const Frame* const fragment =
GetFragment(first_frame, fragment_num, &num_fragments);
const uint8_t* const payload =
GetFramePayload(mem_buf, fragment, &payload_size);
if (payload == NULL) return 0;
assert(first_frame != NULL);
iter->frame_num_ = first_frame->frame_num_;
iter->num_frames_ = dmux->num_frames_;
iter->tile_num_ = tile_num;
iter->num_tiles_ = num_tiles;
iter->x_offset_ = tile->x_offset_;
iter->y_offset_ = tile->y_offset_;
iter->width_ = tile->width_;
iter->height_ = tile->height_;
iter->duration_ = tile->duration_;
iter->complete_ = tile->complete_;
iter->tile_.bytes_ = payload;
iter->tile_.size_ = payload_size;
// TODO(jzern): adjust offsets for 'TILE's embedded in 'FRM 's
iter->frame_num = first_frame->frame_num_;
iter->num_frames = dmux->num_frames_;
iter->fragment_num = fragment_num;
iter->num_fragments = num_fragments;
iter->x_offset = fragment->x_offset_;
iter->y_offset = fragment->y_offset_;
iter->width = fragment->width_;
iter->height = fragment->height_;
iter->duration = fragment->duration_;
iter->dispose_method = fragment->dispose_method_;
iter->complete = fragment->complete_;
iter->fragment.bytes = payload;
iter->fragment.size = payload_size;
// TODO(jzern): adjust offsets for 'FRGM's embedded in 'ANMF's
return 1;
}
@ -779,6 +829,8 @@ static int SetFrame(int frame_num, WebPIterator* const iter) {
if (frame_num == 0) frame_num = dmux->num_frames_;
frame = GetFrame(dmux, frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, 1, iter);
}
@ -792,22 +844,22 @@ int WebPDemuxGetFrame(const WebPDemuxer* dmux, int frame, WebPIterator* iter) {
int WebPDemuxNextFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
return SetFrame(iter->frame_num_ + 1, iter);
return SetFrame(iter->frame_num + 1, iter);
}
int WebPDemuxPrevFrame(WebPIterator* iter) {
if (iter == NULL) return 0;
if (iter->frame_num_ <= 1) return 0;
return SetFrame(iter->frame_num_ - 1, iter);
if (iter->frame_num <= 1) return 0;
return SetFrame(iter->frame_num - 1, iter);
}
int WebPDemuxSelectTile(WebPIterator* iter, int tile) {
if (iter != NULL && iter->private_ != NULL && tile > 0) {
int WebPDemuxSelectFragment(WebPIterator* iter, int fragment_num) {
if (iter != NULL && iter->private_ != NULL && fragment_num > 0) {
const WebPDemuxer* const dmux = (WebPDemuxer*)iter->private_;
const Frame* const frame = GetFrame(dmux, iter->frame_num_);
const Frame* const frame = GetFrame(dmux, iter->frame_num);
if (frame == NULL) return 0;
return SynthesizeFrame(dmux, frame, tile, iter);
return SynthesizeFrame(dmux, frame, fragment_num, iter);
}
return 0;
}
@ -856,10 +908,10 @@ static int SetChunk(const char fourcc[4], int chunk_num,
if (chunk_num <= count) {
const uint8_t* const mem_buf = dmux->mem_.buf_;
const Chunk* const chunk = GetChunk(dmux, fourcc, chunk_num);
iter->chunk_.bytes_ = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk_.size_ = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->num_chunks_ = count;
iter->chunk_num_ = chunk_num;
iter->chunk.bytes = mem_buf + chunk->data_.offset_ + CHUNK_HEADER_SIZE;
iter->chunk.size = chunk->data_.size_ - CHUNK_HEADER_SIZE;
iter->num_chunks = count;
iter->chunk_num = chunk_num;
return 1;
}
return 0;
@ -878,17 +930,17 @@ int WebPDemuxGetChunk(const WebPDemuxer* dmux,
int WebPDemuxNextChunk(WebPChunkIterator* iter) {
if (iter != NULL) {
const char* const fourcc =
(const char*)iter->chunk_.bytes_ - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num_ + 1, iter);
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num + 1, iter);
}
return 0;
}
int WebPDemuxPrevChunk(WebPChunkIterator* iter) {
if (iter != NULL && iter->chunk_num_ > 1) {
if (iter != NULL && iter->chunk_num > 1) {
const char* const fourcc =
(const char*)iter->chunk_.bytes_ - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num_ - 1, iter);
(const char*)iter->chunk.bytes - CHUNK_HEADER_SIZE;
return SetChunk(fourcc, iter->chunk_num - 1, iter);
}
return 0;
}
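A companion sketch for the metadata chunk iterator; WebPDemuxReleaseChunkIterator() also comes from webp/demux.h and is assumed here, since this diff does not show it.
#include <stdio.h>
#include "webp/demux.h"
// 'dmux' is an already-created demuxer, e.g. from the frame sketch earlier.
static void PrintExifSize(const WebPDemuxer* dmux) {
  WebPChunkIterator chunk_iter;
  if (WebPDemuxGetChunk(dmux, "EXIF", 1, &chunk_iter)) {
    // chunk_iter.chunk.bytes / chunk_iter.chunk.size expose the raw payload.
    printf("EXIF payload: %zu bytes, %d chunk(s) total\n",
           chunk_iter.chunk.size, chunk_iter.num_chunks);
    WebPDemuxReleaseChunkIterator(&chunk_iter);
  }
}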

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// CPU detection

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical decoding functions.
@ -426,11 +428,16 @@ static void HE8uv(uint8_t *dst) { // horizontal
}
// helper for chroma-DC predictions
static WEBP_INLINE void Put8x8uv(uint64_t v, uint8_t* dst) {
static WEBP_INLINE void Put8x8uv(uint8_t value, uint8_t* dst) {
int j;
#ifndef WEBP_REFERENCE_IMPLEMENTATION
const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
for (j = 0; j < 8; ++j) {
*(uint64_t*)(dst + j * BPS) = v;
}
#else
for (j = 0; j < 8; ++j) memset(dst + j * BPS, value, 8);
#endif
}
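The refactor above relies on a standard trick: multiplying an 8-bit value by 0x0101010101010101 broadcasts it into every byte of a 64-bit word, so the DC predictors below can now pass the plain DC value instead of a pre-replicated constant. A stand-alone check (hypothetical test code):
#include <assert.h>
#include <stdint.h>
int main(void) {
  const uint8_t value = 0x80;  // the DC8uvNoTopLeft() constant
  const uint64_t v = (uint64_t)value * 0x0101010101010101ULL;
  assert(v == 0x8080808080808080ULL);  // one copy of 'value' per byte
  return 0;
}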
static void DC8uv(uint8_t *dst) { // DC
@ -439,7 +446,7 @@ static void DC8uv(uint8_t *dst) { // DC
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS] + dst[-1 + i * BPS];
}
Put8x8uv((uint64_t)((dc0 >> 4) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 4, dst);
}
static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
@ -448,7 +455,7 @@ static void DC8uvNoLeft(uint8_t *dst) { // DC with no left samples
for (i = 0; i < 8; ++i) {
dc0 += dst[i - BPS];
}
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
@ -457,11 +464,11 @@ static void DC8uvNoTop(uint8_t *dst) { // DC with no top samples
for (i = 0; i < 8; ++i) {
dc0 += dst[-1 + i * BPS];
}
Put8x8uv((uint64_t)((dc0 >> 3) * 0x0101010101010101ULL), dst);
Put8x8uv(dc0 >> 3, dst);
}
static void DC8uvNoTopLeft(uint8_t *dst) { // DC with nothing
Put8x8uv(0x8080808080808080ULL, dst);
Put8x8uv(0x80, dst);
}
//------------------------------------------------------------------------------

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARM NEON version of dsp functions and loop filtering.
@ -79,7 +81,7 @@ extern "C" {
"vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \
"vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n"
#define STORE8x2(c1, c2, p,stride) \
#define STORE8x2(c1, c2, p, stride) \
"vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \
"vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \
@ -155,6 +157,9 @@ static void SimpleHFilter16iNEON(uint8_t* p, int stride, int thresh) {
}
}
//-----------------------------------------------------------------------------
// Inverse transforms (Paragraph 14.4)
static void TransformOneNEON(const int16_t *in, uint8_t *dst) {
const int kBPS = BPS;
const int16_t constants[] = {20091, 17734, 0, 0};
@ -311,6 +316,73 @@ static void TransformTwoNEON(const int16_t* in, uint8_t* dst, int do_two) {
}
}
static void TransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // The store is only incrementing the pointer as if we
// had stored a single byte.
__asm__ volatile (
// part 1
// load data into q0, q1
"vld1.16 {q0, q1}, [%[in]] \n"
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
// Transpose
// q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
// q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
"vswp d1, d4 \n" // vtrn.64 q0, q2
"vswp d3, d6 \n" // vtrn.64 q1, q3
"vtrn.32 q0, q1 \n"
"vtrn.32 q2, q3 \n"
"vmov.s32 q4, #3 \n" // dc = 3
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
"vadd.s32 q0, q6, q7 \n"
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
"vadd.s32 q1, q9, q8 \n"
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
"vsub.s32 q2, q6, q7 \n"
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
"vsub.s32 q3, q9, q8 \n"
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
// set the results to output
"vst1.16 d0[0], [%[out]], %[kStep] \n"
"vst1.16 d1[0], [%[out]], %[kStep] \n"
"vst1.16 d2[0], [%[out]], %[kStep] \n"
"vst1.16 d3[0], [%[out]], %[kStep] \n"
"vst1.16 d0[1], [%[out]], %[kStep] \n"
"vst1.16 d1[1], [%[out]], %[kStep] \n"
"vst1.16 d2[1], [%[out]], %[kStep] \n"
"vst1.16 d3[1], [%[out]], %[kStep] \n"
"vst1.16 d0[2], [%[out]], %[kStep] \n"
"vst1.16 d1[2], [%[out]], %[kStep] \n"
"vst1.16 d2[2], [%[out]], %[kStep] \n"
"vst1.16 d3[2], [%[out]], %[kStep] \n"
"vst1.16 d0[3], [%[out]], %[kStep] \n"
"vst1.16 d1[3], [%[out]], %[kStep] \n"
"vst1.16 d2[3], [%[out]], %[kStep] \n"
"vst1.16 d3[3], [%[out]], %[kStep] \n"
: [out] "+r"(out) // modified registers
: [in] "r"(in), [kStep] "r"(kStep) // constants
: "memory", "q0", "q1", "q2", "q3", "q4",
"q5", "q6", "q7", "q8", "q9" // clobbered
);
}
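For readers not fluent in NEON, here is the same inverse Walsh-Hadamard transform in plain C, reconstructed from the comments in the assembly above. The scalar fallback in dec.c has this shape, but treat this transcription as an approximation rather than a verbatim copy; the out[16]/out[32]/out[48] strides correspond to the kStep = 32 byte post-increments.
#include <stdint.h>
static void TransformWHT_C(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  for (i = 0; i < 4; ++i) {               // columns (part 1 of the assembly)
    const int a0 = in[0 + i] + in[12 + i];
    const int a1 = in[4 + i] + in[ 8 + i];
    const int a2 = in[4 + i] - in[ 8 + i];
    const int a3 = in[0 + i] - in[12 + i];
    tmp[ 0 + i] = a0 + a1;
    tmp[ 8 + i] = a0 - a1;
    tmp[ 4 + i] = a3 + a2;
    tmp[12 + i] = a3 - a2;
  }
  for (i = 0; i < 4; ++i) {               // rows (part 2, after the transpose)
    const int dc = tmp[0 + i * 4] + 3;    // the 'vmov.s32 q4, #3' rounding term
    const int a0 = dc             + tmp[3 + i * 4];
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    const int a3 = dc             - tmp[3 + i * 4];
    out[ 0] = (int16_t)((a0 + a1) >> 3);
    out[16] = (int16_t)((a3 + a2) >> 3);
    out[32] = (int16_t)((a0 - a1) >> 3);
    out[48] = (int16_t)((a3 - a2) >> 3);
    out += 64;
  }
}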
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
@ -321,6 +393,7 @@ extern void VP8DspInitNEON(void);
void VP8DspInitNEON(void) {
#if defined(WEBP_USE_NEON)
VP8Transform = TransformTwoNEON;
VP8TransformWHT = TransformWHT;
VP8SimpleVFilter16 = SimpleVFilter16NEON;
VP8SimpleHFilter16 = SimpleHFilter16NEON;

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 version of some decoding functions (idct, loop filtering).
@ -194,7 +196,7 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Add inverse transform to 'dst' and store.
{
const __m128i zero = _mm_set1_epi16(0);
const __m128i zero = _mm_setzero_si128();
// Load the reference(s).
__m128i dst0, dst1, dst2, dst3;
if (do_two) {
@ -278,14 +280,14 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
#define GET_NOTHEV(p1, p0, q0, q1, hev_thresh, not_hev) { \
const __m128i zero = _mm_setzero_si128(); \
const __m128i t1 = MM_ABS(p1, p0); \
const __m128i t2 = MM_ABS(q1, q0); \
const __m128i t_1 = MM_ABS(p1, p0); \
const __m128i t_2 = MM_ABS(q1, q0); \
\
const __m128i h = _mm_set1_epi8(hev_thresh); \
const __m128i t3 = _mm_subs_epu8(t1, h); /* abs(p1 - p0) - hev_tresh */ \
const __m128i t4 = _mm_subs_epu8(t2, h); /* abs(q1 - q0) - hev_tresh */ \
const __m128i t_3 = _mm_subs_epu8(t_1, h); /* abs(p1 - p0) - hev_thresh */ \
const __m128i t_4 = _mm_subs_epu8(t_2, h); /* abs(q1 - q0) - hev_thresh */ \
\
not_hev = _mm_or_si128(t3, t4); \
not_hev = _mm_or_si128(t_3, t_4); \
not_hev = _mm_cmpeq_epi8(not_hev, zero); /* not_hev <= t1 && not_hev <= t2 */\
}
@ -314,13 +316,13 @@ static void TransformSSE2(const int16_t* in, uint8_t* dst, int do_two) {
// Updates values of 2 pixels at MB edge during complex filtering.
// Update operations:
// q = q - a and p = p + a; where a = [(a_hi >> 7), (a_lo >> 7)]
// q = q - delta and p = p + delta; where delta = [(a_hi >> 7), (a_lo >> 7)]
#define UPDATE_2PIXELS(pi, qi, a_lo, a_hi) { \
const __m128i a_lo7 = _mm_srai_epi16(a_lo, 7); \
const __m128i a_hi7 = _mm_srai_epi16(a_hi, 7); \
const __m128i a = _mm_packs_epi16(a_lo7, a_hi7); \
pi = _mm_adds_epi8(pi, a); \
qi = _mm_subs_epi8(qi, a); \
const __m128i delta = _mm_packs_epi16(a_lo7, a_hi7); \
pi = _mm_adds_epi8(pi, delta); \
qi = _mm_subs_epi8(qi, delta); \
}
static void NeedsFilter(const __m128i* p1, const __m128i* p0, const __m128i* q0,

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical functions.
@ -49,8 +51,6 @@ extern VP8CPUInfo VP8GetCPUInfo;
//------------------------------------------------------------------------------
// Encoding
int VP8GetAlpha(const int histo[]);
// Transforms
// VP8Idct: Does one of two inverse transforms. If do_two is set, the transforms
// will be done for (ref, in, dst) and (ref + 4, in + 16, dst + 4).
@ -85,10 +85,11 @@ typedef int (*VP8QuantizeBlock)(int16_t in[16], int16_t out[16],
int n, const struct VP8Matrix* const mtx);
extern VP8QuantizeBlock VP8EncQuantizeBlock;
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
typedef int (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block);
// Collect histogram for susceptibility calculation and accumulate in histo[].
struct VP8Histogram;
typedef void (*VP8CHisto)(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
struct VP8Histogram* const histo);
extern const int VP8DspScan[16 + 4 + 4];
extern VP8CHisto VP8CollectHistogram;
@ -104,7 +105,7 @@ extern VP8DecIdct2 VP8Transform;
extern VP8DecIdct VP8TransformUV;
extern VP8DecIdct VP8TransformDC;
extern VP8DecIdct VP8TransformDCUV;
extern void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
extern VP8WHT VP8TransformWHT;
// *dst is the destination block, with stride BPS. Boundary samples are
// assumed accessible when needed.
@ -159,6 +160,9 @@ extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
// Initializes SSE2 version of the fancy upsamplers.
void WebPInitUpsamplersSSE2(void);
// NEON version
void WebPInitUpsamplersNEON(void);
#endif // FANCY_UPSAMPLING
// Point-sampling methods.
@ -200,6 +204,7 @@ extern void (*WebPApplyAlphaMultiply4444)(
void WebPInitPremultiply(void);
void WebPInitPremultiplySSE2(void); // should not be called directly.
void WebPInitPremultiplyNEON(void);
//------------------------------------------------------------------------------

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical encoding functions.
@ -17,31 +19,18 @@
extern "C" {
#endif
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255;
}
static WEBP_INLINE int clip_max(int v, int max) {
return (v > max) ? max : v;
}
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
}
int VP8GetAlpha(const int histo[MAX_COEFF_THRESH + 1]) {
int num = 0, den = 0, val = 0;
int k;
int alpha;
// note: changing this loop to avoid the numerous "k + 1" slows things down.
for (k = 0; k < MAX_COEFF_THRESH; ++k) {
if (histo[k + 1]) {
val += histo[k + 1];
num += val * (k + 1);
den += (k + 1) * (k + 1);
}
}
// we scale the value to a usable [0..255] range
alpha = den ? 10 * num / den - 5 : 0;
return ClipAlpha(alpha);
}
const int VP8DspScan[16 + 4 + 4] = {
// Luma
0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
@ -53,27 +42,23 @@ const int VP8DspScan[16 + 4 + 4] = {
8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
};
static int CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) {
int histo[MAX_COEFF_THRESH + 1] = { 0 };
int16_t out[16];
int j, k;
static void CollectHistogram(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
int j;
for (j = start_block; j < end_block; ++j) {
int k;
int16_t out[16];
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
const int v = abs(out[k]) >> 2;
out[k] = (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;
}
// Use bin to update histogram.
for (k = 0; k < 16; ++k) {
histo[out[k]]++;
const int v = abs(out[k]) >> 3; // TODO(skal): add rounding?
const int clipped_value = clip_max(v, MAX_COEFF_THRESH);
histo->distribution[clipped_value]++;
}
}
return VP8GetAlpha(histo);
}
//------------------------------------------------------------------------------
@ -89,15 +74,12 @@ static void InitTables(void) {
if (!tables_ok) {
int i;
for (i = -255; i <= 255 + 255; ++i) {
clip1[255 + i] = (i < 0) ? 0 : (i > 255) ? 255 : i;
clip1[255 + i] = clip_8b(i);
}
tables_ok = 1;
}
}
static WEBP_INLINE uint8_t clip_8b(int v) {
return (!(v & ~0xff)) ? v : v < 0 ? 0 : 255;
}
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@ -154,25 +136,25 @@ static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) {
int i;
int tmp[16];
for (i = 0; i < 4; ++i, src += BPS, ref += BPS) {
const int d0 = src[0] - ref[0];
const int d0 = src[0] - ref[0]; // 9bit dynamic range ([-255,255])
const int d1 = src[1] - ref[1];
const int d2 = src[2] - ref[2];
const int d3 = src[3] - ref[3];
const int a0 = (d0 + d3) << 3;
const int a1 = (d1 + d2) << 3;
const int a2 = (d1 - d2) << 3;
const int a3 = (d0 - d3) << 3;
tmp[0 + i * 4] = (a0 + a1);
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 14500) >> 12;
tmp[2 + i * 4] = (a0 - a1);
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 7500) >> 12;
const int a0 = (d0 + d3); // 10b [-510,510]
const int a1 = (d1 + d2);
const int a2 = (d1 - d2);
const int a3 = (d0 - d3);
tmp[0 + i * 4] = (a0 + a1) * 8; // 14b [-8160,8160]
tmp[1 + i * 4] = (a2 * 2217 + a3 * 5352 + 1812) >> 9; // [-7536,7542]
tmp[2 + i * 4] = (a0 - a1) * 8;
tmp[3 + i * 4] = (a3 * 2217 - a2 * 5352 + 937) >> 9;
}
for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[12 + i]);
const int a0 = (tmp[0 + i] + tmp[12 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[ 8 + i]);
const int a2 = (tmp[4 + i] - tmp[ 8 + i]);
const int a3 = (tmp[0 + i] - tmp[12 + i]);
out[0 + i] = (a0 + a1 + 7) >> 4;
out[0 + i] = (a0 + a1 + 7) >> 4; // 12b
out[4 + i] = ((a2 * 2217 + a3 * 5352 + 12000) >> 16) + (a3 != 0);
out[8 + i] = (a0 - a1 + 7) >> 4;
out[12+ i] = ((a3 * 2217 - a2 * 5352 + 51000) >> 16);
@ -207,31 +189,32 @@ static void ITransformWHT(const int16_t* in, int16_t* out) {
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
int tmp[16];
// input is 12b signed
int16_t tmp[16];
int i;
for (i = 0; i < 4; ++i, in += 64) {
const int a0 = (in[0 * 16] + in[2 * 16]) << 2;
const int a1 = (in[1 * 16] + in[3 * 16]) << 2;
const int a2 = (in[1 * 16] - in[3 * 16]) << 2;
const int a3 = (in[0 * 16] - in[2 * 16]) << 2;
tmp[0 + i * 4] = (a0 + a1) + (a0 != 0);
const int a0 = (in[0 * 16] + in[2 * 16]); // 13b
const int a1 = (in[1 * 16] + in[3 * 16]);
const int a2 = (in[1 * 16] - in[3 * 16]);
const int a3 = (in[0 * 16] - in[2 * 16]);
tmp[0 + i * 4] = a0 + a1; // 14b
tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1;
}
for (i = 0; i < 4; ++i) {
const int a0 = (tmp[0 + i] + tmp[8 + i]);
const int a0 = (tmp[0 + i] + tmp[8 + i]); // 15b
const int a1 = (tmp[4 + i] + tmp[12+ i]);
const int a2 = (tmp[4 + i] - tmp[12+ i]);
const int a3 = (tmp[0 + i] - tmp[8 + i]);
const int b0 = a0 + a1;
const int b0 = a0 + a1; // 16b
const int b1 = a3 + a2;
const int b2 = a3 - a2;
const int b3 = a0 - a1;
out[ 0 + i] = (b0 + (b0 > 0) + 3) >> 3;
out[ 4 + i] = (b1 + (b1 > 0) + 3) >> 3;
out[ 8 + i] = (b2 + (b2 > 0) + 3) >> 3;
out[12 + i] = (b3 + (b3 > 0) + 3) >> 3;
out[ 0 + i] = b0 >> 1; // 15b
out[ 4 + i] = b1 >> 1;
out[ 8 + i] = b2 >> 1;
out[12 + i] = b3 >> 1;
}
}
@ -589,30 +572,30 @@ static int TTransform(const uint8_t* in, const uint16_t* w) {
int i;
// horizontal pass
for (i = 0; i < 4; ++i, in += BPS) {
const int a0 = (in[0] + in[2]) << 2;
const int a1 = (in[1] + in[3]) << 2;
const int a2 = (in[1] - in[3]) << 2;
const int a3 = (in[0] - in[2]) << 2;
tmp[0 + i * 4] = a0 + a1 + (a0 != 0);
const int a0 = in[0] + in[2];
const int a1 = in[1] + in[3];
const int a2 = in[1] - in[3];
const int a3 = in[0] - in[2];
tmp[0 + i * 4] = a0 + a1;
tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1;
}
// vertical pass
for (i = 0; i < 4; ++i, ++w) {
const int a0 = (tmp[0 + i] + tmp[8 + i]);
const int a1 = (tmp[4 + i] + tmp[12+ i]);
const int a2 = (tmp[4 + i] - tmp[12+ i]);
const int a3 = (tmp[0 + i] - tmp[8 + i]);
const int a0 = tmp[0 + i] + tmp[8 + i];
const int a1 = tmp[4 + i] + tmp[12+ i];
const int a2 = tmp[4 + i] - tmp[12+ i];
const int a3 = tmp[0 + i] - tmp[8 + i];
const int b0 = a0 + a1;
const int b1 = a3 + a2;
const int b2 = a3 - a2;
const int b3 = a0 - a1;
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
sum += w[ 0] * ((abs(b0) + 3) >> 3);
sum += w[ 4] * ((abs(b1) + 3) >> 3);
sum += w[ 8] * ((abs(b2) + 3) >> 3);
sum += w[12] * ((abs(b3) + 3) >> 3);
sum += w[ 0] * abs(b0);
sum += w[ 4] * abs(b1);
sum += w[ 8] * abs(b2);
sum += w[12] * abs(b3);
}
return sum;
}
@ -621,7 +604,7 @@ static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int sum1 = TTransform(a, w);
const int sum2 = TTransform(b, w);
return (abs(sum2 - sum1) + 8) >> 4;
return abs(sum2 - sum1) >> 5;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
@ -651,13 +634,13 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16],
for (; n < 16; ++n) {
const int j = kZigzag[n];
const int sign = (in[j] < 0);
int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > 2047) coeff = 2047;
const int coeff = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
if (coeff > mtx->zthresh_[j]) {
const int Q = mtx->q_[j];
const int iQ = mtx->iq_[j];
const int B = mtx->bias_[j];
out[n] = QUANTDIV(coeff, iQ, B);
if (out[n] > MAX_LEVEL) out[n] = MAX_LEVEL;
if (sign) out[n] = -out[n];
in[j] = out[n] * Q;
if (out[n]) last = n;
@ -706,6 +689,7 @@ VP8QuantizeBlock VP8EncQuantizeBlock;
VP8BlockCopy VP8Copy4x4;
extern void VP8EncDspInitSSE2(void);
extern void VP8EncDspInitNEON(void);
void VP8EncDspInit(void) {
InitTables();
@ -734,6 +718,10 @@ void VP8EncDspInit(void) {
if (VP8GetCPUInfo(kSSE2)) {
VP8EncDspInitSSE2();
}
#elif defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
VP8EncDspInitNEON();
}
#endif
}
}

639
3rdparty/libwebp/dsp/enc_neon.c vendored Normal file
View File

@ -0,0 +1,639 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// ARM NEON version of speed-critical encoding functions.
//
// adapted from libvpx (http://www.webmproject.org/code/)
#include "./dsp.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_USE_NEON)
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Inverse transform.
// This code is pretty much the same as TransformOneNEON in the decoder, except
// for subtraction to *ref. See the comments there for algorithmic explanations.
static void ITransformOne(const uint8_t* ref,
const int16_t* in, uint8_t* dst) {
const int kBPS = BPS;
const int16_t kC1C2[] = { 20091, 17734, 0, 0 }; // kC1 / (kC2 >> 1) / 0 / 0
__asm__ volatile (
"vld1.16 {q1, q2}, [%[in]] \n"
"vld1.16 {d0}, [%[kC1C2]] \n"
// d2: in[0]
// d3: in[8]
// d4: in[4]
// d5: in[12]
"vswp d3, d4 \n"
// q8 = {in[4], in[12]} * kC1 * 2 >> 16
// q9 = {in[4], in[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// d22 = a = in[0] + in[8]
// d23 = b = in[0] - in[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
// q8 = in[4]/[12] * kC1 >> 16
"vshr.s16 q8, q8, #1 \n"
// Add {in[4], in[12]} back after the multiplication.
"vqadd.s16 q8, q2, q8 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
"vswp d3, d4 \n"
// q8 = {tmp[4], tmp[12]} * kC1 * 2 >> 16
// q9 = {tmp[4], tmp[12]} * kC2 >> 16
"vqdmulh.s16 q8, q2, d0[0] \n"
"vqdmulh.s16 q9, q2, d0[1] \n"
// d22 = a = tmp[0] + tmp[8]
// d23 = b = tmp[0] - tmp[8]
"vqadd.s16 d22, d2, d3 \n"
"vqsub.s16 d23, d2, d3 \n"
"vshr.s16 q8, q8, #1 \n"
"vqadd.s16 q8, q2, q8 \n"
// d20 = c = in[4]*kC2 - in[12]*kC1
// d21 = d = in[4]*kC1 + in[12]*kC2
"vqsub.s16 d20, d18, d17 \n"
"vqadd.s16 d21, d19, d16 \n"
// d2 = tmp[0] = a + d
// d3 = tmp[1] = b + c
// d4 = tmp[2] = b - c
// d5 = tmp[3] = a - d
"vqadd.s16 d2, d22, d21 \n"
"vqadd.s16 d3, d23, d20 \n"
"vqsub.s16 d4, d23, d20 \n"
"vqsub.s16 d5, d22, d21 \n"
"vld1.32 d6[0], [%[ref]], %[kBPS] \n"
"vld1.32 d6[1], [%[ref]], %[kBPS] \n"
"vld1.32 d7[0], [%[ref]], %[kBPS] \n"
"vld1.32 d7[1], [%[ref]], %[kBPS] \n"
"sub %[ref], %[ref], %[kBPS], lsl #2 \n"
// (val + 4) >> 3
"vrshr.s16 d2, d2, #3 \n"
"vrshr.s16 d3, d3, #3 \n"
"vrshr.s16 d4, d4, #3 \n"
"vrshr.s16 d5, d5, #3 \n"
"vzip.16 q1, q2 \n"
"vzip.16 q1, q2 \n"
// Must accumulate before saturating
"vmovl.u8 q8, d6 \n"
"vmovl.u8 q9, d7 \n"
"vqadd.s16 q1, q1, q8 \n"
"vqadd.s16 q2, q2, q9 \n"
"vqmovun.s16 d0, q1 \n"
"vqmovun.s16 d1, q2 \n"
"vst1.32 d0[0], [%[dst]], %[kBPS] \n"
"vst1.32 d0[1], [%[dst]], %[kBPS] \n"
"vst1.32 d1[0], [%[dst]], %[kBPS] \n"
"vst1.32 d1[1], [%[dst]] \n"
: [in] "+r"(in), [dst] "+r"(dst) // modified registers
: [kBPS] "r"(kBPS), [kC1C2] "r"(kC1C2), [ref] "r"(ref) // constants
: "memory", "q0", "q1", "q2", "q8", "q9", "q10", "q11" // clobbered
);
}
static void ITransform(const uint8_t* ref,
const int16_t* in, uint8_t* dst, int do_two) {
ITransformOne(ref, in, dst);
if (do_two) {
ITransformOne(ref + 4, in + 16, dst + 4);
}
}
// Same code as dec_neon.c
static void ITransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32; // The store is only incrementing the pointer as if we
// had stored a single byte.
__asm__ volatile (
// part 1
// load data into q0, q1
"vld1.16 {q0, q1}, [%[in]] \n"
"vaddl.s16 q2, d0, d3 \n" // a0 = in[0] + in[12]
"vaddl.s16 q3, d1, d2 \n" // a1 = in[4] + in[8]
"vsubl.s16 q4, d1, d2 \n" // a2 = in[4] - in[8]
"vsubl.s16 q5, d0, d3 \n" // a3 = in[0] - in[12]
"vadd.s32 q0, q2, q3 \n" // tmp[0] = a0 + a1
"vsub.s32 q2, q2, q3 \n" // tmp[8] = a0 - a1
"vadd.s32 q1, q5, q4 \n" // tmp[4] = a3 + a2
"vsub.s32 q3, q5, q4 \n" // tmp[12] = a3 - a2
// Transpose
// q0 = tmp[0, 4, 8, 12], q1 = tmp[2, 6, 10, 14]
// q2 = tmp[1, 5, 9, 13], q3 = tmp[3, 7, 11, 15]
"vswp d1, d4 \n" // vtrn.64 q0, q2
"vswp d3, d6 \n" // vtrn.64 q1, q3
"vtrn.32 q0, q1 \n"
"vtrn.32 q2, q3 \n"
"vmov.s32 q4, #3 \n" // dc = 3
"vadd.s32 q0, q0, q4 \n" // dc = tmp[0] + 3
"vadd.s32 q6, q0, q3 \n" // a0 = dc + tmp[3]
"vadd.s32 q7, q1, q2 \n" // a1 = tmp[1] + tmp[2]
"vsub.s32 q8, q1, q2 \n" // a2 = tmp[1] - tmp[2]
"vsub.s32 q9, q0, q3 \n" // a3 = dc - tmp[3]
"vadd.s32 q0, q6, q7 \n"
"vshrn.s32 d0, q0, #3 \n" // (a0 + a1) >> 3
"vadd.s32 q1, q9, q8 \n"
"vshrn.s32 d1, q1, #3 \n" // (a3 + a2) >> 3
"vsub.s32 q2, q6, q7 \n"
"vshrn.s32 d2, q2, #3 \n" // (a0 - a1) >> 3
"vsub.s32 q3, q9, q8 \n"
"vshrn.s32 d3, q3, #3 \n" // (a3 - a2) >> 3
// set the results to output
"vst1.16 d0[0], [%[out]], %[kStep] \n"
"vst1.16 d1[0], [%[out]], %[kStep] \n"
"vst1.16 d2[0], [%[out]], %[kStep] \n"
"vst1.16 d3[0], [%[out]], %[kStep] \n"
"vst1.16 d0[1], [%[out]], %[kStep] \n"
"vst1.16 d1[1], [%[out]], %[kStep] \n"
"vst1.16 d2[1], [%[out]], %[kStep] \n"
"vst1.16 d3[1], [%[out]], %[kStep] \n"
"vst1.16 d0[2], [%[out]], %[kStep] \n"
"vst1.16 d1[2], [%[out]], %[kStep] \n"
"vst1.16 d2[2], [%[out]], %[kStep] \n"
"vst1.16 d3[2], [%[out]], %[kStep] \n"
"vst1.16 d0[3], [%[out]], %[kStep] \n"
"vst1.16 d1[3], [%[out]], %[kStep] \n"
"vst1.16 d2[3], [%[out]], %[kStep] \n"
"vst1.16 d3[3], [%[out]], %[kStep] \n"
: [out] "+r"(out) // modified registers
: [in] "r"(in), [kStep] "r"(kStep) // constants
: "memory", "q0", "q1", "q2", "q3", "q4",
"q5", "q6", "q7", "q8", "q9" // clobbered
);
}
// Forward transform.
// adapted from vp8/encoder/arm/neon/shortfdct_neon.asm
static const int16_t kCoeff16[] = {
5352, 5352, 5352, 5352, 2217, 2217, 2217, 2217
};
static const int32_t kCoeff32[] = {
1812, 1812, 1812, 1812,
937, 937, 937, 937,
12000, 12000, 12000, 12000,
51000, 51000, 51000, 51000
};
static void FTransform(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const int kBPS = BPS;
const uint8_t* src_ptr = src;
const uint8_t* ref_ptr = ref;
const int16_t* coeff16 = kCoeff16;
const int32_t* coeff32 = kCoeff32;
__asm__ volatile (
// load src into q4, q5 in high half
"vld1.8 {d8}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d10}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d9}, [%[src_ptr]], %[kBPS] \n"
"vld1.8 {d11}, [%[src_ptr]] \n"
// load ref into q6, q7 in high half
"vld1.8 {d12}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d14}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d13}, [%[ref_ptr]], %[kBPS] \n"
"vld1.8 {d15}, [%[ref_ptr]] \n"
// Pack the high values in to q4 and q6
"vtrn.32 q4, q5 \n"
"vtrn.32 q6, q7 \n"
// d[0-3] = src - ref
"vsubl.u8 q0, d8, d12 \n"
"vsubl.u8 q1, d9, d13 \n"
// load coeff16 into q8(d16=5352, d17=2217)
"vld1.16 {q8}, [%[coeff16]] \n"
// load coeff32 high half into q9 = 1812, q10 = 937
"vld1.32 {q9, q10}, [%[coeff32]]! \n"
// load coeff32 low half into q11=12000, q12=51000
"vld1.32 {q11,q12}, [%[coeff32]] \n"
// part 1
// Transpose. Register dN is the same as dN in C
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vadd.s16 d4, d0, d3 \n" // a0 = d0 + d3
"vadd.s16 d5, d1, d2 \n" // a1 = d1 + d2
"vsub.s16 d6, d1, d2 \n" // a2 = d1 - d2
"vsub.s16 d7, d0, d3 \n" // a3 = d0 - d3
"vadd.s16 d0, d4, d5 \n" // a0 + a1
"vshl.s16 d0, d0, #3 \n" // temp[0+i*4] = (a0+a1) << 3
"vsub.s16 d2, d4, d5 \n" // a0 - a1
"vshl.s16 d2, d2, #3 \n" // (temp[2+i*4] = (a0-a1) << 3
"vmlal.s16 q9, d7, d16 \n" // a3*5352 + 1812
"vmlal.s16 q10, d7, d17 \n" // a3*2217 + 937
"vmlal.s16 q9, d6, d17 \n" // a2*2217 + a3*5352 + 1812
"vmlsl.s16 q10, d6, d16 \n" // a3*2217 + 937 - a2*5352
// temp[1+i*4] = (d2*2217 + d3*5352 + 1812) >> 9
// temp[3+i*4] = (d3*2217 + 937 - d2*5352) >> 9
"vshrn.s32 d1, q9, #9 \n"
"vshrn.s32 d3, q10, #9 \n"
// part 2
// transpose d0=ip[0], d1=ip[4], d2=ip[8], d3=ip[12]
"vtrn.32 d0, d2 \n"
"vtrn.32 d1, d3 \n"
"vtrn.16 d0, d1 \n"
"vtrn.16 d2, d3 \n"
"vmov.s16 d26, #7 \n"
"vadd.s16 d4, d0, d3 \n" // a1 = ip[0] + ip[12]
"vadd.s16 d5, d1, d2 \n" // b1 = ip[4] + ip[8]
"vsub.s16 d6, d1, d2 \n" // c1 = ip[4] - ip[8]
"vadd.s16 d4, d4, d26 \n" // a1 + 7
"vsub.s16 d7, d0, d3 \n" // d1 = ip[0] - ip[12]
"vadd.s16 d0, d4, d5 \n" // op[0] = a1 + b1 + 7
"vsub.s16 d2, d4, d5 \n" // op[8] = a1 - b1 + 7
"vmlal.s16 q11, d7, d16 \n" // d1*5352 + 12000
"vmlal.s16 q12, d7, d17 \n" // d1*2217 + 51000
"vceq.s16 d4, d7, #0 \n"
"vshr.s16 d0, d0, #4 \n"
"vshr.s16 d2, d2, #4 \n"
"vmlal.s16 q11, d6, d17 \n" // c1*2217 + d1*5352 + 12000
"vmlsl.s16 q12, d6, d16 \n" // d1*2217 - c1*5352 + 51000
"vmvn d4, d4 \n" // !(d1 == 0)
// op[4] = (c1*2217 + d1*5352 + 12000)>>16
"vshrn.s32 d1, q11, #16 \n"
// op[4] += (d1!=0)
"vsub.s16 d1, d1, d4 \n"
// op[12]= (d1*2217 - c1*5352 + 51000)>>16
"vshrn.s32 d3, q12, #16 \n"
// set result to out array
"vst1.16 {q0, q1}, [%[out]] \n"
: [src_ptr] "+r"(src_ptr), [ref_ptr] "+r"(ref_ptr),
[coeff32] "+r"(coeff32) // modified registers
: [kBPS] "r"(kBPS), [coeff16] "r"(coeff16),
[out] "r"(out) // constants
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13" // clobbered
);
}
static void FTransformWHT(const int16_t* in, int16_t* out) {
const int kStep = 32;
__asm__ volatile (
// d0 = in[0 * 16] , d1 = in[1 * 16]
// d2 = in[2 * 16] , d3 = in[3 * 16]
"vld1.16 d0[0], [%[in]], %[kStep] \n"
"vld1.16 d1[0], [%[in]], %[kStep] \n"
"vld1.16 d2[0], [%[in]], %[kStep] \n"
"vld1.16 d3[0], [%[in]], %[kStep] \n"
"vld1.16 d0[1], [%[in]], %[kStep] \n"
"vld1.16 d1[1], [%[in]], %[kStep] \n"
"vld1.16 d2[1], [%[in]], %[kStep] \n"
"vld1.16 d3[1], [%[in]], %[kStep] \n"
"vld1.16 d0[2], [%[in]], %[kStep] \n"
"vld1.16 d1[2], [%[in]], %[kStep] \n"
"vld1.16 d2[2], [%[in]], %[kStep] \n"
"vld1.16 d3[2], [%[in]], %[kStep] \n"
"vld1.16 d0[3], [%[in]], %[kStep] \n"
"vld1.16 d1[3], [%[in]], %[kStep] \n"
"vld1.16 d2[3], [%[in]], %[kStep] \n"
"vld1.16 d3[3], [%[in]], %[kStep] \n"
"vaddl.s16 q2, d0, d2 \n" // a0=(in[0*16]+in[2*16])
"vaddl.s16 q3, d1, d3 \n" // a1=(in[1*16]+in[3*16])
"vsubl.s16 q4, d1, d3 \n" // a2=(in[1*16]-in[3*16])
"vsubl.s16 q5, d0, d2 \n" // a3=(in[0*16]-in[2*16])
"vqadd.s32 q6, q2, q3 \n" // a0 + a1
"vqadd.s32 q7, q5, q4 \n" // a3 + a2
"vqsub.s32 q8, q5, q4 \n" // a3 - a2
"vqsub.s32 q9, q2, q3 \n" // a0 - a1
// Transpose
// q6 = tmp[0, 1, 2, 3] ; q7 = tmp[ 4, 5, 6, 7]
// q8 = tmp[8, 9, 10, 11] ; q9 = tmp[12, 13, 14, 15]
"vswp d13, d16 \n" // vtrn.64 q0, q2
"vswp d15, d18 \n" // vtrn.64 q1, q3
"vtrn.32 q6, q7 \n"
"vtrn.32 q8, q9 \n"
"vqadd.s32 q0, q6, q8 \n" // a0 = tmp[0] + tmp[8]
"vqadd.s32 q1, q7, q9 \n" // a1 = tmp[4] + tmp[12]
"vqsub.s32 q2, q7, q9 \n" // a2 = tmp[4] - tmp[12]
"vqsub.s32 q3, q6, q8 \n" // a3 = tmp[0] - tmp[8]
"vqadd.s32 q4, q0, q1 \n" // b0 = a0 + a1
"vqadd.s32 q5, q3, q2 \n" // b1 = a3 + a2
"vqsub.s32 q6, q3, q2 \n" // b2 = a3 - a2
"vqsub.s32 q7, q0, q1 \n" // b3 = a0 - a1
"vshrn.s32 d18, q4, #1 \n" // b0 >> 1
"vshrn.s32 d19, q5, #1 \n" // b1 >> 1
"vshrn.s32 d20, q6, #1 \n" // b2 >> 1
"vshrn.s32 d21, q7, #1 \n" // b3 >> 1
"vst1.16 {q9, q10}, [%[out]] \n"
: [in] "+r"(in)
: [kStep] "r"(kStep), [out] "r"(out)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5",
"q6", "q7", "q8", "q9", "q10" // clobbered
) ;
}
//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
// Hadamard transform
// Returns the weighted sum of the absolute value of transformed coefficients.
// This uses a TTransform helper function in C
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int kBPS = BPS;
const uint8_t* A = a;
const uint8_t* B = b;
const uint16_t* W = w;
int sum;
__asm__ volatile (
"vld1.32 d0[0], [%[a]], %[kBPS] \n"
"vld1.32 d0[1], [%[a]], %[kBPS] \n"
"vld1.32 d2[0], [%[a]], %[kBPS] \n"
"vld1.32 d2[1], [%[a]] \n"
"vld1.32 d1[0], [%[b]], %[kBPS] \n"
"vld1.32 d1[1], [%[b]], %[kBPS] \n"
"vld1.32 d3[0], [%[b]], %[kBPS] \n"
"vld1.32 d3[1], [%[b]] \n"
// a d0/d2, b d1/d3
// d0/d1: 01 01 01 01
// d2/d3: 23 23 23 23
// But: it goes 01 45 23 67
// Notice the middle values are transposed
"vtrn.16 q0, q1 \n"
// {a0, a1} = {in[0] + in[2], in[1] + in[3]}
"vaddl.u8 q2, d0, d2 \n"
"vaddl.u8 q10, d1, d3 \n"
// {a3, a2} = {in[0] - in[2], in[1] - in[3]}
"vsubl.u8 q3, d0, d2 \n"
"vsubl.u8 q11, d1, d3 \n"
// tmp[0] = a0 + a1
"vpaddl.s16 q0, q2 \n"
"vpaddl.s16 q8, q10 \n"
// tmp[1] = a3 + a2
"vpaddl.s16 q1, q3 \n"
"vpaddl.s16 q9, q11 \n"
// No pair subtract
// q2 = {a0, a3}
// q3 = {a1, a2}
"vtrn.16 q2, q3 \n"
"vtrn.16 q10, q11 \n"
// {tmp[3], tmp[2]} = {a0 - a1, a3 - a2}
"vsubl.s16 q12, d4, d6 \n"
"vsubl.s16 q13, d5, d7 \n"
"vsubl.s16 q14, d20, d22 \n"
"vsubl.s16 q15, d21, d23 \n"
// separate tmp[3] and tmp[2]
// q12 = tmp[3]
// q13 = tmp[2]
"vtrn.32 q12, q13 \n"
"vtrn.32 q14, q15 \n"
// Transpose tmp for a
"vswp d1, d26 \n" // vtrn.64
"vswp d3, d24 \n" // vtrn.64
"vtrn.32 q0, q1 \n"
"vtrn.32 q13, q12 \n"
// Transpose tmp for b
"vswp d17, d30 \n" // vtrn.64
"vswp d19, d28 \n" // vtrn.64
"vtrn.32 q8, q9 \n"
"vtrn.32 q15, q14 \n"
// The first Q register is a, the second b.
// q0/8 tmp[0-3]
// q13/15 tmp[4-7]
// q1/9 tmp[8-11]
// q12/14 tmp[12-15]
// These are still in 01 45 23 67 order. We fix it easily in the addition
// case but the subtraction propagates them.
"vswp d3, d27 \n"
"vswp d19, d31 \n"
// a0 = tmp[0] + tmp[8]
"vadd.s32 q2, q0, q1 \n"
"vadd.s32 q3, q8, q9 \n"
// a1 = tmp[4] + tmp[12]
"vadd.s32 q10, q13, q12 \n"
"vadd.s32 q11, q15, q14 \n"
// a2 = tmp[4] - tmp[12]
"vsub.s32 q13, q13, q12 \n"
"vsub.s32 q15, q15, q14 \n"
// a3 = tmp[0] - tmp[8]
"vsub.s32 q0, q0, q1 \n"
"vsub.s32 q8, q8, q9 \n"
// b0 = a0 + a1
"vadd.s32 q1, q2, q10 \n"
"vadd.s32 q9, q3, q11 \n"
// b1 = a3 + a2
"vadd.s32 q12, q0, q13 \n"
"vadd.s32 q14, q8, q15 \n"
// b2 = a3 - a2
"vsub.s32 q0, q0, q13 \n"
"vsub.s32 q8, q8, q15 \n"
// b3 = a0 - a1
"vsub.s32 q2, q2, q10 \n"
"vsub.s32 q3, q3, q11 \n"
"vld1.64 {q10, q11}, [%[w]] \n"
// abs(b0)
"vabs.s32 q1, q1 \n"
"vabs.s32 q9, q9 \n"
// abs(b1)
"vabs.s32 q12, q12 \n"
"vabs.s32 q14, q14 \n"
// abs(b2)
"vabs.s32 q0, q0 \n"
"vabs.s32 q8, q8 \n"
// abs(b3)
"vabs.s32 q2, q2 \n"
"vabs.s32 q3, q3 \n"
// expand w before using.
"vmovl.u16 q13, d20 \n"
"vmovl.u16 q15, d21 \n"
// w[0] * abs(b0)
"vmul.u32 q1, q1, q13 \n"
"vmul.u32 q9, q9, q13 \n"
// w[4] * abs(b1)
"vmla.u32 q1, q12, q15 \n"
"vmla.u32 q9, q14, q15 \n"
// expand w before using.
"vmovl.u16 q13, d22 \n"
"vmovl.u16 q15, d23 \n"
// w[8] * abs(b2)
"vmla.u32 q1, q0, q13 \n"
"vmla.u32 q9, q8, q13 \n"
// w[12] * abs(b3)
"vmla.u32 q1, q2, q15 \n"
"vmla.u32 q9, q3, q15 \n"
// Sum the arrays
"vpaddl.u32 q1, q1 \n"
"vpaddl.u32 q9, q9 \n"
"vadd.u64 d2, d3 \n"
"vadd.u64 d18, d19 \n"
// Hadamard transform needs 4 bits of extra precision (2 bits in each
// direction) for dynamic range. Weights w[] are 16bits at max, so the maximum
// precision for coeff is 8bit of input + 4bits of Hadamard transform +
// 16bits for w[] + 2 bits of abs() summation.
//
// This uses a maximum of 31 bits (signed). Discarding the top 32 bits is
// A-OK.
// sum2 - sum1
"vsub.u32 d0, d2, d18 \n"
// abs(sum2 - sum1)
"vabs.s32 d0, d0 \n"
// abs(sum2 - sum1) >> 5
"vshr.u32 d0, #5 \n"
// It would be better to move the value straight into r0 but I'm not
// entirely sure how this works with inline assembly.
"vmov.32 %[sum], d0[0] \n"
: [sum] "=r"(sum), [a] "+r"(A), [b] "+r"(B), [w] "+r"(W)
: [kBPS] "r"(kBPS)
: "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
"q10", "q11", "q12", "q13", "q14", "q15" // clobbered
) ;
return sum;
}
static int Disto16x16(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
int D = 0;
int x, y;
for (y = 0; y < 16 * BPS; y += 4 * BPS) {
for (x = 0; x < 16; x += 4) {
D += Disto4x4(a + x + y, b + x + y, w);
}
}
return D;
}
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
// Entry point
extern void VP8EncDspInitNEON(void);
void VP8EncDspInitNEON(void) {
#if defined(WEBP_USE_NEON)
VP8ITransform = ITransform;
VP8FTransform = FTransform;
VP8ITransformWHT = ITransformWHT;
VP8FTransformWHT = FTransformWHT;
VP8TDisto4x4 = Disto4x4;
VP8TDisto16x16 = Disto16x16;
#endif // WEBP_USE_NEON
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif
View File
@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 version of speed-critical encoding functions.
@ -21,17 +23,48 @@ extern "C" {
#include "../enc/vp8enci.h"
//------------------------------------------------------------------------------
// Quite useful macro for debugging. Left here for convenience.
#if 0
#include <stdio.h>
static void PrintReg(const __m128i r, const char* const name, int size) {
int n;
union {
__m128i r;
uint8_t i8[16];
uint16_t i16[8];
uint32_t i32[4];
uint64_t i64[2];
} tmp;
tmp.r = r;
printf("%s\t: ", name);
if (size == 8) {
for (n = 0; n < 16; ++n) printf("%.2x ", tmp.i8[n]);
} else if (size == 16) {
for (n = 0; n < 8; ++n) printf("%.4x ", tmp.i16[n]);
} else if (size == 32) {
for (n = 0; n < 4; ++n) printf("%.8x ", tmp.i32[n]);
} else {
for (n = 0; n < 2; ++n) printf("%.16lx ", tmp.i64[n]);
}
printf("\n");
}
#endif
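For illustration only (not part of the patch): once the #if 0 guard above is switched to #if 1, the helper can be called as in this hypothetical sketch.
  const __m128i v = _mm_set1_epi16(7);   // any SSE2 register value
  PrintReg(v, "v", 16);                  // dumps the eight 16-bit lanes of v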
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block) {
int histo[MAX_COEFF_THRESH + 1] = { 0 };
int16_t out[16];
int j, k;
static void CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
int start_block, int end_block,
VP8Histogram* const histo) {
const __m128i max_coeff_thresh = _mm_set1_epi16(MAX_COEFF_THRESH);
int j;
for (j = start_block; j < end_block; ++j) {
int16_t out[16];
int k;
VP8FTransform(ref + VP8DspScan[j], pred + VP8DspScan[j], out);
// Convert coefficients to bin (within out[]).
@ -47,9 +80,9 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
const __m128i xor1 = _mm_xor_si128(out1, sign1);
const __m128i abs0 = _mm_sub_epi16(xor0, sign0);
const __m128i abs1 = _mm_sub_epi16(xor1, sign1);
// v = abs(out) >> 2
const __m128i v0 = _mm_srai_epi16(abs0, 2);
const __m128i v1 = _mm_srai_epi16(abs1, 2);
// v = abs(out) >> 3
const __m128i v0 = _mm_srai_epi16(abs0, 3);
const __m128i v1 = _mm_srai_epi16(abs1, 3);
// bin = min(v, MAX_COEFF_THRESH)
const __m128i bin0 = _mm_min_epi16(v0, max_coeff_thresh);
const __m128i bin1 = _mm_min_epi16(v1, max_coeff_thresh);
@ -58,13 +91,11 @@ static int CollectHistogramSSE2(const uint8_t* ref, const uint8_t* pred,
_mm_storeu_si128((__m128i*)&out[8], bin1);
}
// Use bin to update histogram.
// Convert coefficients to bin.
for (k = 0; k < 16; ++k) {
histo[out[k]]++;
histo->distribution[out[k]]++;
}
}
return VP8GetAlpha(histo);
}
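As a plain-C sketch of the per-coefficient binning the SSE2 loop above performs (the helper name CoeffToBin is made up here; abs() and MAX_COEFF_THRESH come from the file's existing includes):
static int CoeffToBin(int coeff) {
  const int v = abs(coeff) >> 3;                          // drop the sign and the 3 LSBs
  return (v > MAX_COEFF_THRESH) ? MAX_COEFF_THRESH : v;   // clamp to the histogram range
}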
//------------------------------------------------------------------------------
@ -243,7 +274,7 @@ static void ITransformSSE2(const uint8_t* ref, const int16_t* in, uint8_t* dst,
// Add inverse transform to 'ref' and store.
{
const __m128i zero = _mm_set1_epi16(0);
const __m128i zero = _mm_setzero_si128();
// Load the reference(s).
__m128i ref0, ref1, ref2, ref3;
if (do_two) {
@ -295,17 +326,23 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
int16_t* out) {
const __m128i zero = _mm_setzero_si128();
const __m128i seven = _mm_set1_epi16(7);
const __m128i k7500 = _mm_set1_epi32(7500);
const __m128i k14500 = _mm_set1_epi32(14500);
const __m128i k937 = _mm_set1_epi32(937);
const __m128i k1812 = _mm_set1_epi32(1812);
const __m128i k51000 = _mm_set1_epi32(51000);
const __m128i k12000_plus_one = _mm_set1_epi32(12000 + (1 << 16));
const __m128i k5352_2217 = _mm_set_epi16(5352, 2217, 5352, 2217,
5352, 2217, 5352, 2217);
const __m128i k2217_5352 = _mm_set_epi16(2217, -5352, 2217, -5352,
2217, -5352, 2217, -5352);
const __m128i k88p = _mm_set_epi16(8, 8, 8, 8, 8, 8, 8, 8);
const __m128i k88m = _mm_set_epi16(-8, 8, -8, 8, -8, 8, -8, 8);
const __m128i k5352_2217p = _mm_set_epi16(2217, 5352, 2217, 5352,
2217, 5352, 2217, 5352);
const __m128i k5352_2217m = _mm_set_epi16(-5352, 2217, -5352, 2217,
-5352, 2217, -5352, 2217);
__m128i v01, v32;
// Difference between src and ref and initial transpose.
{
// Load src and convert to 16b.
@ -326,73 +363,52 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
const __m128i ref_1 = _mm_unpacklo_epi8(ref1, zero);
const __m128i ref_2 = _mm_unpacklo_epi8(ref2, zero);
const __m128i ref_3 = _mm_unpacklo_epi8(ref3, zero);
// Compute difference.
// Compute difference. -> 00 01 02 03 00 00 00 00
const __m128i diff0 = _mm_sub_epi16(src_0, ref_0);
const __m128i diff1 = _mm_sub_epi16(src_1, ref_1);
const __m128i diff2 = _mm_sub_epi16(src_2, ref_2);
const __m128i diff3 = _mm_sub_epi16(src_3, ref_3);
// Transpose.
// Unpack and shuffle
// 00 01 02 03 0 0 0 0
// 10 11 12 13 0 0 0 0
// 20 21 22 23 0 0 0 0
// 30 31 32 33 0 0 0 0
const __m128i transpose0_0 = _mm_unpacklo_epi16(diff0, diff1);
const __m128i transpose0_1 = _mm_unpacklo_epi16(diff2, diff3);
// 00 10 01 11 02 12 03 13
// 20 30 21 31 22 32 23 33
const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
// a02 a12 a22 a32 a03 a13 a23 a33
// a00 a10 a20 a30 a01 a11 a21 a31
// a03 a13 a23 a33 a02 a12 a22 a32
}
const __m128i shuf01 = _mm_unpacklo_epi32(diff0, diff1);
const __m128i shuf23 = _mm_unpacklo_epi32(diff2, diff3);
// 00 01 10 11 02 03 12 13
// 20 21 30 31 22 23 32 33
const __m128i shuf01_p =
_mm_shufflehi_epi16(shuf01, _MM_SHUFFLE(2, 3, 0, 1));
const __m128i shuf23_p =
_mm_shufflehi_epi16(shuf23, _MM_SHUFFLE(2, 3, 0, 1));
// 00 01 10 11 03 02 13 12
// 20 21 30 31 23 22 33 32
const __m128i s01 = _mm_unpacklo_epi64(shuf01_p, shuf23_p);
const __m128i s32 = _mm_unpackhi_epi64(shuf01_p, shuf23_p);
// 00 01 10 11 20 21 30 31
// 03 02 13 12 23 22 33 32
const __m128i a01 = _mm_add_epi16(s01, s32);
const __m128i a32 = _mm_sub_epi16(s01, s32);
// [d0 + d3 | d1 + d2 | ...] = [a0 a1 | a0' a1' | ... ]
// [d0 - d3 | d1 - d2 | ...] = [a3 a2 | a3' a2' | ... ]
// First pass and subsequent transpose.
{
// Same operations are done on the (0,3) and (1,2) pairs.
// b0 = (a0 + a3) << 3
// b1 = (a1 + a2) << 3
// b3 = (a0 - a3) << 3
// b2 = (a1 - a2) << 3
const __m128i a01 = _mm_add_epi16(v01, v32);
const __m128i a32 = _mm_sub_epi16(v01, v32);
const __m128i b01 = _mm_slli_epi16(a01, 3);
const __m128i b32 = _mm_slli_epi16(a32, 3);
const __m128i b11 = _mm_unpackhi_epi64(b01, b01);
const __m128i b22 = _mm_unpackhi_epi64(b32, b32);
// e0 = b0 + b1
// e2 = b0 - b1
const __m128i e0 = _mm_add_epi16(b01, b11);
const __m128i e2 = _mm_sub_epi16(b01, b11);
const __m128i e02 = _mm_unpacklo_epi64(e0, e2);
// e1 = (b3 * 5352 + b2 * 2217 + 14500) >> 12
// e3 = (b3 * 2217 - b2 * 5352 + 7500) >> 12
const __m128i b23 = _mm_unpacklo_epi16(b22, b32);
const __m128i c1 = _mm_madd_epi16(b23, k5352_2217);
const __m128i c3 = _mm_madd_epi16(b23, k2217_5352);
const __m128i d1 = _mm_add_epi32(c1, k14500);
const __m128i d3 = _mm_add_epi32(c3, k7500);
const __m128i e1 = _mm_srai_epi32(d1, 12);
const __m128i e3 = _mm_srai_epi32(d3, 12);
const __m128i e13 = _mm_packs_epi32(e1, e3);
// Transpose.
// 00 01 02 03 20 21 22 23
// 10 11 12 13 30 31 32 33
const __m128i transpose0_0 = _mm_unpacklo_epi16(e02, e13);
const __m128i transpose0_1 = _mm_unpackhi_epi16(e02, e13);
// 00 10 01 11 02 12 03 13
// 20 30 21 31 22 32 23 33
const __m128i v23 = _mm_unpackhi_epi32(transpose0_0, transpose0_1);
v01 = _mm_unpacklo_epi32(transpose0_0, transpose0_1);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2));
// 02 12 22 32 03 13 23 33
// 00 10 20 30 01 11 21 31
// 03 13 23 33 02 12 22 32
const __m128i tmp0 = _mm_madd_epi16(a01, k88p); // [ (a0 + a1) << 3, ... ]
const __m128i tmp2 = _mm_madd_epi16(a01, k88m); // [ (a0 - a1) << 3, ... ]
const __m128i tmp1_1 = _mm_madd_epi16(a32, k5352_2217p);
const __m128i tmp3_1 = _mm_madd_epi16(a32, k5352_2217m);
const __m128i tmp1_2 = _mm_add_epi32(tmp1_1, k1812);
const __m128i tmp3_2 = _mm_add_epi32(tmp3_1, k937);
const __m128i tmp1 = _mm_srai_epi32(tmp1_2, 9);
const __m128i tmp3 = _mm_srai_epi32(tmp3_2, 9);
const __m128i s03 = _mm_packs_epi32(tmp0, tmp2);
const __m128i s12 = _mm_packs_epi32(tmp1, tmp3);
const __m128i s_lo = _mm_unpacklo_epi16(s03, s12); // 0 1 0 1 0 1...
const __m128i s_hi = _mm_unpackhi_epi16(s03, s12); // 2 3 2 3 2 3
const __m128i v23 = _mm_unpackhi_epi32(s_lo, s_hi);
v01 = _mm_unpacklo_epi32(s_lo, s_hi);
v32 = _mm_shuffle_epi32(v23, _MM_SHUFFLE(1, 0, 3, 2)); // 3 2 3 2 3 2..
}
// Second pass
@ -406,13 +422,12 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
const __m128i a32 = _mm_sub_epi16(v01, v32);
const __m128i a11 = _mm_unpackhi_epi64(a01, a01);
const __m128i a22 = _mm_unpackhi_epi64(a32, a32);
const __m128i a01_plus_7 = _mm_add_epi16(a01, seven);
// d0 = (a0 + a1 + 7) >> 4;
// d2 = (a0 - a1 + 7) >> 4;
const __m128i b0 = _mm_add_epi16(a01, a11);
const __m128i b2 = _mm_sub_epi16(a01, a11);
const __m128i c0 = _mm_add_epi16(b0, seven);
const __m128i c2 = _mm_add_epi16(b2, seven);
const __m128i c0 = _mm_add_epi16(a01_plus_7, a11);
const __m128i c2 = _mm_sub_epi16(a01_plus_7, a11);
const __m128i d0 = _mm_srai_epi16(c0, 4);
const __m128i d2 = _mm_srai_epi16(c2, 4);
@ -430,6 +445,7 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
// f1 = f1 + (a3 != 0);
// The compare will return (0xffff, 0) for (==0, !=0). To turn that into the
// desired (0, 1), we add one earlier through k12000_plus_one.
// -> f1 = f1 + 1 - (a3 == 0)
const __m128i g1 = _mm_add_epi16(f1, _mm_cmpeq_epi16(a32, zero));
_mm_storel_epi64((__m128i*)&out[ 0], d0);
@ -439,13 +455,137 @@ static void FTransformSSE2(const uint8_t* src, const uint8_t* ref,
}
}
static void FTransformWHTSSE2(const int16_t* in, int16_t* out) {
int16_t tmp[16];
int i;
for (i = 0; i < 4; ++i, in += 64) {
const int a0 = (in[0 * 16] + in[2 * 16]);
const int a1 = (in[1 * 16] + in[3 * 16]);
const int a2 = (in[1 * 16] - in[3 * 16]);
const int a3 = (in[0 * 16] - in[2 * 16]);
tmp[0 + i * 4] = a0 + a1;
tmp[1 + i * 4] = a3 + a2;
tmp[2 + i * 4] = a3 - a2;
tmp[3 + i * 4] = a0 - a1;
}
{
const __m128i src0 = _mm_loadl_epi64((__m128i*)&tmp[0]);
const __m128i src1 = _mm_loadl_epi64((__m128i*)&tmp[4]);
const __m128i src2 = _mm_loadl_epi64((__m128i*)&tmp[8]);
const __m128i src3 = _mm_loadl_epi64((__m128i*)&tmp[12]);
const __m128i a0 = _mm_add_epi16(src0, src2);
const __m128i a1 = _mm_add_epi16(src1, src3);
const __m128i a2 = _mm_sub_epi16(src1, src3);
const __m128i a3 = _mm_sub_epi16(src0, src2);
const __m128i b0 = _mm_srai_epi16(_mm_adds_epi16(a0, a1), 1);
const __m128i b1 = _mm_srai_epi16(_mm_adds_epi16(a3, a2), 1);
const __m128i b2 = _mm_srai_epi16(_mm_subs_epi16(a3, a2), 1);
const __m128i b3 = _mm_srai_epi16(_mm_subs_epi16(a0, a1), 1);
_mm_storel_epi64((__m128i*)&out[ 0], b0);
_mm_storel_epi64((__m128i*)&out[ 4], b1);
_mm_storel_epi64((__m128i*)&out[ 8], b2);
_mm_storel_epi64((__m128i*)&out[12], b3);
}
}
//------------------------------------------------------------------------------
// Metric
static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i zero = _mm_set1_epi16(0);
static int SSE_Nx4SSE2(const uint8_t* a, const uint8_t* b,
int num_quads, int do_16) {
const __m128i zero = _mm_setzero_si128();
__m128i sum1 = zero;
__m128i sum2 = zero;
// Load values.
while (num_quads-- > 0) {
// Note: for the !do_16 case, we read 16 pixels instead of 8 but that's ok,
// thanks to buffer over-allocation to that effect.
const __m128i a0 = _mm_loadu_si128((__m128i*)&a[BPS * 0]);
const __m128i a1 = _mm_loadu_si128((__m128i*)&a[BPS * 1]);
const __m128i a2 = _mm_loadu_si128((__m128i*)&a[BPS * 2]);
const __m128i a3 = _mm_loadu_si128((__m128i*)&a[BPS * 3]);
const __m128i b0 = _mm_loadu_si128((__m128i*)&b[BPS * 0]);
const __m128i b1 = _mm_loadu_si128((__m128i*)&b[BPS * 1]);
const __m128i b2 = _mm_loadu_si128((__m128i*)&b[BPS * 2]);
const __m128i b3 = _mm_loadu_si128((__m128i*)&b[BPS * 3]);
// compute clip0(a-b) and clip0(b-a)
const __m128i a0p = _mm_subs_epu8(a0, b0);
const __m128i a0m = _mm_subs_epu8(b0, a0);
const __m128i a1p = _mm_subs_epu8(a1, b1);
const __m128i a1m = _mm_subs_epu8(b1, a1);
const __m128i a2p = _mm_subs_epu8(a2, b2);
const __m128i a2m = _mm_subs_epu8(b2, a2);
const __m128i a3p = _mm_subs_epu8(a3, b3);
const __m128i a3m = _mm_subs_epu8(b3, a3);
// compute |a-b| with 8b arithmetic as clip0(a-b) | clip0(b-a)
const __m128i diff0 = _mm_or_si128(a0p, a0m);
const __m128i diff1 = _mm_or_si128(a1p, a1m);
const __m128i diff2 = _mm_or_si128(a2p, a2m);
const __m128i diff3 = _mm_or_si128(a3p, a3m);
// unpack (only four operations, instead of eight)
const __m128i low0 = _mm_unpacklo_epi8(diff0, zero);
const __m128i low1 = _mm_unpacklo_epi8(diff1, zero);
const __m128i low2 = _mm_unpacklo_epi8(diff2, zero);
const __m128i low3 = _mm_unpacklo_epi8(diff3, zero);
// multiply with self
const __m128i low_madd0 = _mm_madd_epi16(low0, low0);
const __m128i low_madd1 = _mm_madd_epi16(low1, low1);
const __m128i low_madd2 = _mm_madd_epi16(low2, low2);
const __m128i low_madd3 = _mm_madd_epi16(low3, low3);
// collect in a cascading way
const __m128i low_sum0 = _mm_add_epi32(low_madd0, low_madd1);
const __m128i low_sum1 = _mm_add_epi32(low_madd2, low_madd3);
sum1 = _mm_add_epi32(sum1, low_sum0);
sum2 = _mm_add_epi32(sum2, low_sum1);
if (do_16) { // if necessary, process the higher 8 bytes similarly
const __m128i hi0 = _mm_unpackhi_epi8(diff0, zero);
const __m128i hi1 = _mm_unpackhi_epi8(diff1, zero);
const __m128i hi2 = _mm_unpackhi_epi8(diff2, zero);
const __m128i hi3 = _mm_unpackhi_epi8(diff3, zero);
const __m128i hi_madd0 = _mm_madd_epi16(hi0, hi0);
const __m128i hi_madd1 = _mm_madd_epi16(hi1, hi1);
const __m128i hi_madd2 = _mm_madd_epi16(hi2, hi2);
const __m128i hi_madd3 = _mm_madd_epi16(hi3, hi3);
const __m128i hi_sum0 = _mm_add_epi32(hi_madd0, hi_madd1);
const __m128i hi_sum1 = _mm_add_epi32(hi_madd2, hi_madd3);
sum1 = _mm_add_epi32(sum1, hi_sum0);
sum2 = _mm_add_epi32(sum2, hi_sum1);
}
a += 4 * BPS;
b += 4 * BPS;
}
{
int32_t tmp[4];
const __m128i sum = _mm_add_epi32(sum1, sum2);
_mm_storeu_si128((__m128i*)tmp, sum);
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
}
}
static int SSE16x16SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 4, 1);
}
static int SSE16x8SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 2, 1);
}
static int SSE8x8SSE2(const uint8_t* a, const uint8_t* b) {
return SSE_Nx4SSE2(a, b, 2, 0);
}
static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i zero = _mm_setzero_si128();
// Load values. Note that we read 8 pixels instead of 4,
// but the a/b buffers are over-allocated to that effect.
const __m128i a0 = _mm_loadl_epi64((__m128i*)&a[BPS * 0]);
const __m128i a1 = _mm_loadl_epi64((__m128i*)&a[BPS * 1]);
const __m128i a2 = _mm_loadl_epi64((__m128i*)&a[BPS * 2]);
@ -483,6 +623,7 @@ static int SSE4x4SSE2(const uint8_t* a, const uint8_t* b) {
const __m128i sum0 = _mm_add_epi32(madd0, madd1);
const __m128i sum1 = _mm_add_epi32(madd2, madd3);
const __m128i sum2 = _mm_add_epi32(sum0, sum1);
int32_t tmp[4];
_mm_storeu_si128((__m128i*)tmp, sum2);
return (tmp[3] + tmp[2] + tmp[1] + tmp[0]);
@ -502,8 +643,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
int32_t sum[4];
__m128i tmp_0, tmp_1, tmp_2, tmp_3;
const __m128i zero = _mm_setzero_si128();
const __m128i one = _mm_set1_epi16(1);
const __m128i three = _mm_set1_epi16(3);
// Load, combine and transpose inputs.
{
@ -550,17 +689,14 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
// Horizontal pass and subsequent transpose.
{
// Calculate a and b (two 4x4 at once).
const __m128i a0 = _mm_slli_epi16(_mm_add_epi16(tmp_0, tmp_2), 2);
const __m128i a1 = _mm_slli_epi16(_mm_add_epi16(tmp_1, tmp_3), 2);
const __m128i a2 = _mm_slli_epi16(_mm_sub_epi16(tmp_1, tmp_3), 2);
const __m128i a3 = _mm_slli_epi16(_mm_sub_epi16(tmp_0, tmp_2), 2);
// b0_extra = (a0 != 0);
const __m128i b0_extra = _mm_andnot_si128(_mm_cmpeq_epi16 (a0, zero), one);
const __m128i b0_base = _mm_add_epi16(a0, a1);
const __m128i a0 = _mm_add_epi16(tmp_0, tmp_2);
const __m128i a1 = _mm_add_epi16(tmp_1, tmp_3);
const __m128i a2 = _mm_sub_epi16(tmp_1, tmp_3);
const __m128i a3 = _mm_sub_epi16(tmp_0, tmp_2);
const __m128i b0 = _mm_add_epi16(a0, a1);
const __m128i b1 = _mm_add_epi16(a3, a2);
const __m128i b2 = _mm_sub_epi16(a3, a2);
const __m128i b3 = _mm_sub_epi16(a0, a1);
const __m128i b0 = _mm_add_epi16(b0_base, b0_extra);
// a00 a01 a02 a03 b00 b01 b02 b03
// a10 a11 a12 a13 b10 b11 b12 b13
// a20 a21 a22 a23 b20 b21 b22 b23
@ -635,19 +771,6 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
B_b2 = _mm_sub_epi16(B_b2, sign_B_b2);
}
// b = abs(b) + 3
A_b0 = _mm_add_epi16(A_b0, three);
A_b2 = _mm_add_epi16(A_b2, three);
B_b0 = _mm_add_epi16(B_b0, three);
B_b2 = _mm_add_epi16(B_b2, three);
// abs((b + (b<0) + 3) >> 3) = (abs(b) + 3) >> 3
// b = (abs(b) + 3) >> 3
A_b0 = _mm_srai_epi16(A_b0, 3);
A_b2 = _mm_srai_epi16(A_b2, 3);
B_b0 = _mm_srai_epi16(B_b0, 3);
B_b2 = _mm_srai_epi16(B_b2, 3);
// weighted sums
A_b0 = _mm_madd_epi16(A_b0, w_0);
A_b2 = _mm_madd_epi16(A_b2, w_8);
@ -666,7 +789,7 @@ static int TTransformSSE2(const uint8_t* inA, const uint8_t* inB,
static int Disto4x4SSE2(const uint8_t* const a, const uint8_t* const b,
const uint16_t* const w) {
const int diff_sum = TTransformSSE2(a, b, w);
return (abs(diff_sum) + 8) >> 4;
return abs(diff_sum) >> 5;
}
static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
@ -681,7 +804,6 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
return D;
}
//------------------------------------------------------------------------------
// Quantization
//
@ -689,9 +811,8 @@ static int Disto16x16SSE2(const uint8_t* const a, const uint8_t* const b,
// Simple quantization
static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
int n, const VP8Matrix* const mtx) {
const __m128i max_coeff_2047 = _mm_set1_epi16(2047);
const __m128i zero = _mm_set1_epi16(0);
__m128i sign0, sign8;
const __m128i max_coeff_2047 = _mm_set1_epi16(MAX_LEVEL);
const __m128i zero = _mm_setzero_si128();
__m128i coeff0, coeff8;
__m128i out0, out8;
__m128i packed_out;
@ -713,8 +834,8 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
const __m128i zthresh8 = _mm_loadu_si128((__m128i*)&mtx->zthresh_[8]);
// sign(in) = in >> 15 (0x0000 if positive, 0xffff if negative)
sign0 = _mm_srai_epi16(in0, 15);
sign8 = _mm_srai_epi16(in8, 15);
const __m128i sign0 = _mm_srai_epi16(in0, 15);
const __m128i sign8 = _mm_srai_epi16(in8, 15);
// coeff = abs(in) = (in ^ sign) - sign
coeff0 = _mm_xor_si128(in0, sign0);
@ -726,10 +847,6 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
coeff0 = _mm_add_epi16(coeff0, sharpen0);
coeff8 = _mm_add_epi16(coeff8, sharpen8);
// if (coeff > 2047) coeff = 2047
coeff0 = _mm_min_epi16(coeff0, max_coeff_2047);
coeff8 = _mm_min_epi16(coeff8, max_coeff_2047);
// out = (coeff * iQ + B) >> QFIX;
{
// doing calculations with 32b precision (QFIX=17)
@ -757,9 +874,14 @@ static int QuantizeBlockSSE2(int16_t in[16], int16_t out[16],
out_04 = _mm_srai_epi32(out_04, QFIX);
out_08 = _mm_srai_epi32(out_08, QFIX);
out_12 = _mm_srai_epi32(out_12, QFIX);
// pack result as 16b
out0 = _mm_packs_epi32(out_00, out_04);
out8 = _mm_packs_epi32(out_08, out_12);
// if (coeff > 2047) coeff = 2047
out0 = _mm_min_epi16(out0, max_coeff_2047);
out8 = _mm_min_epi16(out8, max_coeff_2047);
}
// get sign back (if (sign[j]) out_n = -out_n)
@ -832,6 +954,10 @@ void VP8EncDspInitSSE2(void) {
VP8EncQuantizeBlock = QuantizeBlockSSE2;
VP8ITransform = ITransformSSE2;
VP8FTransform = FTransformSSE2;
VP8FTransformWHT = FTransformWHTSSE2;
VP8SSE16x16 = SSE16x16SSE2;
VP8SSE16x8 = SSE16x8SSE2;
VP8SSE8x8 = SSE8x8SSE2;
VP8SSE4x4 = SSE4x4SSE2;
VP8TDisto4x4 = Disto4x4SSE2;
VP8TDisto16x16 = Disto16x16SSE2;
View File
@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
@ -11,25 +13,31 @@
// Jyrki Alakuijala (jyrki@google.com)
// Urvang Joshi (urvang@google.com)
#include "./dsp.h"
// Define the following if target arch is sure to have SSE2
// #define WEBP_TARGET_HAS_SSE2
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_TARGET_HAS_SSE2)
#include <emmintrin.h>
#endif
#include <math.h>
#include <stdlib.h>
#include "./lossless.h"
#include "../dec/vp8li.h"
#include "../dsp/yuv.h"
#include "../dsp/dsp.h"
#include "../enc/histogram.h"
#include "./yuv.h"
#define MAX_DIFF_COST (1e30f)
// lookup table for small values of log2(int)
#define APPROX_LOG_MAX 4096
#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
#define LOG_LOOKUP_IDX_MAX 256
static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f,
2.0000000000000000f, 2.3219280948873621f,
@ -160,16 +168,97 @@ static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
7.9886846867721654f, 7.9943534368588577f
};
float VP8LFastLog2(int v) {
if (v < LOG_LOOKUP_IDX_MAX) {
return kLog2Table[v];
} else if (v < APPROX_LOG_MAX) {
const float kSLog2Table[LOG_LOOKUP_IDX_MAX] = {
0.00000000f, 0.00000000f, 2.00000000f, 4.75488750f,
8.00000000f, 11.60964047f, 15.50977500f, 19.65148445f,
24.00000000f, 28.52932501f, 33.21928095f, 38.05374781f,
43.01955001f, 48.10571634f, 53.30296891f, 58.60335893f,
64.00000000f, 69.48686830f, 75.05865003f, 80.71062276f,
86.43856190f, 92.23866588f, 98.10749561f, 104.04192499f,
110.03910002f, 116.09640474f, 122.21143267f, 128.38196256f,
134.60593782f, 140.88144886f, 147.20671787f, 153.58008562f,
160.00000000f, 166.46500594f, 172.97373660f, 179.52490559f,
186.11730005f, 192.74977453f, 199.42124551f, 206.13068654f,
212.87712380f, 219.65963219f, 226.47733176f, 233.32938445f,
240.21499122f, 247.13338933f, 254.08384998f, 261.06567603f,
268.07820003f, 275.12078236f, 282.19280949f, 289.29369244f,
296.42286534f, 303.57978409f, 310.76392512f, 317.97478424f,
325.21187564f, 332.47473081f, 339.76289772f, 347.07593991f,
354.41343574f, 361.77497759f, 369.16017124f, 376.56863518f,
384.00000000f, 391.45390785f, 398.93001188f, 406.42797576f,
413.94747321f, 421.48818752f, 429.04981119f, 436.63204548f,
444.23460010f, 451.85719280f, 459.49954906f, 467.16140179f,
474.84249102f, 482.54256363f, 490.26137307f, 497.99867911f,
505.75424759f, 513.52785023f, 521.31926438f, 529.12827280f,
536.95466351f, 544.79822957f, 552.65876890f, 560.53608414f,
568.42998244f, 576.34027536f, 584.26677867f, 592.20931226f,
600.16769996f, 608.14176943f, 616.13135206f, 624.13628279f,
632.15640007f, 640.19154569f, 648.24156472f, 656.30630539f,
664.38561898f, 672.47935976f, 680.58738488f, 688.70955430f,
696.84573069f, 704.99577935f, 713.15956818f, 721.33696754f,
729.52785023f, 737.73209140f, 745.94956849f, 754.18016116f,
762.42375127f, 770.68022275f, 778.94946161f, 787.23135586f,
795.52579543f, 803.83267219f, 812.15187982f, 820.48331383f,
828.82687147f, 837.18245171f, 845.54995518f, 853.92928416f,
862.32034249f, 870.72303558f, 879.13727036f, 887.56295522f,
896.00000000f, 904.44831595f, 912.90781569f, 921.37841320f,
929.86002376f, 938.35256392f, 946.85595152f, 955.37010560f,
963.89494641f, 972.43039537f, 980.97637504f, 989.53280911f,
998.09962237f, 1006.67674069f, 1015.26409097f, 1023.86160116f,
1032.46920021f, 1041.08681805f, 1049.71438560f, 1058.35183469f,
1066.99909811f, 1075.65610955f, 1084.32280357f, 1092.99911564f,
1101.68498204f, 1110.38033993f, 1119.08512727f, 1127.79928282f,
1136.52274614f, 1145.25545758f, 1153.99735821f, 1162.74838989f,
1171.50849518f, 1180.27761738f, 1189.05570047f, 1197.84268914f,
1206.63852876f, 1215.44316535f, 1224.25654560f, 1233.07861684f,
1241.90932703f, 1250.74862473f, 1259.59645914f, 1268.45278005f,
1277.31753781f, 1286.19068338f, 1295.07216828f, 1303.96194457f,
1312.85996488f, 1321.76618236f, 1330.68055071f, 1339.60302413f,
1348.53355734f, 1357.47210556f, 1366.41862452f, 1375.37307041f,
1384.33539991f, 1393.30557020f, 1402.28353887f, 1411.26926400f,
1420.26270412f, 1429.26381818f, 1438.27256558f, 1447.28890615f,
1456.31280014f, 1465.34420819f, 1474.38309138f, 1483.42941118f,
1492.48312945f, 1501.54420843f, 1510.61261078f, 1519.68829949f,
1528.77123795f, 1537.86138993f, 1546.95871952f, 1556.06319119f,
1565.17476976f, 1574.29342040f, 1583.41910860f, 1592.55180020f,
1601.69146137f, 1610.83805860f, 1619.99155871f, 1629.15192882f,
1638.31913637f, 1647.49314911f, 1656.67393509f, 1665.86146266f,
1675.05570047f, 1684.25661744f, 1693.46418280f, 1702.67836605f,
1711.89913698f, 1721.12646563f, 1730.36032233f, 1739.60067768f,
1748.84750254f, 1758.10076802f, 1767.36044551f, 1776.62650662f,
1785.89892323f, 1795.17766747f, 1804.46271172f, 1813.75402857f,
1823.05159087f, 1832.35537170f, 1841.66534438f, 1850.98148244f,
1860.30375965f, 1869.63214999f, 1878.96662767f, 1888.30716711f,
1897.65374295f, 1907.00633003f, 1916.36490342f, 1925.72943838f,
1935.09991037f, 1944.47629506f, 1953.85856831f, 1963.24670620f,
1972.64068498f, 1982.04048108f, 1991.44607117f, 2000.85743204f,
2010.27454072f, 2019.69737440f, 2029.12591044f, 2038.56012640f
};
float VP8LFastSLog2Slow(int v) {
assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_MAX) {
int log_cnt = 0;
const float v_f = (float)v;
while (v >= LOG_LOOKUP_IDX_MAX) {
++log_cnt;
v = v >> 1;
}
return v_f * (kLog2Table[v] + log_cnt);
} else {
return (float)(LOG_2_RECIPROCAL * v * log((double)v));
}
}
float VP8LFastLog2Slow(int v) {
assert(v >= LOG_LOOKUP_IDX_MAX);
if (v < APPROX_LOG_MAX) {
int log_cnt = 0;
while (v >= LOG_LOOKUP_IDX_MAX) {
++log_cnt;
v = v >> 1;
}
return kLog2Table[v] + (float)log_cnt;
return kLog2Table[v] + log_cnt;
} else {
return (float)(LOG_2_RECIPROCAL * log((double)v));
}
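A standalone check of the shift-then-lookup idea above; this is a sketch, not part of the patch, and it substitutes the exact log2() from <math.h> for kLog2Table:
#include <math.h>
#include <stdio.h>

int main(void) {
  int v = 1000, log_cnt = 0;
  while (v >= 256) {   // same reduction as above (LOG_LOOKUP_IDX_MAX == 256)
    ++log_cnt;
    v >>= 1;
  }
  // Prints 9.965784 twice: log2(250) + 2 equals log2(1000) exactly here
  // because the two shifted-out bits were zero.
  printf("%f vs %f\n", log2((double)v) + log_cnt, log2(1000.0));
  return 0;
}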
@ -198,6 +287,61 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
return Average2(Average2(a0, a1), Average2(a2, a3));
}
#if defined(WEBP_TARGET_HAS_SSE2)
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
uint32_t c2) {
const __m128i zero = _mm_setzero_si128();
const __m128i C0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0), zero);
const __m128i C1 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1), zero);
const __m128i C2 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
const __m128i V1 = _mm_add_epi16(C0, C1);
const __m128i V2 = _mm_sub_epi16(V1, C2);
const __m128i b = _mm_packus_epi16(V2, V2);
const uint32_t output = _mm_cvtsi128_si32(b);
return output;
}
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
uint32_t c2) {
const uint32_t ave = Average2(c0, c1);
const __m128i zero = _mm_setzero_si128();
const __m128i A0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave), zero);
const __m128i B0 = _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2), zero);
const __m128i A1 = _mm_sub_epi16(A0, B0);
const __m128i BgtA = _mm_cmpgt_epi16(B0, A0);
const __m128i A2 = _mm_sub_epi16(A1, BgtA);
const __m128i A3 = _mm_srai_epi16(A2, 1);
const __m128i A4 = _mm_add_epi16(A0, A3);
const __m128i A5 = _mm_packus_epi16(A4, A4);
const uint32_t output = _mm_cvtsi128_si32(A5);
return output;
}
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
int pa_minus_pb;
const __m128i zero = _mm_setzero_si128();
const __m128i A0 = _mm_cvtsi32_si128(a);
const __m128i B0 = _mm_cvtsi32_si128(b);
const __m128i C0 = _mm_cvtsi32_si128(c);
const __m128i AC0 = _mm_subs_epu8(A0, C0);
const __m128i CA0 = _mm_subs_epu8(C0, A0);
const __m128i BC0 = _mm_subs_epu8(B0, C0);
const __m128i CB0 = _mm_subs_epu8(C0, B0);
const __m128i AC = _mm_or_si128(AC0, CA0);
const __m128i BC = _mm_or_si128(BC0, CB0);
const __m128i pa = _mm_unpacklo_epi8(AC, zero); // |a - c|
const __m128i pb = _mm_unpacklo_epi8(BC, zero); // |b - c|
const __m128i diff = _mm_sub_epi16(pb, pa);
{
int16_t out[8];
_mm_storeu_si128((__m128i*)out, diff);
pa_minus_pb = out[0] + out[1] + out[2] + out[3];
}
return (pa_minus_pb <= 0) ? a : b;
}
#else
static WEBP_INLINE uint32_t Clip255(uint32_t a) {
if (a < 256) {
return a;
@ -239,9 +383,9 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
}
static WEBP_INLINE int Sub3(int a, int b, int c) {
const int pa = b - c;
const int pb = a - c;
return abs(pa) - abs(pb);
const int pb = b - c;
const int pa = a - c;
return abs(pb) - abs(pa);
}
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
@ -250,9 +394,9 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) +
Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff);
return (pa_minus_pb <= 0) ? a : b;
}
#endif
//------------------------------------------------------------------------------
// Predictors
@ -340,35 +484,36 @@ static float PredictionCostSpatial(const int* counts,
return (float)(-0.1 * bits);
}
// Compute the Shannon entropy: Sum(p*log2(p))
static float ShannonEntropy(const int* const array, int n) {
// Compute the combined Shannon entropy for distributions {X} and {X+Y}
static float CombinedShannonEntropy(const int* const X,
const int* const Y, int n) {
int i;
float retval = 0.f;
int sum = 0;
double retval = 0.;
int sumX = 0, sumXY = 0;
for (i = 0; i < n; ++i) {
if (array[i] != 0) {
sum += array[i];
retval -= VP8LFastSLog2(array[i]);
const int x = X[i];
const int xy = X[i] + Y[i];
if (x != 0) {
sumX += x;
retval -= VP8LFastSLog2(x);
}
if (xy != 0) {
sumXY += xy;
retval -= VP8LFastSLog2(xy);
}
}
retval += VP8LFastSLog2(sum);
return retval;
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
return (float)retval;
}
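For intuition: the value returned above is the sum of the unnormalized Shannon entropies of the histograms {X} and {X+Y}. A reference sketch using exact logarithms instead of the VP8LFastSLog2 approximation (the helper names here are made up):
#include <math.h>

static double SLog2(double v) { return (v > 0.) ? v * log2(v) : 0.; }

// Unnormalized entropy of a histogram: sum*log2(sum) - sum_i n_i*log2(n_i).
// CombinedShannonEntropy(X, Y, n) then corresponds to Entropy(X) + Entropy(X + Y).
static double Entropy(const int* h, int n) {
  double r = 0.;
  int i, sum = 0;
  for (i = 0; i < n; ++i) {
    sum += h[i];
    r -= SLog2((double)h[i]);
  }
  return r + SLog2((double)sum);
}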
static float PredictionCostSpatialHistogram(int accumulated[4][256],
int tile[4][256]) {
int i;
int k;
int combo[256];
double retval = 0;
for (i = 0; i < 4; ++i) {
const double exp_val = 0.94;
retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
retval += ShannonEntropy(&tile[i][0], 256);
for (k = 0; k < 256; ++k) {
combo[k] = accumulated[i][k] + tile[i][k];
}
retval += ShannonEntropy(&combo[0], 256);
const double kExpValue = 0.94;
retval += PredictionCostSpatial(tile[i], 1, kExpValue);
retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
}
return (float)retval;
}
@ -572,8 +717,21 @@ static void PredictorInverseTransform(const VP8LTransform* const transform,
}
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
int i;
for (i = 0; i < num_pixs; ++i) {
int i = 0;
#if defined(WEBP_TARGET_HAS_SSE2)
const __m128i mask = _mm_set1_epi32(0x0000ff00);
for (; i + 4 < num_pixs; i += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
const __m128i out = _mm_sub_epi8(in, in_0g0g);
_mm_storeu_si128((__m128i*)&argb_data[i], out);
}
// fallthrough and finish off with plain-C
#endif
for (; i < num_pixs; ++i) {
const uint32_t argb = argb_data[i];
const uint32_t green = (argb >> 8) & 0xff;
const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
@ -588,9 +746,21 @@ static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
int y_start, int y_end, uint32_t* data) {
const int width = transform->xsize_;
const uint32_t* const data_end = data + (y_end - y_start) * width;
#if defined(WEBP_TARGET_HAS_SSE2)
const __m128i mask = _mm_set1_epi32(0x0000ff00);
for (; data + 4 < data_end; data += 4) {
const __m128i in = _mm_loadu_si128((__m128i*)data);
const __m128i in_00g0 = _mm_and_si128(in, mask); // 00g0|00g0|...
const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8); // 0g00|0g00|...
const __m128i in_000g = _mm_srli_epi32(in_00g0, 8); // 000g|000g|...
const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
const __m128i out = _mm_add_epi8(in, in_0g0g);
_mm_storeu_si128((__m128i*)data, out);
}
// fallthrough and finish off with plain-C
#endif
while (data < data_end) {
const uint32_t argb = *data;
// "* 0x10001u" is equivalent to "(green << 16) + green"
const uint32_t green = ((argb >> 8) & 0xff);
uint32_t red_blue = (argb & 0x00ff00ffu);
red_blue += (green << 16) | green;
@ -655,6 +825,25 @@ static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
}
static WEBP_INLINE uint8_t TransformColorRed(uint8_t green_to_red,
uint32_t argb) {
const uint32_t green = argb >> 8;
uint32_t new_red = argb >> 16;
new_red -= ColorTransformDelta(green_to_red, green);
return (new_red & 0xff);
}
static WEBP_INLINE uint8_t TransformColorBlue(uint8_t green_to_blue,
uint8_t red_to_blue,
uint32_t argb) {
const uint32_t green = argb >> 8;
const uint32_t red = argb >> 16;
uint8_t new_blue = argb;
new_blue -= ColorTransformDelta(green_to_blue, green);
new_blue -= ColorTransformDelta(red_to_blue, red);
return (new_blue & 0xff);
}
static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
int ix, int xsize) {
const uint32_t v = argb[ix];
@ -675,14 +864,10 @@ static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
static float PredictionCostCrossColor(const int accumulated[256],
const int counts[256]) {
// Favor low entropy, locally and globally.
int i;
int combo[256];
for (i = 0; i < 256; ++i) {
combo[i] = accumulated[i] + counts[i];
}
return ShannonEntropy(combo, 256) +
ShannonEntropy(counts, 256) +
PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values.
// Favor small absolute values for PredictionCostSpatial
static const double kExpValue = 2.4;
return CombinedShannonEntropy(counts, accumulated, 256) +
PredictionCostSpatial(counts, 3, kExpValue);
}
static Multipliers GetBestColorTransformForTile(
@ -712,85 +897,75 @@ static Multipliers GetBestColorTransformForTile(
if (all_y_max > ysize) {
all_y_max = ysize;
}
for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
int histo[256] = { 0 };
int all_y;
Multipliers tx;
MultipliersClear(&tx);
tx.green_to_red_ = green_to_red & 0xff;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
uint32_t predict;
int ix = all_y * xsize + tile_x_offset;
int all_x;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
predict = TransformColor(&tx, argb[ix], 0);
++histo[(predict >> 16) & 0xff]; // red.
++histo[TransformColorRed(green_to_red, argb[ix])]; // red.
}
}
cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
if (tx.green_to_red_ == prevX.green_to_red_) {
if ((uint8_t)green_to_red == prevX.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_red_ == prevY.green_to_red_) {
if ((uint8_t)green_to_red == prevY.green_to_red_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_red_ == 0) {
if (green_to_red == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx = tx;
best_tx.green_to_red_ = green_to_red;
}
}
best_diff = MAX_DIFF_COST;
green_to_red = best_tx.green_to_red_;
for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
int all_y;
int histo[256] = { 0 };
Multipliers tx;
tx.green_to_red_ = green_to_red;
tx.green_to_blue_ = green_to_blue;
tx.red_to_blue_ = red_to_blue;
for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
uint32_t predict;
int all_x;
int ix = all_y * xsize + tile_x_offset;
for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
if (SkipRepeatedPixels(argb, ix, xsize)) {
continue;
}
predict = TransformColor(&tx, argb[ix], 0);
++histo[predict & 0xff]; // blue.
++histo[TransformColorBlue(green_to_blue, red_to_blue, argb[ix])];
}
}
cur_diff =
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if (tx.green_to_blue_ == prevX.green_to_blue_) {
PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
if ((uint8_t)green_to_blue == prevX.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_blue_ == prevY.green_to_blue_) {
if ((uint8_t)green_to_blue == prevY.green_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.red_to_blue_ == prevX.red_to_blue_) {
if ((uint8_t)red_to_blue == prevX.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.red_to_blue_ == prevY.red_to_blue_) {
if ((uint8_t)red_to_blue == prevY.red_to_blue_) {
cur_diff -= 3; // favor keeping the areas locally similar
}
if (tx.green_to_blue_ == 0) {
if (green_to_blue == 0) {
cur_diff -= 3;
}
if (tx.red_to_blue_ == 0) {
if (red_to_blue == 0) {
cur_diff -= 3;
}
if (cur_diff < best_diff) {
best_diff = cur_diff;
best_tx = tx;
best_tx.green_to_blue_ = green_to_blue;
best_tx.red_to_blue_ = red_to_blue;
}
}
}
@ -920,39 +1095,64 @@ static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
}
// Separate out pixels packed together using pixel-bundling.
static void ColorIndexInverseTransform(
const VP8LTransform* const transform,
int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
int y;
const int bits_per_pixel = 8 >> transform->bits_;
const int width = transform->xsize_;
const uint32_t* const color_map = transform->data_;
if (bits_per_pixel < 8) {
const int pixels_per_byte = 1 << transform->bits_;
const int count_mask = pixels_per_byte - 1;
const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
for (y = y_start; y < y_end; ++y) {
uint32_t packed_pixels = 0;
int x;
for (x = 0; x < width; ++x) {
// We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
// increments of x. Fortunately, pixels_per_byte is a power of 2, so
// can just use a mask for that, instead of decrementing a counter.
if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
*dst++ = color_map[packed_pixels & bit_mask];
packed_pixels >>= bits_per_pixel;
}
}
} else {
for (y = y_start; y < y_end; ++y) {
int x;
for (x = 0; x < width; ++x) {
*dst++ = color_map[((*src++) >> 8) & 0xff];
}
}
}
// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
void FUNC_NAME(const VP8LTransform* const transform, \
int y_start, int y_end, const TYPE* src, TYPE* dst) { \
int y; \
const int bits_per_pixel = 8 >> transform->bits_; \
const int width = transform->xsize_; \
const uint32_t* const color_map = transform->data_; \
if (bits_per_pixel < 8) { \
const int pixels_per_byte = 1 << transform->bits_; \
const int count_mask = pixels_per_byte - 1; \
const uint32_t bit_mask = (1 << bits_per_pixel) - 1; \
for (y = y_start; y < y_end; ++y) { \
uint32_t packed_pixels = 0; \
int x; \
for (x = 0; x < width; ++x) { \
/* We need to load fresh 'packed_pixels' once every */ \
/* 'pixels_per_byte' increments of x. Fortunately, pixels_per_byte */ \
/* is a power of 2, so can just use a mask for that, instead of */ \
/* decrementing a counter. */ \
if ((x & count_mask) == 0) packed_pixels = GET_INDEX(*src++); \
*dst++ = GET_VALUE(color_map[packed_pixels & bit_mask]); \
packed_pixels >>= bits_per_pixel; \
} \
} \
} else { \
for (y = y_start; y < y_end; ++y) { \
int x; \
for (x = 0; x < width; ++x) { \
*dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
} \
} \
} \
}
static WEBP_INLINE uint32_t GetARGBIndex(uint32_t idx) {
return (idx >> 8) & 0xff;
}
static WEBP_INLINE uint8_t GetAlphaIndex(uint8_t idx) {
return idx;
}
static WEBP_INLINE uint32_t GetARGBValue(uint32_t val) {
return val;
}
static WEBP_INLINE uint8_t GetAlphaValue(uint32_t val) {
return (val >> 8) & 0xff;
}
static COLOR_INDEX_INVERSE(ColorIndexInverseTransform, uint32_t, GetARGBIndex,
GetARGBValue)
COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha, uint8_t, GetAlphaIndex,
GetAlphaValue)
#undef COLOR_INDEX_INVERSE
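A small self-contained sketch of the unbundling arithmetic in the macro above, for the hypothetical case transform->bits_ == 2 (so bits_per_pixel == 2 and four palette indices share one packed green byte):
#include <stdint.h>
#include <stdio.h>

int main(void) {
  // Four 2-bit indices 1, 0, 3, 2 packed LSB-first, as GET_INDEX() would return them.
  uint32_t packed_pixels = (2 << 6) | (3 << 4) | (0 << 2) | 1;
  const uint32_t bit_mask = (1 << 2) - 1;
  int x;
  for (x = 0; x < 4; ++x) {
    printf("index %d = %u\n", x, (unsigned)(packed_pixels & bit_mask));  // 1, 0, 3, 2
    packed_pixels >>= 2;
  }
  return 0;
}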
void VP8LInverseTransform(const VP8LTransform* const transform,
int row_start, int row_end,
const uint32_t* const in, uint32_t* const out) {
@ -1034,8 +1234,15 @@ static void ConvertBGRAToRGBA4444(const uint32_t* src,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
*dst++ = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf);
#ifdef WEBP_SWAP_16BIT_CSP
*dst++ = ba;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = ba;
#endif
}
}
@ -1044,8 +1251,15 @@ static void ConvertBGRAToRGB565(const uint32_t* src,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
const uint32_t argb = *src++;
*dst++ = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
*dst++ = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);
#ifdef WEBP_SWAP_16BIT_CSP
*dst++ = gb;
*dst++ = rg;
#else
*dst++ = rg;
*dst++ = gb;
#endif
}
}
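For reference, the RGB565 packing above can be checked on a single pixel. The driver below is a hypothetical sketch that reuses the same shifts and masks, following the default (non-WEBP_SWAP_16BIT_CSP) byte order.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical single-pixel check of the RGB565 packing above. */
int main(void) {
  const uint32_t argb = 0xff204080u;                   /* A=ff R=20 G=40 B=80 */
  const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);  /* RRRRRGGG */
  const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f);   /* GGGBBBBB */
  printf("%02x %02x\n", rg, gb);                       /* prints: 22 10 */
  return 0;
}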
@ -1066,20 +1280,34 @@ static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
const uint32_t* const src_end = src + num_pixels;
while (src < src_end) {
uint32_t argb = *src++;
#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
#if !defined(__BIG_ENDIAN__)
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
#if defined(__i386__) || defined(__x86_64__)
__asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
*(uint32_t*)dst = argb;
dst += sizeof(argb);
#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
#elif defined(_MSC_VER)
argb = _byteswap_ulong(argb);
*(uint32_t*)dst = argb;
dst += sizeof(argb);
#else
*dst++ = (argb >> 24) & 0xff;
*dst++ = (argb >> 16) & 0xff;
*dst++ = (argb >> 8) & 0xff;
*dst++ = (argb >> 0) & 0xff;
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
#else // WEBP_REFERENCE_IMPLEMENTATION
dst[0] = (argb >> 24) & 0xff;
dst[1] = (argb >> 16) & 0xff;
dst[2] = (argb >> 8) & 0xff;
dst[3] = (argb >> 0) & 0xff;
#endif
#else // __BIG_ENDIAN__
dst[0] = (argb >> 0) & 0xff;
dst[1] = (argb >> 8) & 0xff;
dst[2] = (argb >> 16) & 0xff;
dst[3] = (argb >> 24) & 0xff;
#endif
dst += sizeof(argb);
}
} else {
memcpy(dst, src, num_pixels * sizeof(*src));
@ -1131,6 +1359,27 @@ void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
}
}
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
void VP8LBundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst) {
int x;
if (xbits > 0) {
const int bit_depth = 1 << (3 - xbits);
const int mask = (1 << xbits) - 1;
uint32_t code = 0xff000000;
for (x = 0; x < width; ++x) {
const int xsub = x & mask;
if (xsub == 0) {
code = 0xff000000;
}
code |= row[x] << (8 + bit_depth * xsub);
dst[x >> xbits] = code;
}
} else {
for (x = 0; x < width; ++x) dst[x] = 0xff000000 | (row[x] << 8);
}
}
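A small driver with made-up sample data showing what the xbits > 0 branch above produces for 2-bit indices (four indices per destination pixel, stored in the green byte); a sketch, not part of the library.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical driver for the bundling loop above with xbits == 2. */
int main(void) {
  const uint8_t row[8] = { 1, 2, 3, 0, 3, 3, 1, 2 };
  const int xbits = 2;
  const int bit_depth = 1 << (3 - xbits);       /* 2 bits per index */
  const int mask = (1 << xbits) - 1;
  uint32_t dst[2];
  uint32_t code = 0xff000000u;
  int x;
  for (x = 0; x < 8; ++x) {
    const int xsub = x & mask;
    if (xsub == 0) code = 0xff000000u;
    code |= (uint32_t)row[x] << (8 + bit_depth * xsub);
    dst[x >> xbits] = code;
  }
  printf("%08x %08x\n", dst[0], dst[1]);        /* ff003900 ff009f00 */
  return 0;
}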
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
@ -33,6 +35,13 @@ void VP8LInverseTransform(const struct VP8LTransform* const transform,
int row_start, int row_end,
const uint32_t* const in, uint32_t* const out);
// Similar to the static method ColorIndexInverseTransform() that is part of
// lossless.c, but used only for alpha decoding. It takes uint8_t (rather than
// uint32_t) arguments for 'src' and 'dst'.
void VP8LColorIndexInverseTransformAlpha(
const struct VP8LTransform* const transform, int y_start, int y_end,
const uint8_t* src, uint8_t* dst);
// Subtracts green from blue and red channels.
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs);
@ -59,10 +68,20 @@ static WEBP_INLINE uint32_t VP8LSubSampleSize(uint32_t size,
return (size + (1 << sampling_bits) - 1) >> sampling_bits;
}
// Faster logarithm for integers, with the property of log2(0) == 0.
float VP8LFastLog2(int v);
// Faster logarithm for integers. Small values use a look-up table.
#define LOG_LOOKUP_IDX_MAX 256
extern const float kLog2Table[LOG_LOOKUP_IDX_MAX];
extern const float kSLog2Table[LOG_LOOKUP_IDX_MAX];
extern float VP8LFastLog2Slow(int v);
extern float VP8LFastSLog2Slow(int v);
static WEBP_INLINE float VP8LFastLog2(int v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kLog2Table[v] : VP8LFastLog2Slow(v);
}
// Fast calculation of v * log2(v) for integer input.
static WEBP_INLINE float VP8LFastSLog2(int v) { return VP8LFastLog2(v) * v; }
static WEBP_INLINE float VP8LFastSLog2(int v) {
return (v < LOG_LOOKUP_IDX_MAX) ? kSLog2Table[v] : VP8LFastSLog2Slow(v);
}
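The pattern behind VP8LFastLog2/VP8LFastSLog2 is a lookup table for small arguments and a slow path otherwise, with log2(0) defined as 0. A self-contained sketch of that pattern with hypothetical names (not the libwebp symbols):

#include <math.h>
#include <stdio.h>

#define TABLE_MAX 256
static float table[TABLE_MAX];

static float SlowLog2(int v) { return (float)(log(v) * 1.4426950408889634); }

static float FastLog2(int v) {
  return (v < TABLE_MAX) ? table[v] : SlowLog2(v);
}

int main(void) {
  int i;
  table[0] = 0.f;                               /* log2(0) := 0 by convention */
  for (i = 1; i < TABLE_MAX; ++i) table[i] = SlowLog2(i);
  printf("%.3f %.3f\n", FastLog2(8), FastLog2(1000));   /* 3.000 9.966 */
  return 0;
}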
// In-place difference of each component with mod 256.
static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
@ -73,6 +92,9 @@ static WEBP_INLINE uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
return (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
}
void VP8LBundleColorMap(const uint8_t* const row, int width,
int xbits, uint32_t* const dst);
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// YUV to RGB upsampling functions.
@ -32,7 +34,7 @@ WebPUpsampleLinePairFunc WebPUpsamplers[MODE_LAST];
// ([3*a + b + 9*c + 3*d a + 3*b + 3*c + 9*d] [8 8]) / 16
// We process u and v together stashed into 32bit (16bit each).
#define LOAD_UV(u,v) ((u) | ((v) << 16))
#define LOAD_UV(u, v) ((u) | ((v) << 16))
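The bilinear weights quoted in the comment above can be written out as a scalar reference. The helper below is a hypothetical sketch of one 2x2 chroma quad, not the production macro.

#include <stdio.h>

/* Scalar sketch of the fancy-upsampling weights: four interpolated samples
 * around a 2x2 source quad {a, b; c, d}, each rounded by +8 and divided by 16. */
static void UpsampleQuad(int a, int b, int c, int d, int out[4]) {
  out[0] = (9 * a + 3 * b + 3 * c +     d + 8) >> 4;   /* nearest to a */
  out[1] = (3 * a + 9 * b +     c + 3 * d + 8) >> 4;   /* nearest to b */
  out[2] = (3 * a +     b + 9 * c + 3 * d + 8) >> 4;   /* nearest to c */
  out[3] = (    a + 3 * b + 3 * c + 9 * d + 8) >> 4;   /* nearest to d */
}

int main(void) {
  int out[4];
  UpsampleQuad(100, 120, 140, 160, out);
  printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 115 125 135 145 */
  return 0;
}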
#define UPSAMPLE_FUNC(FUNC_NAME, FUNC, XSTEP) \
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
@ -327,6 +329,11 @@ void WebPInitUpsamplers(void) {
if (VP8GetCPUInfo(kSSE2)) {
WebPInitUpsamplersSSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitUpsamplersNEON();
}
#endif
}
#endif // FANCY_UPSAMPLING
@ -347,6 +354,11 @@ void WebPInitPremultiply(void) {
if (VP8GetCPUInfo(kSSE2)) {
WebPInitPremultiplySSE2();
}
#endif
#if defined(WEBP_USE_NEON)
if (VP8GetCPUInfo(kNEON)) {
WebPInitPremultiplyNEON();
}
#endif
}
#endif // FANCY_UPSAMPLING
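Both init hunks above follow the same runtime-dispatch idea: fill a function-pointer table with the portable routines, then override entries when VP8GetCPUInfo reports SSE2 or NEON. A toy sketch of that pattern with stand-in names (not the WebP API):

#include <stdio.h>

typedef void (*RowFunc)(void);

static void RowPlainC(void) { puts("plain C row"); }
static void RowSIMD(void)   { puts("SIMD row"); }

static int HaveSIMD(void) { return 0; }   /* stand-in for VP8GetCPUInfo(...) */

static RowFunc g_row = RowPlainC;         /* portable default first */

static void InitRow(void) {
  if (HaveSIMD()) g_row = RowSIMD;        /* only override when supported */
}

int main(void) {
  InitRow();
  g_row();                                /* prints "plain C row" here */
  return 0;
}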

294
3rdparty/libwebp/dsp/upsampling_neon.c vendored Normal file
View File

@ -0,0 +1,294 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// NEON version of YUV to RGB upsampling functions.
//
// Author: mans@mansr.com (Mans Rullgard)
// Based on SSE code by: somnath@google.com (Somnath Banerjee)
#include "./dsp.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if defined(WEBP_USE_NEON)
#include <assert.h>
#include <arm_neon.h>
#include <string.h>
#include "./yuv.h"
#ifdef FANCY_UPSAMPLING
// Loads 9 pixels each from rows r1 and r2 and generates 16 pixels.
#define UPSAMPLE_16PIXELS(r1, r2, out) { \
uint8x8_t a = vld1_u8(r1); \
uint8x8_t b = vld1_u8(r1 + 1); \
uint8x8_t c = vld1_u8(r2); \
uint8x8_t d = vld1_u8(r2 + 1); \
\
uint16x8_t al = vshll_n_u8(a, 1); \
uint16x8_t bl = vshll_n_u8(b, 1); \
uint16x8_t cl = vshll_n_u8(c, 1); \
uint16x8_t dl = vshll_n_u8(d, 1); \
\
uint8x8_t diag1, diag2; \
uint16x8_t sl; \
\
/* a + b + c + d */ \
sl = vaddl_u8(a, b); \
sl = vaddw_u8(sl, c); \
sl = vaddw_u8(sl, d); \
\
al = vaddq_u16(sl, al); /* 3a + b + c + d */ \
bl = vaddq_u16(sl, bl); /* a + 3b + c + d */ \
\
al = vaddq_u16(al, dl); /* 3a + b + c + 3d */ \
bl = vaddq_u16(bl, cl); /* a + 3b + 3c + d */ \
\
diag2 = vshrn_n_u16(al, 3); \
diag1 = vshrn_n_u16(bl, 3); \
\
a = vrhadd_u8(a, diag1); \
b = vrhadd_u8(b, diag2); \
c = vrhadd_u8(c, diag2); \
d = vrhadd_u8(d, diag1); \
\
{ \
const uint8x8x2_t a_b = {{ a, b }}; \
const uint8x8x2_t c_d = {{ c, d }}; \
vst2_u8(out, a_b); \
vst2_u8(out + 32, c_d); \
} \
}
// Turn the macro into a function for reducing code-size when non-critical
static void Upsample16Pixels(const uint8_t *r1, const uint8_t *r2,
uint8_t *out) {
UPSAMPLE_16PIXELS(r1, r2, out);
}
#define UPSAMPLE_LAST_BLOCK(tb, bb, num_pixels, out) { \
uint8_t r1[9], r2[9]; \
memcpy(r1, (tb), (num_pixels)); \
memcpy(r2, (bb), (num_pixels)); \
/* replicate last byte */ \
memset(r1 + (num_pixels), r1[(num_pixels) - 1], 9 - (num_pixels)); \
memset(r2 + (num_pixels), r2[(num_pixels) - 1], 9 - (num_pixels)); \
Upsample16Pixels(r1, r2, out); \
}
#define CY 76283
#define CVR 89858
#define CUG 22014
#define CVG 45773
#define CUB 113618
static const int16_t coef[4] = { CVR / 4, CUG, CVG / 2, CUB / 4 };
#define CONVERT8(FMT, XSTEP, N, src_y, src_uv, out, cur_x) { \
int i; \
for (i = 0; i < N; i += 8) { \
int off = ((cur_x) + i) * XSTEP; \
uint8x8_t y = vld1_u8(src_y + (cur_x) + i); \
uint8x8_t u = vld1_u8((src_uv) + i); \
uint8x8_t v = vld1_u8((src_uv) + i + 16); \
int16x8_t yy = vreinterpretq_s16_u16(vsubl_u8(y, u16)); \
int16x8_t uu = vreinterpretq_s16_u16(vsubl_u8(u, u128)); \
int16x8_t vv = vreinterpretq_s16_u16(vsubl_u8(v, u128)); \
\
int16x8_t ud = vshlq_n_s16(uu, 1); \
int16x8_t vd = vshlq_n_s16(vv, 1); \
\
int32x4_t vrl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(vv), 1), \
vget_low_s16(vd), cf16, 0); \
int32x4_t vrh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(vv), 1), \
vget_high_s16(vd), cf16, 0); \
int16x8_t vr = vcombine_s16(vrshrn_n_s32(vrl, 16), \
vrshrn_n_s32(vrh, 16)); \
\
int32x4_t vl = vmovl_s16(vget_low_s16(vv)); \
int32x4_t vh = vmovl_s16(vget_high_s16(vv)); \
int32x4_t ugl = vmlal_lane_s16(vl, vget_low_s16(uu), cf16, 1); \
int32x4_t ugh = vmlal_lane_s16(vh, vget_high_s16(uu), cf16, 1); \
int32x4_t gcl = vqdmlal_lane_s16(ugl, vget_low_s16(vv), cf16, 2); \
int32x4_t gch = vqdmlal_lane_s16(ugh, vget_high_s16(vv), cf16, 2); \
int16x8_t gc = vcombine_s16(vrshrn_n_s32(gcl, 16), \
vrshrn_n_s32(gch, 16)); \
\
int32x4_t ubl = vqdmlal_lane_s16(vshll_n_s16(vget_low_s16(uu), 1), \
vget_low_s16(ud), cf16, 3); \
int32x4_t ubh = vqdmlal_lane_s16(vshll_n_s16(vget_high_s16(uu), 1), \
vget_high_s16(ud), cf16, 3); \
int16x8_t ub = vcombine_s16(vrshrn_n_s32(ubl, 16), \
vrshrn_n_s32(ubh, 16)); \
\
int32x4_t rl = vaddl_s16(vget_low_s16(yy), vget_low_s16(vr)); \
int32x4_t rh = vaddl_s16(vget_high_s16(yy), vget_high_s16(vr)); \
int32x4_t gl = vsubl_s16(vget_low_s16(yy), vget_low_s16(gc)); \
int32x4_t gh = vsubl_s16(vget_high_s16(yy), vget_high_s16(gc)); \
int32x4_t bl = vaddl_s16(vget_low_s16(yy), vget_low_s16(ub)); \
int32x4_t bh = vaddl_s16(vget_high_s16(yy), vget_high_s16(ub)); \
\
rl = vmulq_lane_s32(rl, cf32, 0); \
rh = vmulq_lane_s32(rh, cf32, 0); \
gl = vmulq_lane_s32(gl, cf32, 0); \
gh = vmulq_lane_s32(gh, cf32, 0); \
bl = vmulq_lane_s32(bl, cf32, 0); \
bh = vmulq_lane_s32(bh, cf32, 0); \
\
y = vqmovun_s16(vcombine_s16(vrshrn_n_s32(rl, 16), \
vrshrn_n_s32(rh, 16))); \
u = vqmovun_s16(vcombine_s16(vrshrn_n_s32(gl, 16), \
vrshrn_n_s32(gh, 16))); \
v = vqmovun_s16(vcombine_s16(vrshrn_n_s32(bl, 16), \
vrshrn_n_s32(bh, 16))); \
STR_ ## FMT(out + off, y, u, v); \
} \
}
#define v255 vmov_n_u8(255)
#define STR_Rgb(out, r, g, b) do { \
const uint8x8x3_t r_g_b = {{ r, g, b }}; \
vst3_u8(out, r_g_b); \
} while (0)
#define STR_Bgr(out, r, g, b) do { \
const uint8x8x3_t b_g_r = {{ b, g, r }}; \
vst3_u8(out, b_g_r); \
} while (0)
#define STR_Rgba(out, r, g, b) do { \
const uint8x8x4_t r_g_b_v255 = {{ r, g, b, v255 }}; \
vst4_u8(out, r_g_b_v255); \
} while (0)
#define STR_Bgra(out, r, g, b) do { \
const uint8x8x4_t b_g_r_v255 = {{ b, g, r, v255 }}; \
vst4_u8(out, b_g_r_v255); \
} while (0)
#define CONVERT1(FMT, XSTEP, N, src_y, src_uv, rgb, cur_x) { \
int i; \
for (i = 0; i < N; i++) { \
int off = ((cur_x) + i) * XSTEP; \
int y = src_y[(cur_x) + i]; \
int u = (src_uv)[i]; \
int v = (src_uv)[i + 16]; \
VP8YuvTo ## FMT(y, u, v, rgb + off); \
} \
}
#define CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, uv, \
top_dst, bottom_dst, cur_x, len) { \
if (top_y) { \
CONVERT8(FMT, XSTEP, len, top_y, uv, top_dst, cur_x) \
} \
if (bottom_y) { \
CONVERT8(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x) \
} \
}
#define CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, uv, \
top_dst, bottom_dst, cur_x, len) { \
if (top_y) { \
CONVERT1(FMT, XSTEP, len, top_y, uv, top_dst, cur_x); \
} \
if (bottom_y) { \
CONVERT1(FMT, XSTEP, len, bottom_y, (uv) + 32, bottom_dst, cur_x); \
} \
}
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
const uint8_t *top_u, const uint8_t *top_v, \
const uint8_t *cur_u, const uint8_t *cur_v, \
uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[2 * 32 + 15]; \
uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
const int uv_len = (len + 1) >> 1; \
/* 9 pixels must be read-able for each block */ \
const int num_blocks = (uv_len - 1) >> 3; \
const int leftover = uv_len - num_blocks * 8; \
const int last_pos = 1 + 16 * num_blocks; \
\
const int u_diag = ((top_u[0] + cur_u[0]) >> 1) + 1; \
const int v_diag = ((top_v[0] + cur_v[0]) >> 1) + 1; \
\
const int16x4_t cf16 = vld1_s16(coef); \
const int32x2_t cf32 = vmov_n_s32(CY); \
const uint8x8_t u16 = vmov_n_u8(16); \
const uint8x8_t u128 = vmov_n_u8(128); \
\
/* Treat the first pixel in regular way */ \
if (top_y) { \
const int u0 = (top_u[0] + u_diag) >> 1; \
const int v0 = (top_v[0] + v_diag) >> 1; \
VP8YuvTo ## FMT(top_y[0], u0, v0, top_dst); \
} \
if (bottom_y) { \
const int u0 = (cur_u[0] + u_diag) >> 1; \
const int v0 = (cur_v[0] + v_diag) >> 1; \
VP8YuvTo ## FMT(bottom_y[0], u0, v0, bottom_dst); \
} \
\
for (block = 0; block < num_blocks; ++block) { \
UPSAMPLE_16PIXELS(top_u, cur_u, r_uv); \
UPSAMPLE_16PIXELS(top_v, cur_v, r_uv + 16); \
CONVERT2RGB_8(FMT, XSTEP, top_y, bottom_y, r_uv, \
top_dst, bottom_dst, 16 * block + 1, 16); \
top_u += 8; \
cur_u += 8; \
top_v += 8; \
cur_v += 8; \
} \
\
UPSAMPLE_LAST_BLOCK(top_u, cur_u, leftover, r_uv); \
UPSAMPLE_LAST_BLOCK(top_v, cur_v, leftover, r_uv + 16); \
CONVERT2RGB_1(FMT, XSTEP, top_y, bottom_y, r_uv, \
top_dst, bottom_dst, last_pos, len - last_pos); \
}
// NEON variants of the fancy upsampler.
NEON_UPSAMPLE_FUNC(UpsampleRgbLinePairNEON, Rgb, 3)
NEON_UPSAMPLE_FUNC(UpsampleBgrLinePairNEON, Bgr, 3)
NEON_UPSAMPLE_FUNC(UpsampleRgbaLinePairNEON, Rgba, 4)
NEON_UPSAMPLE_FUNC(UpsampleBgraLinePairNEON, Bgra, 4)
#endif // FANCY_UPSAMPLING
#endif // WEBP_USE_NEON
//------------------------------------------------------------------------------
extern WebPUpsampleLinePairFunc WebPUpsamplers[/* MODE_LAST */];
void WebPInitUpsamplersNEON(void) {
#if defined(WEBP_USE_NEON)
WebPUpsamplers[MODE_RGB] = UpsampleRgbLinePairNEON;
WebPUpsamplers[MODE_RGBA] = UpsampleRgbaLinePairNEON;
WebPUpsamplers[MODE_BGR] = UpsampleBgrLinePairNEON;
WebPUpsamplers[MODE_BGRA] = UpsampleBgraLinePairNEON;
#endif // WEBP_USE_NEON
}
void WebPInitPremultiplyNEON(void) {
#if defined(WEBP_USE_NEON)
WebPUpsamplers[MODE_rgbA] = UpsampleRgbaLinePairNEON;
WebPUpsamplers[MODE_bgrA] = UpsampleBgraLinePairNEON;
#endif // WEBP_USE_NEON
}
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// SSE2 version of YUV to RGB upsampling functions.
@ -51,12 +53,12 @@ extern "C" {
// pack and store two alternating pixel rows
#define PACK_AND_STORE(a, b, da, db, out) do { \
const __m128i ta = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
const __m128i tb = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
const __m128i t1 = _mm_unpacklo_epi8(ta, tb); \
const __m128i t2 = _mm_unpackhi_epi8(ta, tb); \
_mm_store_si128(((__m128i*)(out)) + 0, t1); \
_mm_store_si128(((__m128i*)(out)) + 1, t2); \
const __m128i t_a = _mm_avg_epu8(a, da); /* (9a + 3b + 3c + d + 8) / 16 */ \
const __m128i t_b = _mm_avg_epu8(b, db); /* (3a + 9b + c + 3d + 8) / 16 */ \
const __m128i t_1 = _mm_unpacklo_epi8(t_a, t_b); \
const __m128i t_2 = _mm_unpackhi_epi8(t_a, t_b); \
_mm_store_si128(((__m128i*)(out)) + 0, t_1); \
_mm_store_si128(((__m128i*)(out)) + 1, t_2); \
} while (0)
// Loads 17 pixels each from rows r1 and r2 and generates 32 pixels.
@ -128,7 +130,7 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int b; \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[4 * 32 + 15]; \
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
@ -154,11 +156,11 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
FUNC(bottom_y[0], u0, v0, bottom_dst); \
} \
\
for (b = 0; b < num_blocks; ++b) { \
for (block = 0; block < num_blocks; ++block) { \
UPSAMPLE_32PIXELS(top_u, cur_u, r_uv + 0 * 32); \
UPSAMPLE_32PIXELS(top_v, cur_v, r_uv + 1 * 32); \
CONVERT2RGB(FUNC, XSTEP, top_y, bottom_y, r_uv, top_dst, bottom_dst, \
32 * b + 1, 32) \
32 * block + 1, 32) \
top_u += 16; \
cur_u += 16; \
top_v += 16; \
@ -211,3 +213,5 @@ void WebPInitPremultiplySSE2(void) {
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// YUV->RGB conversion function
@ -15,7 +17,7 @@
extern "C" {
#endif
enum { YUV_HALF = 1 << (YUV_FIX - 1) };
#ifdef WEBP_YUV_USE_TABLE
int16_t VP8kVToR[256], VP8kUToB[256];
int32_t VP8kVToG[256], VP8kUToG[256];
@ -33,6 +35,7 @@ void VP8YUVInit(void) {
if (done) {
return;
}
#ifndef USE_YUVj
for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (89858 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22014 * (i - 128) + YUV_HALF;
@ -44,9 +47,29 @@ void VP8YUVInit(void) {
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
}
#else
for (i = 0; i < 256; ++i) {
VP8kVToR[i] = (91881 * (i - 128) + YUV_HALF) >> YUV_FIX;
VP8kUToG[i] = -22554 * (i - 128) + YUV_HALF;
VP8kVToG[i] = -46802 * (i - 128);
VP8kUToB[i] = (116130 * (i - 128) + YUV_HALF) >> YUV_FIX;
}
for (i = YUV_RANGE_MIN; i < YUV_RANGE_MAX; ++i) {
const int k = i;
VP8kClip[i - YUV_RANGE_MIN] = clip(k, 255);
VP8kClip4Bits[i - YUV_RANGE_MIN] = clip((k + 8) >> 4, 15);
}
#endif
done = 1;
}
#else
void VP8YUVInit(void) {}
#endif // WEBP_YUV_USE_TABLE
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,12 +1,34 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// inline YUV<->RGB conversion function
//
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
// More information at: http://en.wikipedia.org/wiki/YCbCr
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
// We use 16bit fixed point operations for RGB->YUV conversion.
//
// For the Y'CbCr to RGB conversion, the BT.601 specification reads:
// R = 1.164 * (Y-16) + 1.596 * (V-128)
// G = 1.164 * (Y-16) - 0.813 * (V-128) - 0.391 * (U-128)
// B = 1.164 * (Y-16) + 2.018 * (U-128)
// where Y is in the [16,235] range, and U/V in the [16,240] range.
// In the table-lookup version (WEBP_YUV_USE_TABLE), the common factor
// "1.164 * (Y-16)" can be handled as an offset in the VP8kClip[] table.
// So in this case the formulae should be read as:
// R = 1.164 * [Y + 1.371 * (V-128) ] - 18.624
// G = 1.164 * [Y - 0.698 * (V-128) - 0.336 * (U-128)] - 18.624
// B = 1.164 * [Y + 1.733 * (U-128)] - 18.624
// once factorized. Here too, 16bit fixed precision is used.
//
// Author: Skal (pascal.massimino@gmail.com)
#ifndef WEBP_DSP_YUV_H_
@ -14,6 +36,19 @@
#include "../dec/decode_vp8.h"
// Define the following to use the LUT-based code:
#define WEBP_YUV_USE_TABLE
#if defined(WEBP_EXPERIMENTAL_FEATURES)
// Do NOT activate this feature for real compression. This is only experimental!
// This flag is for comparison purpose against JPEG's "YUVj" natural colorspace.
// This colorspace is close to Rec.601's Y'CbCr model with the notable
// difference of allowing larger range for luma/chroma.
// See http://en.wikipedia.org/wiki/YCbCr#JPEG_conversion paragraph, and its
// difference with http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
// #define USE_YUVj
#endif
//------------------------------------------------------------------------------
// YUV -> RGB conversion
@ -22,9 +57,14 @@ extern "C" {
#endif
enum { YUV_FIX = 16, // fixed-point precision
YUV_HALF = 1 << (YUV_FIX - 1),
YUV_MASK = (256 << YUV_FIX) - 1,
YUV_RANGE_MIN = -227, // min value of r/g/b output
YUV_RANGE_MAX = 256 + 226 // max value of r/g/b output
};
#ifdef WEBP_YUV_USE_TABLE
extern int16_t VP8kVToR[256], VP8kUToB[256];
extern int32_t VP8kVToG[256], VP8kUToG[256];
extern uint8_t VP8kClip[YUV_RANGE_MAX - YUV_RANGE_MIN];
@ -40,34 +80,6 @@ static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
rgb[2] = VP8kClip[y + b_off - YUV_RANGE_MIN];
}
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const rgb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
rgb[0] = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
(VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
rgb[1] = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
(VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
}
static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
argb[0] = 0xff;
VP8YuvToRgb(y, u, v, argb + 1);
}
static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
// Don't update alpha (last 4 bits of argb[1])
argb[0] = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
argb[1] = 0x0f | (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4);
}
static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const bgr) {
const int r_off = VP8kVToR[v];
@ -78,6 +90,129 @@ static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
bgr[2] = VP8kClip[y + r_off - YUV_RANGE_MIN];
}
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const rgb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
const uint8_t rg = ((VP8kClip[y + r_off - YUV_RANGE_MIN] & 0xf8) |
(VP8kClip[y + g_off - YUV_RANGE_MIN] >> 5));
const uint8_t gb = (((VP8kClip[y + g_off - YUV_RANGE_MIN] << 3) & 0xe0) |
(VP8kClip[y + b_off - YUV_RANGE_MIN] >> 3));
#ifdef WEBP_SWAP_16BIT_CSP
rgb[0] = gb;
rgb[1] = rg;
#else
rgb[0] = rg;
rgb[1] = gb;
#endif
}
static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
const int r_off = VP8kVToR[v];
const int g_off = (VP8kVToG[v] + VP8kUToG[u]) >> YUV_FIX;
const int b_off = VP8kUToB[u];
const uint8_t rg = ((VP8kClip4Bits[y + r_off - YUV_RANGE_MIN] << 4) |
VP8kClip4Bits[y + g_off - YUV_RANGE_MIN]);
const uint8_t ba = (VP8kClip4Bits[y + b_off - YUV_RANGE_MIN] << 4) | 0x0f;
#ifdef WEBP_SWAP_16BIT_CSP
argb[0] = ba;
argb[1] = rg;
#else
argb[0] = rg;
argb[1] = ba;
#endif
}
#else // Table-free version (slower on x86)
// These constants are 16b fixed-point version of ITU-R BT.601 constants
#define kYScale 76309 // 1.164 = 255 / 219
#define kVToR 104597 // 1.596 = 255 / 112 * 0.701
#define kUToG 25674 // 0.391 = 255 / 112 * 0.886 * 0.114 / 0.587
#define kVToG 53278 // 0.813 = 255 / 112 * 0.701 * 0.299 / 0.587
#define kUToB 132201 // 2.018 = 255 / 112 * 0.886
#define kRCst (-kYScale * 16 - kVToR * 128 + YUV_HALF)
#define kGCst (-kYScale * 16 + kUToG * 128 + kVToG * 128 + YUV_HALF)
#define kBCst (-kYScale * 16 - kUToB * 128 + YUV_HALF)
static WEBP_INLINE uint8_t VP8Clip8(int v) {
return ((v & ~YUV_MASK) == 0) ? (uint8_t)(v >> YUV_FIX)
: (v < 0) ? 0u : 255u;
}
static WEBP_INLINE uint8_t VP8ClipN(int v, int N) { // clip to N bits
return ((v & ~YUV_MASK) == 0) ? (uint8_t)(v >> (YUV_FIX + (8 - N)))
: (v < 0) ? 0u : (255u >> (8 - N));
}
static WEBP_INLINE int VP8YUVToR(int y, int v) {
return kYScale * y + kVToR * v + kRCst;
}
static WEBP_INLINE int VP8YUVToG(int y, int u, int v) {
return kYScale * y - kUToG * u - kVToG * v + kGCst;
}
static WEBP_INLINE int VP8YUVToB(int y, int u) {
return kYScale * y + kUToB * u + kBCst;
}
static WEBP_INLINE void VP8YuvToRgb(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const rgb) {
rgb[0] = VP8Clip8(VP8YUVToR(y, v));
rgb[1] = VP8Clip8(VP8YUVToG(y, u, v));
rgb[2] = VP8Clip8(VP8YUVToB(y, u));
}
static WEBP_INLINE void VP8YuvToBgr(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const bgr) {
bgr[0] = VP8Clip8(VP8YUVToB(y, u));
bgr[1] = VP8Clip8(VP8YUVToG(y, u, v));
bgr[2] = VP8Clip8(VP8YUVToR(y, v));
}
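As a sanity check of the fixed-point constants above, one red-ish sample worked through the R channel (assumes YUV_FIX == 16, so kRCst folds to -76309*16 - 104597*128 + (1 << 15); U does not enter the R formula, and the result is already in range so no clipping applies):

#include <stdio.h>

int main(void) {
  const int y = 81, v = 240;              /* BT.601 "red" sample */
  const int r = (76309 * y + 104597 * v
                 - 76309 * 16 - 104597 * 128 + (1 << 15)) >> 16;
  printf("R = %d\n", r);                  /* 254: nearly saturated red */
  return 0;
}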
static WEBP_INLINE void VP8YuvToRgb565(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const rgb) {
const int r = VP8Clip8(VP8YUVToR(y, v));
const int g = VP8ClipN(VP8YUVToG(y, u, v), 6);
const int b = VP8ClipN(VP8YUVToB(y, u), 5);
const uint8_t rg = (r & 0xf8) | (g >> 3);
const uint8_t gb = (g << 5) | b;
#ifdef WEBP_SWAP_16BIT_CSP
rgb[0] = gb;
rgb[1] = rg;
#else
rgb[0] = rg;
rgb[1] = gb;
#endif
}
static WEBP_INLINE void VP8YuvToRgba4444(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
const int r = VP8Clip8(VP8YUVToR(y, v));
const int g = VP8ClipN(VP8YUVToG(y, u, v), 4);
const int b = VP8Clip8(VP8YUVToB(y, u));
const uint8_t rg = (r & 0xf0) | g;
const uint8_t ba = b | 0x0f; // overwrite the lower 4 bits
#ifdef WEBP_SWAP_16BIT_CSP
argb[0] = ba;
argb[1] = rg;
#else
argb[0] = rg;
argb[1] = ba;
#endif
}
#endif // WEBP_YUV_USE_TABLE
static WEBP_INLINE void VP8YuvToArgb(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const argb) {
argb[0] = 0xff;
VP8YuvToRgb(y, u, v, argb + 1);
}
static WEBP_INLINE void VP8YuvToBgra(uint8_t y, uint8_t u, uint8_t v,
uint8_t* const bgra) {
VP8YuvToBgr(y, u, v, bgra);
@ -95,18 +230,14 @@ void VP8YUVInit(void);
//------------------------------------------------------------------------------
// RGB -> YUV conversion
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
// More information at: http://en.wikipedia.org/wiki/YCbCr
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128
// We use 16bit fixed point operations.
static WEBP_INLINE int VP8ClipUV(int v) {
v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
v = (v + (257 << (YUV_FIX + 2 - 1))) >> (YUV_FIX + 2);
return ((v & ~0xff) == 0) ? v : (v < 0) ? 0 : 255;
}
#ifndef USE_YUVj
static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
const int kRound = (1 << (YUV_FIX - 1)) + (16 << YUV_FIX);
const int luma = 16839 * r + 33059 * g + 6420 * b;
@ -114,13 +245,38 @@ static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
}
static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
return VP8ClipUV(-9719 * r - 19081 * g + 28800 * b);
const int u = -9719 * r - 19081 * g + 28800 * b;
return VP8ClipUV(u);
}
static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
return VP8ClipUV(+28800 * r - 24116 * g - 4684 * b);
const int v = +28800 * r - 24116 * g - 4684 * b;
return VP8ClipUV(v);
}
#else
// This JPEG-YUV colorspace, only for comparison!
// These are also 16-bit precision coefficients from Rec.601, but with full
// [0..255] output range.
static WEBP_INLINE int VP8RGBToY(int r, int g, int b) {
const int kRound = (1 << (YUV_FIX - 1));
const int luma = 19595 * r + 38470 * g + 7471 * b;
return (luma + kRound) >> YUV_FIX; // no need to clip
}
static WEBP_INLINE int VP8RGBToU(int r, int g, int b) {
const int u = -11058 * r - 21710 * g + 32768 * b;
return VP8ClipUV(u);
}
static WEBP_INLINE int VP8RGBToV(int r, int g, int b) {
const int v = 32768 * r - 27439 * g - 5329 * b;
return VP8ClipUV(v);
}
#endif // USE_YUVj
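A quick numeric check of the default (non-USE_YUVj) RGB->Y path above for pure white, assuming YUV_FIX == 16 so kRound = (1 << 15) + (16 << 16):

#include <stdio.h>

int main(void) {
  const int r = 255, g = 255, b = 255;
  const int luma = 16839 * r + 33059 * g + 6420 * b;
  const int y = (luma + (1 << 15) + (16 << 16)) >> 16;
  printf("Y = %d\n", y);   /* 235, the BT.601 white level */
  return 0;
}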
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Alpha-plane compression.
@ -79,18 +81,17 @@ static int EncodeLossless(const uint8_t* const data, int width, int height,
WebPConfigInit(&config);
config.lossless = 1;
config.method = effort_level; // impact is very small
// Set moderate default quality setting for alpha. Higher qualities (80 and
// above) could be very slow.
config.quality = 10.f + 15.f * effort_level;
if (config.quality > 100.f) config.quality = 100.f;
// Set a moderate default quality setting for alpha.
config.quality = 10.f * effort_level;
assert(config.quality >= 0 && config.quality <= 100.f);
ok = VP8LBitWriterInit(&tmp_bw, (width * height) >> 3);
ok = ok && (VP8LEncodeStream(&config, &picture, &tmp_bw) == VP8_ENC_OK);
WebPPictureFree(&picture);
if (ok) {
const uint8_t* const data = VP8LBitWriterFinish(&tmp_bw);
const size_t data_size = VP8LBitWriterNumBytes(&tmp_bw);
VP8BitWriterAppend(bw, data, data_size);
const uint8_t* const buffer = VP8LBitWriterFinish(&tmp_bw);
const size_t buffer_size = VP8LBitWriterNumBytes(&tmp_bw);
VP8BitWriterAppend(bw, buffer, buffer_size);
}
VP8LBitWriterDestroy(&tmp_bw);
return ok && !bw->error_;
@ -128,8 +129,8 @@ static int EncodeAlphaInternal(const uint8_t* const data, int width, int height,
VP8BitWriterAppend(bw, &header, ALPHA_HEADER_LEN);
filter_func = WebPFilters[filter];
if (filter_func) {
filter_func(data, width, height, 1, width, tmp_alpha);
if (filter_func != NULL) {
filter_func(data, width, height, width, tmp_alpha);
alpha_src = tmp_alpha;
} else {
alpha_src = data;
@ -157,6 +158,25 @@ static void CopyPlane(const uint8_t* src, int src_stride,
}
}
static int GetNumColors(const uint8_t* data, int width, int height,
int stride) {
int j;
int colors = 0;
uint8_t color[256] = { 0 };
for (j = 0; j < height; ++j) {
int i;
const uint8_t* const p = data + j * stride;
for (i = 0; i < width; ++i) {
color[p[i]] = 1;
}
}
for (j = 0; j < 256; ++j) {
if (color[j] > 0) ++colors;
}
return colors;
}
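GetNumColors is a plain presence count over the 256 possible alpha values; a tiny standalone driver with made-up data:

#include <stdint.h>
#include <stdio.h>

int main(void) {
  const uint8_t plane[6] = { 0, 255, 255, 0, 128, 0 };
  uint8_t seen[256] = { 0 };
  int i, colors = 0;
  for (i = 0; i < 6; ++i) seen[plane[i]] = 1;
  for (i = 0; i < 256; ++i) colors += seen[i];
  printf("%d distinct alpha values\n", colors);   /* 3 */
  return 0;
}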
static int EncodeAlpha(VP8Encoder* const enc,
int quality, int method, int filter,
int effort_level,
@ -208,18 +228,32 @@ static int EncodeAlpha(VP8Encoder* const enc,
VP8BitWriter bw;
int test_filter;
uint8_t* filtered_alpha = NULL;
int try_filter_none = (effort_level > 3);
// We always test WEBP_FILTER_NONE first.
ok = EncodeAlphaInternal(quant_alpha, width, height,
method, WEBP_FILTER_NONE, reduce_levels,
effort_level, NULL, &bw, pic->stats);
if (!ok) {
VP8BitWriterWipeOut(&bw);
goto End;
if (filter == WEBP_FILTER_FAST) { // Quick estimate of the best candidate.
const int kMinColorsForFilterNone = 16;
const int kMaxColorsForFilterNone = 192;
const int num_colors = GetNumColors(quant_alpha, width, height, width);
// For a low number of colors, NONE yields better compression.
filter = (num_colors <= kMinColorsForFilterNone) ? WEBP_FILTER_NONE :
EstimateBestFilter(quant_alpha, width, height, width);
// For a large number of colors, try FILTER_NONE in addition to the best
// filter.
if (num_colors > kMaxColorsForFilterNone) {
try_filter_none = 1;
}
}
if (filter == WEBP_FILTER_FAST) { // Quick estimate of a second candidate?
filter = EstimateBestFilter(quant_alpha, width, height, width);
// Test for WEBP_FILTER_NONE for higher effort levels.
if (try_filter_none || filter == WEBP_FILTER_NONE) {
ok = EncodeAlphaInternal(quant_alpha, width, height,
method, WEBP_FILTER_NONE, reduce_levels,
effort_level, NULL, &bw, pic->stats);
if (!ok) {
VP8BitWriterWipeOut(&bw);
goto End;
}
}
// Stop?
if (filter == WEBP_FILTER_NONE) {
@ -235,11 +269,14 @@ static int EncodeAlpha(VP8Encoder* const enc,
// Try the other mode(s).
{
WebPAuxStats best_stats;
size_t best_score = VP8BitWriterSize(&bw);
size_t best_score = try_filter_none ?
VP8BitWriterSize(&bw) : (size_t)~0U;
int wipe_tmp_bw = try_filter_none;
memset(&best_stats, 0, sizeof(best_stats)); // prevent spurious warning
if (pic->stats != NULL) best_stats = *pic->stats;
for (test_filter = WEBP_FILTER_HORIZONTAL;
for (test_filter =
try_filter_none ? WEBP_FILTER_HORIZONTAL : WEBP_FILTER_NONE;
ok && (test_filter <= WEBP_FILTER_GRADIENT);
++test_filter) {
VP8BitWriter tmp_bw;
@ -263,7 +300,10 @@ static int EncodeAlpha(VP8Encoder* const enc,
} else {
VP8BitWriterWipeOut(&bw);
}
VP8BitWriterWipeOut(&tmp_bw);
if (wipe_tmp_bw) {
VP8BitWriterWipeOut(&tmp_bw);
}
wipe_tmp_bw = 1; // For next filter trial for WEBP_FILTER_BEST.
}
if (pic->stats != NULL) *pic->stats = best_stats;
}
@ -287,42 +327,80 @@ static int EncodeAlpha(VP8Encoder* const enc,
//------------------------------------------------------------------------------
// Main calls
static int CompressAlphaJob(VP8Encoder* const enc, void* dummy) {
const WebPConfig* config = enc->config_;
uint8_t* alpha_data = NULL;
size_t alpha_size = 0;
const int effort_level = config->method; // maps to [0..6]
const WEBP_FILTER_TYPE filter =
(config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
(config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
WEBP_FILTER_BEST;
if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
filter, effort_level, &alpha_data, &alpha_size)) {
return 0;
}
if (alpha_size != (uint32_t)alpha_size) { // Sanity check.
free(alpha_data);
return 0;
}
enc->alpha_data_size_ = (uint32_t)alpha_size;
enc->alpha_data_ = alpha_data;
(void)dummy;
return 1;
}
void VP8EncInitAlpha(VP8Encoder* const enc) {
enc->has_alpha_ = WebPPictureHasTransparency(enc->pic_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
WebPWorkerInit(worker);
worker->data1 = enc;
worker->data2 = NULL;
worker->hook = (WebPWorkerHook)CompressAlphaJob;
}
}
int VP8EncStartAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (!WebPWorkerReset(worker)) { // Makes sure worker is good to go.
return 0;
}
WebPWorkerLaunch(worker);
return 1;
} else {
return CompressAlphaJob(enc, NULL); // just do the job right away
}
}
return 1;
}
int VP8EncFinishAlpha(VP8Encoder* const enc) {
if (enc->has_alpha_) {
const WebPConfig* config = enc->config_;
uint8_t* tmp_data = NULL;
size_t tmp_size = 0;
const int effort_level = config->method; // maps to [0..6]
const WEBP_FILTER_TYPE filter =
(config->alpha_filtering == 0) ? WEBP_FILTER_NONE :
(config->alpha_filtering == 1) ? WEBP_FILTER_FAST :
WEBP_FILTER_BEST;
if (!EncodeAlpha(enc, config->alpha_quality, config->alpha_compression,
filter, effort_level, &tmp_data, &tmp_size)) {
return 0;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
if (!WebPWorkerSync(worker)) return 0; // error
}
if (tmp_size != (uint32_t)tmp_size) { // Sanity check.
free(tmp_data);
return 0;
}
enc->alpha_data_size_ = (uint32_t)tmp_size;
enc->alpha_data_ = tmp_data;
}
return WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}
void VP8EncDeleteAlpha(VP8Encoder* const enc) {
int VP8EncDeleteAlpha(VP8Encoder* const enc) {
int ok = 1;
if (enc->thread_level_ > 0) {
WebPWorker* const worker = &enc->alpha_worker_;
ok = WebPWorkerSync(worker); // finish anything left in flight
WebPWorkerEnd(worker); // still need to end the worker, even if !ok
}
free(enc->alpha_data_);
enc->alpha_data_ = NULL;
enc->alpha_data_size_ = 0;
enc->has_alpha_ = 0;
return ok;
}
#if defined(__cplusplus) || defined(c_plusplus)

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Macroblock analysis
@ -23,10 +25,6 @@ extern "C" {
#define MAX_ITERS_K_MEANS 6
static int ClipAlpha(int alpha) {
return alpha < 0 ? 0 : alpha > 255 ? 255 : alpha;
}
//------------------------------------------------------------------------------
// Smooth the segment map by replacing isolated block by the majority of its
// neighbours.
@ -72,50 +70,10 @@ static void SmoothSegmentMap(VP8Encoder* const enc) {
}
//------------------------------------------------------------------------------
// Finalize Segment probability based on the coding tree
static int GetProba(int a, int b) {
int proba;
const int total = a + b;
if (total == 0) return 255; // that's the default probability.
proba = (255 * a + total / 2) / total;
return proba;
}
static void SetSegmentProbas(VP8Encoder* const enc) {
int p[NUM_MB_SEGMENTS] = { 0 };
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
const VP8MBInfo* const mb = &enc->mb_info_[n];
p[mb->segment_]++;
}
if (enc->pic_->stats) {
for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
enc->pic_->stats->segment_size[n] = p[n];
}
}
if (enc->segment_hdr_.num_segments_ > 1) {
uint8_t* const probas = enc->proba_.segments_;
probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
probas[1] = GetProba(p[0], p[1]);
probas[2] = GetProba(p[2], p[3]);
enc->segment_hdr_.update_map_ =
(probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
enc->segment_hdr_.size_ =
p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
} else {
enc->segment_hdr_.update_map_ = 0;
enc->segment_hdr_.size_ = 0;
}
}
// set segment susceptibility alpha_ / beta_
static WEBP_INLINE int clip(int v, int m, int M) {
return v < m ? m : v > M ? M : v;
return (v < m) ? m : (v > M) ? M : v;
}
static void SetSegmentAlphas(VP8Encoder* const enc,
@ -141,23 +99,64 @@ static void SetSegmentAlphas(VP8Encoder* const enc,
}
}
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
#define MAX_ALPHA 255 // 8b of precision for susceptibilities.
#define ALPHA_SCALE (2 * MAX_ALPHA) // scaling factor for alpha.
#define DEFAULT_ALPHA (-1)
#define IS_BETTER_ALPHA(alpha, best_alpha) ((alpha) > (best_alpha))
static int FinalAlphaValue(int alpha) {
alpha = MAX_ALPHA - alpha;
return clip(alpha, 0, MAX_ALPHA);
}
static int GetAlpha(const VP8Histogram* const histo) {
int max_value = 0, last_non_zero = 1;
int k;
int alpha;
for (k = 0; k <= MAX_COEFF_THRESH; ++k) {
const int value = histo->distribution[k];
if (value > 0) {
if (value > max_value) max_value = value;
last_non_zero = k;
}
}
// 'alpha' will later be clipped to [0..MAX_ALPHA] range, clamping outer
// values which happen to be mostly noise. This leaves the maximum precision
// for handling the useful small values which contribute most.
alpha = (max_value > 1) ? ALPHA_SCALE * last_non_zero / max_value : 0;
return alpha;
}
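A toy evaluation of the susceptibility formula in GetAlpha, assuming ALPHA_SCALE == 510 (2 * MAX_ALPHA): a histogram whose last non-zero bin is 3 and whose peak count is 100, i.e. few, strongly peaked coefficients, hence an "easy" block.

#include <stdio.h>

int main(void) {
  const int last_non_zero = 3, max_value = 100;
  const int alpha = (max_value > 1) ? 510 * last_non_zero / max_value : 0;
  printf("alpha = %d\n", alpha);   /* 15 */
  return 0;
}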
static void MergeHistograms(const VP8Histogram* const in,
VP8Histogram* const out) {
int i;
for (i = 0; i <= MAX_COEFF_THRESH; ++i) {
out->distribution[i] += in->distribution[i];
}
}
//------------------------------------------------------------------------------
// Simplified k-Means, to assign Nb segments based on alpha-histogram
static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
static void AssignSegments(VP8Encoder* const enc,
const int alphas[MAX_ALPHA + 1]) {
const int nb = enc->segment_hdr_.num_segments_;
int centers[NUM_MB_SEGMENTS];
int weighted_average = 0;
int map[256];
int map[MAX_ALPHA + 1];
int a, n, k;
int min_a = 0, max_a = 255, range_a;
int min_a = 0, max_a = MAX_ALPHA, range_a;
// 'int' type is ok for histo, and won't overflow
int accum[NUM_MB_SEGMENTS], dist_accum[NUM_MB_SEGMENTS];
// bracket the input
for (n = 0; n < 256 && alphas[n] == 0; ++n) {}
for (n = 0; n <= MAX_ALPHA && alphas[n] == 0; ++n) {}
min_a = n;
for (n = 255; n > min_a && alphas[n] == 0; --n) {}
for (n = MAX_ALPHA; n > min_a && alphas[n] == 0; --n) {}
max_a = n;
range_a = max_a - min_a;
@ -210,7 +209,7 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
VP8MBInfo* const mb = &enc->mb_info_[n];
const int alpha = mb->alpha_;
mb->segment_ = map[alpha];
mb->alpha_ = centers[map[alpha]]; // just for the record.
mb->alpha_ = centers[map[alpha]]; // for the record.
}
if (nb > 1) {
@ -218,7 +217,6 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
if (smooth) SmoothSegmentMap(enc);
}
SetSegmentProbas(enc); // Assign final proba
SetSegmentAlphas(enc, centers, weighted_average); // pick some alphas.
}
@ -227,24 +225,32 @@ static void AssignSegments(VP8Encoder* const enc, const int alphas[256]) {
// susceptibility and set best modes for this macroblock.
// Segment assignment is done later.
// Number of modes to inspect for alpha_ evaluation. For high-quality settings,
// we don't need to test all the possible modes during the analysis phase.
// Number of modes to inspect for alpha_ evaluation. For high-quality settings
// (method >= FAST_ANALYSIS_METHOD) we don't need to test all the possible modes
// during the analysis phase.
#define FAST_ANALYSIS_METHOD 4 // method above which we do partial analysis
#define MAX_INTRA16_MODE 2
#define MAX_INTRA4_MODE 2
#define MAX_UV_MODE 2
static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA16_MODE : 4;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA16_MODE
: NUM_PRED_MODES;
int mode;
int best_alpha = -1;
int best_alpha = DEFAULT_ALPHA;
int best_mode = 0;
VP8MakeLuma16Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(it->yuv_in_ + Y_OFF,
it->yuv_p_ + VP8I16ModeOffsets[mode],
0, 16);
if (alpha > best_alpha) {
VP8Histogram histo = { { 0 } };
int alpha;
VP8CollectHistogram(it->yuv_in_ + Y_OFF,
it->yuv_p_ + VP8I16ModeOffsets[mode],
0, 16, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
best_alpha = alpha;
best_mode = mode;
}
@ -256,46 +262,63 @@ static int MBAnalyzeBestIntra16Mode(VP8EncIterator* const it) {
static int MBAnalyzeBestIntra4Mode(VP8EncIterator* const it,
int best_alpha) {
uint8_t modes[16];
const int max_mode = (it->enc_->method_ >= 3) ? MAX_INTRA4_MODE : NUM_BMODES;
int i4_alpha = 0;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_INTRA4_MODE
: NUM_BMODES;
int i4_alpha;
VP8Histogram total_histo = { { 0 } };
int cur_histo = 0;
VP8IteratorStartI4(it);
do {
int mode;
int best_mode_alpha = -1;
int best_mode_alpha = DEFAULT_ALPHA;
VP8Histogram histos[2];
const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
VP8MakeIntra4Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(src,
it->yuv_p_ + VP8I4ModeOffsets[mode],
0, 1);
if (alpha > best_mode_alpha) {
int alpha;
memset(&histos[cur_histo], 0, sizeof(histos[cur_histo]));
VP8CollectHistogram(src, it->yuv_p_ + VP8I4ModeOffsets[mode],
0, 1, &histos[cur_histo]);
alpha = GetAlpha(&histos[cur_histo]);
if (IS_BETTER_ALPHA(alpha, best_mode_alpha)) {
best_mode_alpha = alpha;
modes[it->i4_] = mode;
cur_histo ^= 1; // keep track of best histo so far.
}
}
i4_alpha += best_mode_alpha;
// accumulate best histogram
MergeHistograms(&histos[cur_histo ^ 1], &total_histo);
// Note: we reuse the original samples for predictors
} while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
if (i4_alpha > best_alpha) {
i4_alpha = GetAlpha(&total_histo);
if (IS_BETTER_ALPHA(i4_alpha, best_alpha)) {
VP8SetIntra4Mode(it, modes);
best_alpha = ClipAlpha(i4_alpha);
best_alpha = i4_alpha;
}
return best_alpha;
}
static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
int best_alpha = -1;
int best_alpha = DEFAULT_ALPHA;
int best_mode = 0;
const int max_mode = (it->enc_->method_ >= 3) ? MAX_UV_MODE : 4;
const int max_mode =
(it->enc_->method_ >= FAST_ANALYSIS_METHOD) ? MAX_UV_MODE
: NUM_PRED_MODES;
int mode;
VP8MakeChroma8Preds(it);
for (mode = 0; mode < max_mode; ++mode) {
const int alpha = VP8CollectHistogram(it->yuv_in_ + U_OFF,
it->yuv_p_ + VP8UVModeOffsets[mode],
16, 16 + 4 + 4);
if (alpha > best_alpha) {
VP8Histogram histo = { { 0 } };
int alpha;
VP8CollectHistogram(it->yuv_in_ + U_OFF,
it->yuv_p_ + VP8UVModeOffsets[mode],
16, 16 + 4 + 4, &histo);
alpha = GetAlpha(&histo);
if (IS_BETTER_ALPHA(alpha, best_alpha)) {
best_alpha = alpha;
best_mode = mode;
}
@ -305,7 +328,8 @@ static int MBAnalyzeBestUVMode(VP8EncIterator* const it) {
}
static void MBAnalyze(VP8EncIterator* const it,
int alphas[256], int* const uv_alpha) {
int alphas[MAX_ALPHA + 1],
int* const alpha, int* const uv_alpha) {
const VP8Encoder* const enc = it->enc_;
int best_alpha, best_uv_alpha;
@ -314,7 +338,7 @@ static void MBAnalyze(VP8EncIterator* const it,
VP8SetSegment(it, 0); // default segment, spec-wise.
best_alpha = MBAnalyzeBestIntra16Mode(it);
if (enc->method_ != 3) {
if (enc->method_ >= 5) {
// We go and make a fast decision for intra4/intra16.
// It's usually not a good and definitive pick, but helps seeding the stats
// about level bit-cost.
@ -324,10 +348,22 @@ static void MBAnalyze(VP8EncIterator* const it,
best_uv_alpha = MBAnalyzeBestUVMode(it);
// Final susceptibility mix
best_alpha = (best_alpha + best_uv_alpha + 1) / 2;
best_alpha = (3 * best_alpha + best_uv_alpha + 2) >> 2;
best_alpha = FinalAlphaValue(best_alpha);
alphas[best_alpha]++;
it->mb_->alpha_ = best_alpha; // for later remapping.
// Accumulate for later complexity analysis.
*alpha += best_alpha; // mixed susceptibility (not just luma)
*uv_alpha += best_uv_alpha;
it->mb_->alpha_ = best_alpha; // Informative only.
}
static void DefaultMBInfo(VP8MBInfo* const mb) {
mb->type_ = 1; // I16x16
mb->uv_mode_ = 0;
mb->skip_ = 0; // not skipped
mb->segment_ = 0; // default segment
mb->alpha_ = 0;
}
//------------------------------------------------------------------------------
@ -340,22 +376,43 @@ static void MBAnalyze(VP8EncIterator* const it,
// and decide intra4/intra16, but that's usually almost always a bad choice at
// this stage.
static void ResetAllMBInfo(VP8Encoder* const enc) {
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
DefaultMBInfo(&enc->mb_info_[n]);
}
// Default susceptibilities.
enc->dqm_[0].alpha_ = 0;
enc->dqm_[0].beta_ = 0;
// Note: we can't compute this alpha_ / uv_alpha_.
WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
}
int VP8EncAnalyze(VP8Encoder* const enc) {
int ok = 1;
int alphas[256] = { 0 };
VP8EncIterator it;
VP8IteratorInit(enc, &it);
const int do_segments =
enc->config_->emulate_jpeg_size || // We need the complexity evaluation.
(enc->segment_hdr_.num_segments_ > 1) ||
(enc->method_ == 0); // for method 0, we need preds_[] to be filled.
enc->alpha_ = 0;
enc->uv_alpha_ = 0;
do {
VP8IteratorImport(&it);
MBAnalyze(&it, alphas, &enc->uv_alpha_);
ok = VP8IteratorProgress(&it, 20);
// Let's pretend we have perfect lossless reconstruction.
} while (ok && VP8IteratorNext(&it, it.yuv_in_));
enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
if (ok) AssignSegments(enc, alphas);
if (do_segments) {
int alphas[MAX_ALPHA + 1] = { 0 };
VP8EncIterator it;
VP8IteratorInit(enc, &it);
do {
VP8IteratorImport(&it);
MBAnalyze(&it, alphas, &enc->alpha_, &enc->uv_alpha_);
ok = VP8IteratorProgress(&it, 20);
// Let's pretend we have perfect lossless reconstruction.
} while (ok && VP8IteratorNext(&it, it.yuv_in_));
enc->alpha_ /= enc->mb_w_ * enc->mb_h_;
enc->uv_alpha_ /= enc->mb_w_ * enc->mb_h_;
if (ok) AssignSegments(enc, alphas);
} else { // Use only one default segment.
ResetAllMBInfo(enc);
}
return ok;
}

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
@ -141,74 +143,86 @@ static void HashChainInsert(HashChain* const p,
p->hash_to_first_index_[hash_code] = pos;
}
static void GetParamsForHashChainFindCopy(int quality, int xsize,
int cache_bits, int* window_size,
int* iter_pos, int* iter_limit) {
const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
const int iter_neg = -iter_mult * (quality >> 1);
// Limit the backward-ref window size for lower qualities.
const int max_window_size = (quality > 50) ? WINDOW_SIZE
: (quality > 25) ? (xsize << 8)
: (xsize << 4);
assert(xsize > 0);
*window_size = (max_window_size > WINDOW_SIZE) ? WINDOW_SIZE
: max_window_size;
*iter_pos = 8 + (quality >> 3);
// For lower-entropy images, the rigorous search loop in HashChainFindCopy
// can be relaxed.
*iter_limit = (cache_bits > 0) ? iter_neg : iter_neg / 2;
}
static int HashChainFindCopy(const HashChain* const p,
int quality, int index, int xsize,
int base_position, int xsize_signed,
const uint32_t* const argb, int maxlen,
int window_size, int iter_pos, int iter_limit,
int* const distance_ptr,
int* const length_ptr) {
const uint64_t hash_code = GetPixPairHash64(&argb[index]);
int prev_length = 0;
int64_t best_val = 0;
int best_length = 0;
int best_distance = 0;
const uint32_t* const argb_start = argb + index;
const int iter_min_mult = (quality < 50) ? 2 : (quality < 75) ? 4 : 8;
const int iter_min = -quality * iter_min_mult;
int iter_cnt = 10 + (quality >> 1);
const int min_pos = (index > WINDOW_SIZE) ? index - WINDOW_SIZE : 0;
const uint32_t* const argb_start = argb + base_position;
uint64_t best_val = 0;
uint32_t best_length = 1;
uint32_t best_distance = 0;
const uint32_t xsize = (uint32_t)xsize_signed;
const int min_pos =
(base_position > window_size) ? base_position - window_size : 0;
int pos;
assert(xsize > 0);
for (pos = p->hash_to_first_index_[hash_code];
for (pos = p->hash_to_first_index_[GetPixPairHash64(argb_start)];
pos >= min_pos;
pos = p->chain_[pos]) {
int64_t val;
int curr_length;
if (iter_cnt < 0) {
if (iter_cnt < iter_min || best_val >= 0xff0000) {
uint64_t val;
uint32_t curr_length;
uint32_t distance;
if (iter_pos < 0) {
if (iter_pos < iter_limit || best_val >= 0xff0000) {
break;
}
}
--iter_cnt;
if (best_length != 0 &&
argb[pos + best_length - 1] != argb_start[best_length - 1]) {
--iter_pos;
if (argb[pos + best_length - 1] != argb_start[best_length - 1]) {
continue;
}
curr_length = FindMatchLength(argb + pos, argb_start, maxlen);
if (curr_length < prev_length) {
if (curr_length < best_length) {
continue;
}
val = 65536 * curr_length;
distance = (uint32_t)(base_position - pos);
val = curr_length << 16;
// Favoring 2d locality here gives savings for certain images.
if (index - pos < 9 * xsize) {
const int y = (index - pos) / xsize;
int x = (index - pos) % xsize;
if (x > xsize / 2) {
if (distance < 9 * xsize) {
const uint32_t y = distance / xsize;
uint32_t x = distance % xsize;
if (x > (xsize >> 1)) {
x = xsize - x;
}
if (x <= 7 && x >= -8) {
if (x <= 7) {
val += 9 * 9 + 9 * 9;
val -= y * y + x * x;
} else {
val -= 9 * 9 + 9 * 9;
}
} else {
val -= 9 * 9 + 9 * 9;
}
if (best_val < val) {
prev_length = curr_length;
best_val = val;
best_length = curr_length;
best_distance = index - pos;
best_distance = distance;
if (curr_length >= MAX_LENGTH) {
break;
}
if ((best_distance == 1 || best_distance == xsize) &&
if ((best_distance == 1 || distance == xsize) &&
best_length >= 128) {
break;
}
}
}
*distance_ptr = best_distance;
*distance_ptr = (int)best_distance;
*length_ptr = best_length;
return (best_length >= MIN_LENGTH);
}
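The 2-D locality bonus used inside the loop above, pulled out as a standalone hypothetical helper for clarity: a candidate 'distance' pixels back maps to an (x, y) offset in the image and is rewarded when it falls inside a small neighbourhood, penalized otherwise.

#include <stdint.h>
#include <stdio.h>

static int64_t LocalityBonus(uint32_t distance, uint32_t xsize) {
  if (distance < 9 * xsize) {
    const uint32_t y = distance / xsize;
    uint32_t x = distance % xsize;
    if (x > (xsize >> 1)) x = xsize - x;       /* wrap to the nearer column */
    if (x <= 7) return 9 * 9 + 9 * 9 - (int64_t)(y * y + x * x);
  }
  return -(9 * 9 + 9 * 9);                     /* distant matches are penalized */
}

int main(void) {
  printf("%lld %lld\n",
         (long long)LocalityBonus(1, 640),     /* 161: pixel right next door */
         (long long)LocalityBonus(6400, 640)); /* -162: ten rows away */
  return 0;
}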
@ -257,6 +271,9 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
const int pix_count = xsize * ysize;
HashChain* const hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
VP8LColorCache hashers;
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
if (hash_chain == NULL) return 0;
if (use_color_cache) {
@ -267,6 +284,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (!HashChainInit(hash_chain, pix_count)) goto Error;
refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&window_size, &iter_pos, &iter_limit);
for (i = 0; i < pix_count; ) {
// Alternative#1: Code the pixels starting at 'i' using backward reference.
int offset = 0;
@ -276,7 +295,8 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (maxlen > MAX_LENGTH) {
maxlen = MAX_LENGTH;
}
HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
}
if (len >= MIN_LENGTH) {
@ -291,8 +311,9 @@ static int BackwardReferencesHashChain(int xsize, int ysize,
if (maxlen > MAX_LENGTH) {
maxlen = MAX_LENGTH;
}
HashChainFindCopy(hash_chain, quality,
i + 1, xsize, argb, maxlen, &offset2, &len2);
HashChainFindCopy(hash_chain, i + 1, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset2, &len2);
if (len2 > len + 1) {
const uint32_t pixel = argb[i];
// Alternative#2 is a better match. So push pixel at 'i' as literal.
@ -362,7 +383,8 @@ typedef struct {
static int BackwardReferencesTraceBackwards(
int xsize, int ysize, int recursive_cost_model,
const uint32_t* const argb, int cache_bits, VP8LBackwardRefs* const refs);
const uint32_t* const argb, int quality, int cache_bits,
VP8LBackwardRefs* const refs);
static void ConvertPopulationCountTableToBitEstimates(
int num_symbols, const int population_counts[], double output[]) {
@ -387,17 +409,16 @@ static void ConvertPopulationCountTableToBitEstimates(
static int CostModelBuild(CostModel* const m, int xsize, int ysize,
int recursion_level, const uint32_t* const argb,
int cache_bits) {
int quality, int cache_bits) {
int ok = 0;
VP8LHistogram histo;
VP8LBackwardRefs refs;
const int quality = 100;
if (!VP8LBackwardRefsAlloc(&refs, xsize * ysize)) goto Error;
if (recursion_level > 0) {
if (!BackwardReferencesTraceBackwards(xsize, ysize, recursion_level - 1,
argb, cache_bits, &refs)) {
argb, quality, cache_bits, &refs)) {
goto Error;
}
} else {
@ -452,11 +473,10 @@ static WEBP_INLINE double GetDistanceCost(const CostModel* const m,
static int BackwardReferencesHashChainDistanceOnly(
int xsize, int ysize, int recursive_cost_model, const uint32_t* const argb,
int cache_bits, uint32_t* const dist_array) {
int quality, int cache_bits, uint32_t* const dist_array) {
int i;
int ok = 0;
int cc_init = 0;
const int quality = 100;
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
float* const cost =
@ -466,6 +486,10 @@ static int BackwardReferencesHashChainDistanceOnly(
VP8LColorCache hashers;
const double mul0 = (recursive_cost_model != 0) ? 1.0 : 0.68;
const double mul1 = (recursive_cost_model != 0) ? 1.0 : 0.82;
const int min_distance_code = 2; // TODO(vikasa): tune as function of quality
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
if (cost == NULL || cost_model == NULL || hash_chain == NULL) goto Error;
@ -477,7 +501,7 @@ static int BackwardReferencesHashChainDistanceOnly(
}
if (!CostModelBuild(cost_model, xsize, ysize, recursive_cost_model, argb,
cache_bits)) {
quality, cache_bits)) {
goto Error;
}
@ -486,6 +510,8 @@ static int BackwardReferencesHashChainDistanceOnly(
// We loop one pixel at a time, but store all currently best points to
// non-processed locations from this point.
dist_array[0] = 0;
GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&window_size, &iter_pos, &iter_limit);
for (i = 0; i < pix_count; ++i) {
double prev_cost = 0.0;
int shortmax;
@ -500,7 +526,8 @@ static int BackwardReferencesHashChainDistanceOnly(
if (maxlen > pix_count - i) {
maxlen = pix_count - i;
}
HashChainFindCopy(hash_chain, quality, i, xsize, argb, maxlen,
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
}
if (len >= MIN_LENGTH) {
@ -517,7 +544,7 @@ static int BackwardReferencesHashChainDistanceOnly(
}
// This if is for speedup only. It roughly doubles the speed, and
// makes compression worse by .1 %.
if (len >= 128 && code < 2) {
if (len >= 128 && code <= min_distance_code) {
// Long copy for short distances, let's skip the middle
// lookups for better copies.
// 1) insert the hashes.
@ -528,10 +555,10 @@ static int BackwardReferencesHashChainDistanceOnly(
}
// 2) Add to the hash_chain (but cannot add the last pixel)
{
const int last = (len < pix_count - 1 - i) ? len
: pix_count - 1 - i;
for (k = 0; k < last; ++k) {
HashChainInsert(hash_chain, &argb[i + k], i + k);
const int last = (len + i < pix_count - 1) ? len + i
: pix_count - 1;
for (k = i; k < last; ++k) {
HashChainInsert(hash_chain, &argb[k], k);
}
}
// 3) jump.
@ -571,40 +598,30 @@ Error:
return ok;
}
static int TraceBackwards(const uint32_t* const dist_array,
int dist_array_size,
uint32_t** const chosen_path,
int* const chosen_path_size) {
int i;
// Count how many.
int count = 0;
for (i = dist_array_size - 1; i >= 0; ) {
int k = dist_array[i];
assert(k >= 1);
++count;
i -= k;
// We pack the path at the end of *dist_array and return
// a pointer to this part of the array. Example:
// dist_array = [1x2xx3x2] => packed [1x2x1232], chosen_path = [1232]
static void TraceBackwards(uint32_t* const dist_array,
int dist_array_size,
uint32_t** const chosen_path,
int* const chosen_path_size) {
uint32_t* path = dist_array + dist_array_size;
uint32_t* cur = dist_array + dist_array_size - 1;
while (cur >= dist_array) {
const int k = *cur;
--path;
*path = k;
cur -= k;
}
// Allocate.
*chosen_path_size = count;
*chosen_path =
(uint32_t*)WebPSafeMalloc((uint64_t)count, sizeof(**chosen_path));
if (*chosen_path == NULL) return 0;
// Write in reverse order.
for (i = dist_array_size - 1; i >= 0; ) {
int k = dist_array[i];
assert(k >= 1);
(*chosen_path)[--count] = k;
i -= k;
}
return 1;
*chosen_path = path;
*chosen_path_size = (int)(dist_array + dist_array_size - path);
}
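The packing above works in place: each dist_array entry holds the length of the step that ends there, and the values visited while walking backwards from the last pixel are written, in reverse, into the tail of the same array. A stand-alone walk-through of the "1x2xx3x2" example from the comment (the driver below is hypothetical, not library code):

#include <stdio.h>
#include <stdint.h>

int main(void) {
  /* Same data as the comment's example "1x2xx3x2"; 0 marks the don't-care
   * slots covered by a copy. */
  uint32_t dist_array[8] = { 1, 0, 2, 0, 0, 3, 0, 2 };
  const int dist_array_size = 8;
  int pos = dist_array_size - 1;         /* walk backwards from the last pixel */
  int path_top = dist_array_size;        /* the path is packed into the tail   */
  int i, chosen_path_size;

  while (pos >= 0) {
    const int k = (int)dist_array[pos];  /* length of the step ending at 'pos' */
    dist_array[--path_top] = (uint32_t)k;
    pos -= k;
  }
  chosen_path_size = dist_array_size - path_top;
  for (i = 0; i < chosen_path_size; ++i) {
    printf("%u ", (unsigned)dist_array[path_top + i]);
  }
  printf("\n");                          /* prints: 1 2 3 2 */
  return 0;
}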
static int BackwardReferencesHashChainFollowChosenPath(
int xsize, int ysize, const uint32_t* const argb, int cache_bits,
int xsize, int ysize, const uint32_t* const argb,
int quality, int cache_bits,
const uint32_t* const chosen_path, int chosen_path_size,
VP8LBackwardRefs* const refs) {
const int quality = 100;
const int pix_count = xsize * ysize;
const int use_color_cache = (cache_bits > 0);
int size = 0;
@ -613,6 +630,9 @@ static int BackwardReferencesHashChainFollowChosenPath(
int ix;
int ok = 0;
int cc_init = 0;
int window_size = WINDOW_SIZE;
int iter_pos = 1;
int iter_limit = -1;
HashChain* hash_chain = (HashChain*)malloc(sizeof(*hash_chain));
VP8LColorCache hashers;
@ -625,13 +645,16 @@ static int BackwardReferencesHashChainFollowChosenPath(
}
refs->size = 0;
GetParamsForHashChainFindCopy(quality, xsize, cache_bits,
&window_size, &iter_pos, &iter_limit);
for (ix = 0; ix < chosen_path_size; ++ix, ++size) {
int offset = 0;
int len = 0;
int maxlen = chosen_path[ix];
if (maxlen != 1) {
HashChainFindCopy(hash_chain, quality,
i, xsize, argb, maxlen, &offset, &len);
HashChainFindCopy(hash_chain, i, xsize, argb, maxlen,
window_size, iter_pos, iter_limit,
&offset, &len);
assert(len == maxlen);
refs->refs[size] = PixOrCopyCreateCopy(offset, len);
if (use_color_cache) {
@ -674,7 +697,7 @@ Error:
static int BackwardReferencesTraceBackwards(int xsize, int ysize,
int recursive_cost_model,
const uint32_t* const argb,
int cache_bits,
int quality, int cache_bits,
VP8LBackwardRefs* const refs) {
int ok = 0;
const int dist_array_size = xsize * ysize;
@ -686,22 +709,18 @@ static int BackwardReferencesTraceBackwards(int xsize, int ysize,
if (dist_array == NULL) goto Error;
if (!BackwardReferencesHashChainDistanceOnly(
xsize, ysize, recursive_cost_model, argb, cache_bits, dist_array)) {
xsize, ysize, recursive_cost_model, argb, quality, cache_bits,
dist_array)) {
goto Error;
}
if (!TraceBackwards(dist_array, dist_array_size,
&chosen_path, &chosen_path_size)) {
goto Error;
}
free(dist_array); // no need to retain this memory any longer
dist_array = NULL;
TraceBackwards(dist_array, dist_array_size, &chosen_path, &chosen_path_size);
if (!BackwardReferencesHashChainFollowChosenPath(
xsize, ysize, argb, cache_bits, chosen_path, chosen_path_size, refs)) {
xsize, ysize, argb, quality, cache_bits, chosen_path, chosen_path_size,
refs)) {
goto Error;
}
ok = 1;
Error:
free(chosen_path);
free(dist_array);
return ok;
}
@ -761,18 +780,20 @@ int VP8LGetBackwardReferences(int width, int height,
// Choose appropriate backward reference.
if (lz77_is_useful) {
// TraceBackwards is costly. Run it for higher qualities.
const int try_lz77_trace_backwards = (quality >= 75);
// TraceBackwards is costly. Don't execute it at lower quality (q <= 10).
const int try_lz77_trace_backwards = (quality > 10);
*best = refs_lz77; // default guess: lz77 is better
VP8LClearBackwardRefs(&refs_rle);
if (try_lz77_trace_backwards) {
const int recursion_level = (num_pix < 320 * 200) ? 1 : 0;
// Set recursion level for large images using a color cache.
const int recursion_level =
(num_pix < 320 * 200) && (cache_bits > 0) ? 1 : 0;
VP8LBackwardRefs refs_trace;
if (!VP8LBackwardRefsAlloc(&refs_trace, num_pix)) {
goto End;
}
if (BackwardReferencesTraceBackwards(
width, height, recursion_level, argb, cache_bits, &refs_trace)) {
if (BackwardReferencesTraceBackwards(width, height, recursion_level, argb,
quality, cache_bits, &refs_trace)) {
VP8LClearBackwardRefs(&refs_lz77);
*best = refs_trace;
}


@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
@ -35,7 +37,8 @@ extern "C" {
#if defined(__GNUC__) && \
((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4)
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
return n == 0 ? -1 : 31 ^ __builtin_clz(n);
assert(n != 0);
return 31 ^ __builtin_clz(n);
}
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
#include <intrin.h>
@ -43,15 +46,18 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
unsigned long first_set_bit;
return _BitScanReverse(&first_set_bit, n) ? first_set_bit : -1;
assert(n != 0);
_BitScanReverse(&first_set_bit, n);
return first_set_bit;
}
#else
// Returns (int)floor(log2(n)). n must be > 0.
static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
int log = 0;
uint32_t value = n;
int i;
if (value == 0) return -1;
assert(n != 0);
for (i = 4; i >= 0; --i) {
const int shift = (1 << i);
const uint32_t x = value >> shift;
@ -65,11 +71,11 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) {
#endif
static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
const int floor = BitsLog2Floor(n);
const int log_floor = BitsLog2Floor(n);
if (n == (n & ~(n - 1))) // zero or a power of two.
return floor;
return log_floor;
else
return floor + 1;
return log_floor + 1;
}
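The ceiling variant only differs from the floor when n is not a power of two; n & ~(n - 1) isolates the lowest set bit, which equals n exactly for powers of two. A quick stand-alone check of that test (illustration only):

#include <stdio.h>

int main(void) {
  unsigned n;
  for (n = 1; n <= 8; ++n) {
    /* prints 1 for n = 1, 2, 4, 8 and 0 otherwise */
    printf("n=%u power-of-two=%d\n", n, n == (n & ~(n - 1)));
  }
  return 0;
}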
// Splitting of distance and length codes into prefixes and
@ -78,16 +84,17 @@ static WEBP_INLINE int VP8LBitsLog2Ceiling(uint32_t n) {
static WEBP_INLINE void PrefixEncode(int distance, int* const code,
int* const extra_bits_count,
int* const extra_bits_value) {
// Collect the two most significant bits where the highest bit is 1.
const int highest_bit = BitsLog2Floor(--distance);
// & 0x3f is to make behavior well defined when highest_bit
// does not exist or is the least significant bit.
const int second_highest_bit =
(distance >> ((highest_bit - 1) & 0x3f)) & 1;
*extra_bits_count = (highest_bit > 0) ? (highest_bit - 1) : 0;
*extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
*code = (highest_bit > 0) ? (2 * highest_bit + second_highest_bit)
: (highest_bit == 0) ? 1 : 0;
if (distance > 2) { // Collect the two most significant bits.
const int highest_bit = BitsLog2Floor(--distance);
const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
*extra_bits_count = highest_bit - 1;
*extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
*code = 2 * highest_bit + second_highest_bit;
} else {
*extra_bits_count = 0;
*extra_bits_value = 0;
*code = (distance == 2) ? 1 : 0;
}
}
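For reference, a minimal stand-alone sketch of the prefix split implemented above (helper names are illustrative, not library code): distances 1 and 2 map directly to codes 0 and 1, anything larger is encoded as a prefix built from its two most significant bits plus (highest_bit - 1) literal extra bits.

#include <assert.h>
#include <stdio.h>

static int Log2Floor(unsigned n) {    /* portable floor(log2(n)), n > 0 */
  int log = -1;
  assert(n != 0);
  while (n) { ++log; n >>= 1; }
  return log;
}

/* Same splitting rule as the new PrefixEncode() above. */
static void PrefixEncodeSketch(int distance, int* code,
                               int* extra_bits_count, int* extra_bits_value) {
  if (distance > 2) {
    const int highest_bit = Log2Floor((unsigned)--distance);
    const int second_highest_bit = (distance >> (highest_bit - 1)) & 1;
    *extra_bits_count = highest_bit - 1;
    *extra_bits_value = distance & ((1 << *extra_bits_count) - 1);
    *code = 2 * highest_bit + second_highest_bit;
  } else {
    *extra_bits_count = 0;
    *extra_bits_value = 0;
    *code = (distance == 2) ? 1 : 0;
  }
}

int main(void) {
  int code, bits, value;
  PrefixEncodeSketch(78, &code, &bits, &value);
  /* 78 -> 77 after the decrement; floor(log2(77)) = 6, so code 12,
   * 5 extra bits, value 77 & 31 = 13. */
  printf("distance 78 -> code %d, %d extra bits, value %d\n", code, bits, value);
  return 0;
}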
// -----------------------------------------------------------------------------


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Coding tools configuration
@ -31,9 +33,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->target_PSNR = 0.;
config->method = 4;
config->sns_strength = 50;
config->filter_strength = 20; // default: light filtering
config->filter_strength = 60; // rather high filtering, helps w/ gradients.
config->filter_sharpness = 0;
config->filter_type = 0; // default: simple
config->filter_type = 1; // default: strong (so U/V is filtered too)
config->partitions = 0;
config->segments = 4;
config->pass = 1;
@ -46,6 +48,9 @@ int WebPConfigInitInternal(WebPConfig* config,
config->alpha_quality = 100;
config->lossless = 0;
config->image_hint = WEBP_HINT_DEFAULT;
config->emulate_jpeg_size = 0;
config->thread_level = 0;
config->low_memory = 0;
// TODO(skal): tune.
switch (preset) {
@ -122,6 +127,12 @@ int WebPValidateConfig(const WebPConfig* config) {
return 0;
if (config->image_hint >= WEBP_HINT_LAST)
return 0;
if (config->emulate_jpeg_size < 0 || config->emulate_jpeg_size > 1)
return 0;
if (config->thread_level < 0 || config->thread_level > 1)
return 0;
if (config->low_memory < 0 || config->low_memory > 1)
return 0;
return 1;
}
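A minimal usage sketch of the three new flags validated above, assuming only the public webp/encode.h API (WebPConfigInit / WebPValidateConfig); error handling is reduced to a message for brevity.

#include <stdio.h>
#include "webp/encode.h"

int main(void) {
  WebPConfig config;
  if (!WebPConfigInit(&config)) return 1;  /* also checks the ABI version */
  config.quality = 75;
  config.emulate_jpeg_size = 1;  /* aim for a JPEG-like size/quality trade-off */
  config.thread_level = 1;       /* allow multi-threaded encoding */
  config.low_memory = 1;         /* trade CPU for a smaller memory footprint */
  if (!WebPValidateConfig(&config)) {
    printf("invalid config\n");
    return 1;
  }
  printf("config ok\n");
  return 0;
}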


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Cost tables for level and modes
@ -75,7 +77,7 @@ const uint16_t VP8LevelCodes[MAX_VARIABLE_LEVEL][2] = {
// fixed costs for coding levels, deduce from the coding tree.
// This is only the part that doesn't depend on the probability state.
const uint16_t VP8LevelFixedCosts[2048] = {
const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1] = {
0, 256, 256, 256, 256, 432, 618, 630,
731, 640, 640, 828, 901, 948, 1021, 1101,
1174, 1221, 1294, 1042, 1085, 1115, 1158, 1202,
@ -359,7 +361,7 @@ void VP8CalculateLevelCosts(VP8Proba* const proba) {
for (ctype = 0; ctype < NUM_TYPES; ++ctype) {
for (band = 0; band < NUM_BANDS; ++band) {
for(ctx = 0; ctx < NUM_CTX; ++ctx) {
for (ctx = 0; ctx < NUM_CTX; ++ctx) {
const uint8_t* const p = proba->coeffs_[ctype][band][ctx];
uint16_t* const table = proba->level_cost_[ctype][band][ctx];
const int cost_base = VP8BitCost(1, p[1]);


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Cost tables for level and modes.
@ -18,7 +20,8 @@
extern "C" {
#endif
extern const uint16_t VP8LevelFixedCosts[2048]; // approximate cost per level
// approximate cost per level:
extern const uint16_t VP8LevelFixedCosts[MAX_LEVEL + 1];
extern const uint16_t VP8EntropyCost[256]; // 8bit fixed-point log(p)
// Cost of coding one event with probability 'proba'.


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Selecting filter level


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// frame coding and analysis
@ -45,10 +47,10 @@ const uint8_t VP8EncBands[16 + 1] = {
0 // sentinel
};
static const uint8_t kCat3[] = { 173, 148, 140 };
static const uint8_t kCat4[] = { 176, 155, 140, 135 };
static const uint8_t kCat5[] = { 180, 157, 141, 134, 130 };
static const uint8_t kCat6[] =
const uint8_t VP8Cat3[] = { 173, 148, 140 };
const uint8_t VP8Cat4[] = { 176, 155, 140, 135 };
const uint8_t VP8Cat5[] = { 180, 157, 141, 134, 130 };
const uint8_t VP8Cat6[] =
{ 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 };
//------------------------------------------------------------------------------
@ -113,14 +115,15 @@ static int Record(int bit, proba_t* const stats) {
// Note: no need to record the fixed probas.
static int RecordCoeffs(int ctx, const VP8Residual* const res) {
int n = res->first;
proba_t* s = res->stats[VP8EncBands[n]][ctx];
// should be stats[VP8EncBands[n]], but it's equivalent for n=0 or 1
proba_t* s = res->stats[n][ctx];
if (res->last < 0) {
Record(0, s + 0);
return 0;
}
while (n <= res->last) {
int v;
Record(1, s + 0);
Record(1, s + 0); // order of record doesn't matter
while ((v = res->coeffs[n++]) == 0) {
Record(0, s + 1);
s = res->stats[VP8EncBands[n]][0];
@ -174,8 +177,7 @@ static int BranchCost(int nb, int total, int proba) {
return nb * VP8BitCost(1, proba) + (total - nb) * VP8BitCost(0, proba);
}
static int FinalizeTokenProbas(VP8Encoder* const enc) {
VP8Proba* const proba = &enc->proba_;
static int FinalizeTokenProbas(VP8Proba* const proba) {
int has_changed = 0;
int size = 0;
int t, b, c, p;
@ -211,6 +213,47 @@ static int FinalizeTokenProbas(VP8Encoder* const enc) {
return size;
}
//------------------------------------------------------------------------------
// Finalize Segment probability based on the coding tree
static int GetProba(int a, int b) {
const int total = a + b;
return (total == 0) ? 255 // that's the default probability.
: (255 * a + total / 2) / total; // rounded proba
}
static void SetSegmentProbas(VP8Encoder* const enc) {
int p[NUM_MB_SEGMENTS] = { 0 };
int n;
for (n = 0; n < enc->mb_w_ * enc->mb_h_; ++n) {
const VP8MBInfo* const mb = &enc->mb_info_[n];
p[mb->segment_]++;
}
if (enc->pic_->stats != NULL) {
for (n = 0; n < NUM_MB_SEGMENTS; ++n) {
enc->pic_->stats->segment_size[n] = p[n];
}
}
if (enc->segment_hdr_.num_segments_ > 1) {
uint8_t* const probas = enc->proba_.segments_;
probas[0] = GetProba(p[0] + p[1], p[2] + p[3]);
probas[1] = GetProba(p[0], p[1]);
probas[2] = GetProba(p[2], p[3]);
enc->segment_hdr_.update_map_ =
(probas[0] != 255) || (probas[1] != 255) || (probas[2] != 255);
enc->segment_hdr_.size_ =
p[0] * (VP8BitCost(0, probas[0]) + VP8BitCost(0, probas[1])) +
p[1] * (VP8BitCost(0, probas[0]) + VP8BitCost(1, probas[1])) +
p[2] * (VP8BitCost(1, probas[0]) + VP8BitCost(0, probas[2])) +
p[3] * (VP8BitCost(1, probas[0]) + VP8BitCost(1, probas[2]));
} else {
enc->segment_hdr_.update_map_ = 0;
enc->segment_hdr_.size_ = 0;
}
}
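The three probabilities above form a small binary tree: probas[0] separates segments {0,1} from {2,3}, then probas[1] and probas[2] split inside each pair. A worked example with hypothetical per-segment macroblock counts:

#include <stdio.h>

/* Rounded probability of taking the "0" branch, as in GetProba() above. */
static int Proba(int a, int b) {
  const int total = a + b;
  return (total == 0) ? 255 : (255 * a + total / 2) / total;
}

int main(void) {
  const int p[4] = { 60, 20, 15, 5 };   /* made-up counts for a 100-MB picture */
  printf("probas[0] = %d\n", Proba(p[0] + p[1], p[2] + p[3]));  /* 204 */
  printf("probas[1] = %d\n", Proba(p[0], p[1]));                /* 191 */
  printf("probas[2] = %d\n", Proba(p[2], p[3]));                /* 191 */
  return 0;
}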
//------------------------------------------------------------------------------
// helper functions for residuals struct VP8Residual.
@ -239,18 +282,19 @@ static void SetResidualCoeffs(const int16_t* const coeffs,
//------------------------------------------------------------------------------
// Mode costs
static int GetResidualCost(int ctx, const VP8Residual* const res) {
static int GetResidualCost(int ctx0, const VP8Residual* const res) {
int n = res->first;
int p0 = res->prob[VP8EncBands[n]][ctx][0];
const uint16_t* t = res->cost[VP8EncBands[n]][ctx];
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
int p0 = res->prob[n][ctx0][0];
const uint16_t* t = res->cost[n][ctx0];
int cost;
if (res->last < 0) {
return VP8BitCost(0, p0);
}
cost = 0;
while (n <= res->last) {
const int v = res->coeffs[n];
while (n < res->last) {
int v = res->coeffs[n];
const int b = VP8EncBands[n + 1];
++n;
if (v == 0) {
@ -259,19 +303,28 @@ static int GetResidualCost(int ctx, const VP8Residual* const res) {
t = res->cost[b][0];
continue;
}
v = abs(v);
cost += VP8BitCost(1, p0);
if (2u >= (unsigned int)(v + 1)) { // v = -1 or 1
// short-case for "VP8LevelCost(t, 1)" (256 is VP8LevelFixedCosts[1]):
cost += 256 + t[1];
p0 = res->prob[b][1][0];
t = res->cost[b][1];
} else {
cost += VP8LevelCost(t, abs(v));
p0 = res->prob[b][2][0];
t = res->cost[b][2];
cost += VP8LevelCost(t, v);
{
const int ctx = (v == 1) ? 1 : 2;
p0 = res->prob[b][ctx][0];
t = res->cost[b][ctx];
}
}
// Last coefficient is always non-zero
{
const int v = abs(res->coeffs[n]);
assert(v != 0);
cost += VP8BitCost(1, p0);
cost += VP8LevelCost(t, v);
if (n < 15) {
const int b = VP8EncBands[n + 1];
const int ctx = (v == 1) ? 1 : 2;
const int last_p0 = res->prob[b][ctx][0];
cost += VP8BitCost(0, last_p0);
}
}
if (n < 16) cost += VP8BitCost(0, p0);
return cost;
}
@ -342,7 +395,8 @@ int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd) {
static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
int n = res->first;
const uint8_t* p = res->prob[VP8EncBands[n]][ctx];
// should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1
const uint8_t* p = res->prob[n][ctx];
if (!VP8PutBit(bw, res->last >= 0, p[0])) {
return 0;
}
@ -371,30 +425,30 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
} else {
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // kCat3 (3b)
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
VP8PutBit(bw, 0, p[8]);
VP8PutBit(bw, 0, p[9]);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = kCat3;
} else if (v < 3 + (8 << 2)) { // kCat4 (4b)
tab = VP8Cat3;
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
VP8PutBit(bw, 0, p[8]);
VP8PutBit(bw, 1, p[9]);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = kCat4;
} else if (v < 3 + (8 << 3)) { // kCat5 (5b)
tab = VP8Cat4;
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
VP8PutBit(bw, 1, p[8]);
VP8PutBit(bw, 0, p[10]);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = kCat5;
} else { // kCat6 (11b)
tab = VP8Cat5;
} else { // VP8Cat6 (11b)
VP8PutBit(bw, 1, p[8]);
VP8PutBit(bw, 1, p[10]);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = kCat6;
tab = VP8Cat6;
}
while (mask) {
VP8PutBit(bw, !!(v & mask), *tab++);
@ -411,8 +465,7 @@ static int PutCoeffs(VP8BitWriter* const bw, int ctx, const VP8Residual* res) {
return 1;
}
static void CodeResiduals(VP8BitWriter* const bw,
VP8EncIterator* const it,
static void CodeResiduals(VP8BitWriter* const bw, VP8EncIterator* const it,
const VP8ModeScore* const rd) {
int x, y, ch;
VP8Residual res;
@ -512,146 +565,23 @@ static void RecordResiduals(VP8EncIterator* const it,
//------------------------------------------------------------------------------
// Token buffer
#ifdef USE_TOKEN_BUFFER
#if !defined(DISABLE_TOKEN_BUFFER)
void VP8TBufferInit(VP8TBuffer* const b) {
b->rows_ = NULL;
b->tokens_ = NULL;
b->last_ = &b->rows_;
b->left_ = 0;
b->error_ = 0;
}
int VP8TBufferNewPage(VP8TBuffer* const b) {
VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
if (page == NULL) {
b->error_ = 1;
return 0;
}
*b->last_ = page;
b->last_ = &page->next_;
b->left_ = MAX_NUM_TOKEN;
b->tokens_ = page->tokens_;
return 1;
}
void VP8TBufferClear(VP8TBuffer* const b) {
if (b != NULL) {
const VP8Tokens* p = b->rows_;
while (p != NULL) {
const VP8Tokens* const next = p->next_;
free((void*)p);
p = next;
}
VP8TBufferInit(b);
}
}
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas) {
VP8Tokens* p = b->rows_;
if (b->error_) return 0;
while (p != NULL) {
const int N = (p->next_ == NULL) ? b->left_ : 0;
int n = MAX_NUM_TOKEN;
while (n-- > N) {
VP8PutBit(bw, (p->tokens_[n] >> 15) & 1, probas[p->tokens_[n] & 0x7fff]);
}
p = p->next_;
}
return 1;
}
#define TOKEN_ID(b, ctx, p) ((p) + NUM_PROBAS * ((ctx) + (b) * NUM_CTX))
static int RecordCoeffTokens(int ctx, const VP8Residual* const res,
VP8TBuffer* tokens) {
int n = res->first;
int b = VP8EncBands[n];
if (!VP8AddToken(tokens, res->last >= 0, TOKEN_ID(b, ctx, 0))) {
return 0;
}
while (n < 16) {
const int c = res->coeffs[n++];
const int sign = c < 0;
int v = sign ? -c : c;
const int base_id = TOKEN_ID(b, ctx, 0);
if (!VP8AddToken(tokens, v != 0, base_id + 1)) {
b = VP8EncBands[n];
ctx = 0;
continue;
}
if (!VP8AddToken(tokens, v > 1, base_id + 2)) {
b = VP8EncBands[n];
ctx = 1;
} else {
if (!VP8AddToken(tokens, v > 4, base_id + 3)) {
if (VP8AddToken(tokens, v != 2, base_id + 4))
VP8AddToken(tokens, v == 4, base_id + 5);
} else if (!VP8AddToken(tokens, v > 10, base_id + 6)) {
if (!VP8AddToken(tokens, v > 6, base_id + 7)) {
// VP8AddToken(tokens, v == 6, 159);
} else {
// VP8AddToken(tokens, v >= 9, 165);
// VP8AddToken(tokens, !(v & 1), 145);
}
} else {
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // kCat3 (3b)
VP8AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 0, base_id + 9);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = kCat3;
} else if (v < 3 + (8 << 2)) { // kCat4 (4b)
VP8AddToken(tokens, 0, base_id + 8);
VP8AddToken(tokens, 1, base_id + 9);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = kCat4;
} else if (v < 3 + (8 << 3)) { // kCat5 (5b)
VP8AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 0, base_id + 10);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = kCat5;
} else { // kCat6 (11b)
VP8AddToken(tokens, 1, base_id + 8);
VP8AddToken(tokens, 1, base_id + 10);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = kCat6;
}
while (mask) {
// VP8AddToken(tokens, !!(v & mask), *tab++);
mask >>= 1;
}
}
ctx = 2;
}
b = VP8EncBands[n];
// VP8PutBitUniform(bw, sign);
if (n == 16 || !VP8AddToken(tokens, n <= res->last, TOKEN_ID(b, ctx, 0))) {
return 1; // EOB
}
}
return 1;
}
static void RecordTokens(VP8EncIterator* const it,
const VP8ModeScore* const rd, VP8TBuffer tokens[2]) {
static void RecordTokens(VP8EncIterator* const it, const VP8ModeScore* const rd,
VP8TBuffer* const tokens) {
int x, y, ch;
VP8Residual res;
VP8Encoder* const enc = it->enc_;
VP8IteratorNzToBytes(it);
if (it->mb_->type_ == 1) { // i16x16
const int ctx = it->top_nz_[8] + it->left_nz_[8];
InitResidual(0, 1, enc, &res);
SetResidualCoeffs(rd->y_dc_levels, &res);
// TODO(skal): FIX -> it->top_nz_[8] = it->left_nz_[8] =
RecordCoeffTokens(it->top_nz_[8] + it->left_nz_[8], &res, &tokens[0]);
it->top_nz_[8] = it->left_nz_[8] =
VP8RecordCoeffTokens(ctx, 1,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
InitResidual(1, 0, enc, &res);
} else {
InitResidual(0, 3, enc, &res);
@ -663,7 +593,9 @@ static void RecordTokens(VP8EncIterator* const it,
const int ctx = it->top_nz_[x] + it->left_nz_[y];
SetResidualCoeffs(rd->y_ac_levels[x + y * 4], &res);
it->top_nz_[x] = it->left_nz_[y] =
RecordCoeffTokens(ctx, &res, &tokens[0]);
VP8RecordCoeffTokens(ctx, res.coeff_type,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
}
}
@ -675,13 +607,16 @@ static void RecordTokens(VP8EncIterator* const it,
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
SetResidualCoeffs(rd->uv_levels[ch * 2 + x + y * 2], &res);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] =
RecordCoeffTokens(ctx, &res, &tokens[1]);
VP8RecordCoeffTokens(ctx, 2,
res.first, res.last, res.coeffs, tokens);
RecordCoeffs(ctx, &res);
}
}
}
VP8IteratorBytesToNz(it);
}
#endif // USE_TOKEN_BUFFER
#endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// ExtraInfo map / Debug function
@ -697,7 +632,10 @@ static void SetBlock(uint8_t* p, int value, int size) {
#endif
static void ResetSSE(VP8Encoder* const enc) {
memset(enc->sse_, 0, sizeof(enc->sse_));
enc->sse_[0] = 0;
enc->sse_[1] = 0;
enc->sse_[2] = 0;
// Note: enc->sse_[3] is managed by alpha.c
enc->sse_count_ = 0;
}
@ -736,6 +674,7 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
const int b = (int)((it->luma_bits_ + it->uv_bits_ + 7) >> 3);
*info = (b > 255) ? 255 : b; break;
}
case 7: *info = mb->alpha_; break;
default: *info = 0; break;
};
}
@ -746,9 +685,173 @@ static void StoreSideInfo(const VP8EncIterator* const it) {
#endif
}
//------------------------------------------------------------------------------
// StatLoop(): only collect statistics (number of skips, token usage, ...).
// This is used for deciding optimal probabilities. It also modifies the
// quantizer value if some target (size, PSNR) was specified.
#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better
static void SetLoopParams(VP8Encoder* const enc, float q) {
// Make sure the quality parameter is inside valid bounds
if (q < 0.) {
q = 0;
} else if (q > 100.) {
q = 100;
}
VP8SetSegmentParams(enc, q); // setup segment quantizations and filters
SetSegmentProbas(enc); // compute segment probabilities
ResetStats(enc);
ResetTokenStats(enc);
ResetSSE(enc);
}
static int OneStatPass(VP8Encoder* const enc, float q, VP8RDLevel rd_opt,
int nb_mbs, float* const PSNR, int percent_delta) {
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
SetLoopParams(enc, q);
VP8IteratorInit(enc, &it);
do {
VP8ModeScore info;
VP8IteratorImport(&it);
if (VP8Decimate(&it, &info, rd_opt)) {
// Just record the number of skips and act like skip_proba is not used.
enc->proba_.nb_skip_++;
}
RecordResiduals(&it, &info);
size += info.R;
distortion += info.D;
if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
return 0;
} while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(&enc->proba_);
size += enc->segment_hdr_.size_;
size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
if (PSNR) {
*PSNR = (float)(10.* log10(255. * 255. * pixel_count / distortion));
}
return (int)size;
}
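The PSNR reported above is the usual 10 * log10(255^2 / MSE), with MSE = distortion / pixel_count and 384 samples per macroblock (256 luma + 128 chroma). A tiny numeric check with made-up totals:

#include <math.h>
#include <stdio.h>

int main(void) {
  const double pixel_count = 1200 * 384;   /* hypothetical: 1200 macroblocks */
  const double distortion  = 4.2e6;        /* hypothetical accumulated SSE   */
  const double mse  = distortion / pixel_count;
  const double psnr = 10. * log10(255. * 255. / mse);
  printf("MSE = %.2f, PSNR = %.2f dB\n", mse, psnr);  /* about 38.5 dB */
  return 0;
}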
// successive refinement increments.
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
static int StatLoop(VP8Encoder* const enc) {
const int method = enc->method_;
const int do_search = enc->do_search_;
const int fast_probe = ((method == 0 || method == 3) && !do_search);
float q = enc->config_->quality;
const int max_passes = enc->config_->pass;
const int task_percent = 20;
const int percent_per_pass = (task_percent + max_passes / 2) / max_passes;
const int final_percent = enc->percent_ + task_percent;
int pass;
int nb_mbs;
// Fast mode: quick analysis pass over few mbs. Better than nothing.
nb_mbs = enc->mb_w_ * enc->mb_h_;
if (fast_probe) {
if (method == 3) { // we need more stats for method 3 to be reliable.
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 1 : 100;
} else {
nb_mbs = (nb_mbs > 200) ? nb_mbs >> 2 : 50;
}
}
// No target size: just do several passes without changing 'q'
if (!do_search) {
for (pass = 0; pass < max_passes; ++pass) {
const VP8RDLevel rd_opt = (method >= 3) ? RD_OPT_BASIC : RD_OPT_NONE;
if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
return 0;
}
}
} else {
// binary search for a size close to target
for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
float PSNR;
int criterion;
const int size = OneStatPass(enc, q, RD_OPT_BASIC, nb_mbs, &PSNR,
percent_per_pass);
#if DEBUG_SEARCH
printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
#endif
if (size == 0) return 0;
if (enc->config_->target_PSNR > 0) {
criterion = (PSNR < enc->config_->target_PSNR);
} else {
criterion = (size < enc->config_->target_size);
}
// dichotomize
if (criterion) {
q += dqs[pass];
} else {
q -= dqs[pass];
}
}
}
VP8CalculateLevelCosts(&enc->proba_); // finalize costs
return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
}
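The size-target branch above is a damped dichotomy on 'q' driven by the shrinking dqs[] increments. A toy illustration using a fabricated monotonic size-vs-quality model (the linear model below is purely for demonstration):

#include <stdio.h>

/* Toy stand-in for OneStatPass(): pretend output size grows linearly with q. */
static int FakeSizeForQuality(float q) {
  return (int)(10000 + 400 * q);
}

int main(void) {
  static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
  const int target_size = 30000;
  float q = 50.f;
  int pass;
  for (pass = 0; dqs[pass] > 0; ++pass) {
    const int size = FakeSizeForQuality(q);
    /* same dichotomy as StatLoop(): undershoot -> raise q, overshoot -> lower it */
    if (size < target_size) q += dqs[pass];
    else                    q -= dqs[pass];
    printf("pass %d: size=%d -> q=%.1f\n", pass, size, q);
  }
  return 0;
}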
//------------------------------------------------------------------------------
// Main loops
//
static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
static int PreLoopInitialize(VP8Encoder* const enc) {
int p;
int ok = 1;
const int average_bytes_per_MB = kAverageBytesPerMB[enc->base_quant_ >> 4];
const int bytes_per_parts =
enc->mb_w_ * enc->mb_h_ * average_bytes_per_MB / enc->num_parts_;
// Initialize the bit-writers
for (p = 0; ok && p < enc->num_parts_; ++p) {
ok = VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
}
if (!ok) VP8EncFreeBitWriters(enc); // malloc error occurred
return ok;
}
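The initial bit-writer buffers above are sized from a per-macroblock byte estimate indexed by the base quantizer. A worked example with hypothetical picture dimensions and quantizer:

#include <stdio.h>

int main(void) {
  static const int kAverageBytesPerMB[8] = { 50, 24, 16, 9, 7, 5, 3, 2 };
  const int mb_w = 40, mb_h = 30;   /* hypothetical 640x480 picture: 1200 MBs */
  const int base_quant = 40;        /* assumed quantizer; indexes the table via >> 4 */
  const int num_parts = 4;
  const int avg = kAverageBytesPerMB[base_quant >> 4];        /* 16 */
  const int bytes_per_parts = mb_w * mb_h * avg / num_parts;  /* 4800 */
  printf("initial buffer per partition: %d bytes\n", bytes_per_parts);
  return 0;
}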
static int PostLoopFinalize(VP8EncIterator* const it, int ok) {
VP8Encoder* const enc = it->enc_;
if (ok) { // Finalize the partitions, check for extra errors.
int p;
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterFinish(enc->parts_ + p);
ok &= !enc->parts_[p].error_;
}
}
if (ok) { // All good. Finish up.
if (enc->pic_->stats) { // finalize byte counters...
int i, s;
for (i = 0; i <= 2; ++i) {
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
enc->residual_bytes_[i][s] = (int)((it->bit_count_[s][i] + 7) >> 3);
}
}
}
VP8AdjustFilterStrength(it); // ...and store filter stats.
} else {
// Something bad happened -> need to do some memory cleanup.
VP8EncFreeBitWriters(enc);
}
return ok;
}
//------------------------------------------------------------------------------
// VP8EncLoop(): does the final bitstream coding.
static void ResetAfterSkip(VP8EncIterator* const it) {
@ -761,27 +864,19 @@ static void ResetAfterSkip(VP8EncIterator* const it) {
}
int VP8EncLoop(VP8Encoder* const enc) {
int i, s, p;
int ok = 1;
VP8EncIterator it;
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
const int rd_opt = enc->rd_opt_level_;
const int kAverageBytesPerMB = 5; // TODO: have a kTable[quality/10]
const int bytes_per_parts =
enc->mb_w_ * enc->mb_h_ * kAverageBytesPerMB / enc->num_parts_;
int ok = PreLoopInitialize(enc);
if (!ok) return 0;
// Initialize the bit-writers
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterInit(enc->parts_ + p, bytes_per_parts);
}
ResetStats(enc);
ResetSSE(enc);
StatLoop(enc); // stats-collection loop
VP8IteratorInit(enc, &it);
VP8InitFilter(&it);
do {
VP8ModeScore info;
const int dont_use_skip = !enc->proba_.use_skip_proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
VP8IteratorImport(&it);
// Warning! order is important: first call VP8Decimate() and
// *then* decide how to code the skip decision if there's one.
@ -801,137 +896,82 @@ int VP8EncLoop(VP8Encoder* const enc) {
ok = VP8IteratorProgress(&it, 20);
} while (ok && VP8IteratorNext(&it, it.yuv_out_));
if (ok) { // Finalize the partitions, check for extra errors.
for (p = 0; p < enc->num_parts_; ++p) {
VP8BitWriterFinish(enc->parts_ + p);
ok &= !enc->parts_[p].error_;
}
}
if (ok) { // All good. Finish up.
if (enc->pic_->stats) { // finalize byte counters...
for (i = 0; i <= 2; ++i) {
for (s = 0; s < NUM_MB_SEGMENTS; ++s) {
enc->residual_bytes_[i][s] = (int)((it.bit_count_[s][i] + 7) >> 3);
}
}
}
VP8AdjustFilterStrength(&it); // ...and store filter stats.
} else {
// Something bad happened -> need to do some memory cleanup.
VP8EncFreeBitWriters(enc);
}
return ok;
return PostLoopFinalize(&it, ok);
}
//------------------------------------------------------------------------------
// VP8StatLoop(): only collect statistics (number of skips, token usage, ...)
// This is used for deciding optimal probabilities. It also
// modifies the quantizer value if some target (size, PSNR)
// was specified.
// Single pass using Token Buffer.
#define kHeaderSizeEstimate (15 + 20 + 10) // TODO: fix better
#if !defined(DISABLE_TOKEN_BUFFER)
static int OneStatPass(VP8Encoder* const enc, float q, int rd_opt, int nb_mbs,
float* const PSNR, int percent_delta) {
#define MIN_COUNT 96 // minimum number of macroblocks before updating stats
int VP8EncTokenLoop(VP8Encoder* const enc) {
int ok;
// Roughly refresh the proba eight times per pass
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
int cnt;
VP8EncIterator it;
uint64_t size = 0;
uint64_t distortion = 0;
const uint64_t pixel_count = nb_mbs * 384;
VP8Proba* const proba = &enc->proba_;
const VP8RDLevel rd_opt = enc->rd_opt_level_;
// Make sure the quality parameter is inside valid bounds
if (q < 0.) {
q = 0;
} else if (q > 100.) {
q = 100;
}
if (max_count < MIN_COUNT) max_count = MIN_COUNT;
cnt = max_count;
VP8SetSegmentParams(enc, q); // setup segment quantizations and filters
assert(enc->num_parts_ == 1);
assert(enc->use_tokens_);
assert(proba->use_skip_proba_ == 0);
assert(rd_opt >= RD_OPT_BASIC); // otherwise, token-buffer won't be useful
assert(!enc->do_search_); // TODO(skal): handle pass and dichotomy
ResetStats(enc);
ResetTokenStats(enc);
SetLoopParams(enc, enc->config_->quality);
ok = PreLoopInitialize(enc);
if (!ok) return 0;
VP8IteratorInit(enc, &it);
VP8InitFilter(&it);
do {
VP8ModeScore info;
VP8IteratorImport(&it);
if (VP8Decimate(&it, &info, rd_opt)) {
// Just record the number of skips and act like skip_proba is not used.
enc->proba_.nb_skip_++;
if (--cnt < 0) {
FinalizeTokenProbas(proba);
VP8CalculateLevelCosts(proba); // refresh cost tables for rd-opt
cnt = max_count;
}
RecordResiduals(&it, &info);
size += info.R;
distortion += info.D;
if (percent_delta && !VP8IteratorProgress(&it, percent_delta))
return 0;
} while (VP8IteratorNext(&it, it.yuv_out_) && --nb_mbs > 0);
size += FinalizeSkipProba(enc);
size += FinalizeTokenProbas(enc);
size += enc->segment_hdr_.size_;
size = ((size + 1024) >> 11) + kHeaderSizeEstimate;
if (PSNR) {
*PSNR = (float)(10.* log10(255. * 255. * pixel_count / distortion));
}
return (int)size;
}
// successive refinement increments.
static const int dqs[] = { 20, 15, 10, 8, 6, 4, 2, 1, 0 };
int VP8StatLoop(VP8Encoder* const enc) {
const int do_search =
(enc->config_->target_size > 0 || enc->config_->target_PSNR > 0);
const int fast_probe = (enc->method_ < 2 && !do_search);
float q = enc->config_->quality;
const int max_passes = enc->config_->pass;
const int task_percent = 20;
const int percent_per_pass = (task_percent + max_passes / 2) / max_passes;
const int final_percent = enc->percent_ + task_percent;
int pass;
int nb_mbs;
// Fast mode: quick analysis pass over few mbs. Better than nothing.
nb_mbs = enc->mb_w_ * enc->mb_h_;
if (fast_probe && nb_mbs > 100) nb_mbs = 100;
// No target size: just do several passes without changing 'q'
if (!do_search) {
for (pass = 0; pass < max_passes; ++pass) {
const int rd_opt = (enc->method_ > 2);
if (!OneStatPass(enc, q, rd_opt, nb_mbs, NULL, percent_per_pass)) {
return 0;
}
VP8Decimate(&it, &info, rd_opt);
RecordTokens(&it, &info, &enc->tokens_);
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (enc->use_layer_) {
VP8EncCodeLayerBlock(&it);
}
} else {
// binary search for a size close to target
for (pass = 0; pass < max_passes && (dqs[pass] > 0); ++pass) {
const int rd_opt = 1;
float PSNR;
int criterion;
const int size = OneStatPass(enc, q, rd_opt, nb_mbs, &PSNR,
percent_per_pass);
#if DEBUG_SEARCH
printf("#%d size=%d PSNR=%.2f q=%.2f\n", pass, size, PSNR, q);
#endif
if (!size) return 0;
if (enc->config_->target_PSNR > 0) {
criterion = (PSNR < enc->config_->target_PSNR);
} else {
criterion = (size < enc->config_->target_size);
}
// dichotomize
if (criterion) {
q += dqs[pass];
} else {
q -= dqs[pass];
}
}
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
ok = VP8IteratorProgress(&it, 20);
} while (ok && VP8IteratorNext(&it, it.yuv_out_));
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
if (ok) {
FinalizeTokenProbas(proba);
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
(const uint8_t*)proba->coeffs_, 1);
}
return WebPReportProgress(enc->pic_, final_percent, &enc->percent_);
return PostLoopFinalize(&it, ok);
}
#else
int VP8EncTokenLoop(VP8Encoder* const enc) {
(void)enc;
return 0; // we shouldn't be here.
}
#endif // DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
#if defined(__cplusplus) || defined(c_plusplus)


@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
@ -98,8 +100,6 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
}
}
static double BitsEntropy(const int* const array, int n) {
double retval = 0.;
int sum = 0;
@ -149,25 +149,6 @@ static double BitsEntropy(const int* const array, int n) {
}
}
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
double retval = BitsEntropy(&p->literal_[0], VP8LHistogramNumCodes(p))
+ BitsEntropy(&p->red_[0], 256)
+ BitsEntropy(&p->blue_[0], 256)
+ BitsEntropy(&p->alpha_[0], 256)
+ BitsEntropy(&p->distance_[0], NUM_DISTANCE_CODES);
// Compute the extra bits cost.
int i;
for (i = 2; i < NUM_LENGTH_CODES - 2; ++i) {
retval +=
(i >> 1) * p->literal_[256 + i + 2];
}
for (i = 2; i < NUM_DISTANCE_CODES - 2; ++i) {
retval += (i >> 1) * p->distance_[i + 2];
}
return retval;
}
// Returns the cost encode the rle-encoded entropy code.
// The constants in this function are experimental.
static double HuffmanCost(const int* const population, int length) {
@ -207,19 +188,150 @@ static double HuffmanCost(const int* const population, int length) {
return retval;
}
// Estimates the Huffman dictionary + other block overhead size.
static double HistogramEstimateBitsHeader(const VP8LHistogram* const p) {
return HuffmanCost(&p->alpha_[0], 256) +
HuffmanCost(&p->red_[0], 256) +
HuffmanCost(&p->literal_[0], VP8LHistogramNumCodes(p)) +
HuffmanCost(&p->blue_[0], 256) +
HuffmanCost(&p->distance_[0], NUM_DISTANCE_CODES);
static double PopulationCost(const int* const population, int length) {
return BitsEntropy(population, length) + HuffmanCost(population, length);
}
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
return HistogramEstimateBitsHeader(p) + VP8LHistogramEstimateBitsBulk(p);
static double ExtraCost(const int* const population, int length) {
int i;
double cost = 0.;
for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
return cost;
}
// Estimates the Entropy + Huffman + other block overhead size cost.
double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
return PopulationCost(p->literal_, VP8LHistogramNumCodes(p))
+ PopulationCost(p->red_, 256)
+ PopulationCost(p->blue_, 256)
+ PopulationCost(p->alpha_, 256)
+ PopulationCost(p->distance_, NUM_DISTANCE_CODES)
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
return BitsEntropy(p->literal_, VP8LHistogramNumCodes(p))
+ BitsEntropy(p->red_, 256)
+ BitsEntropy(p->blue_, 256)
+ BitsEntropy(p->alpha_, 256)
+ BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
+ ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
+ ExtraCost(p->distance_, NUM_DISTANCE_CODES);
}
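The estimate above is the sum of a per-symbol entropy term, a Huffman-header term, and the literal extra bits attached to the longer length/distance codes. The snippet below (made-up counts, illustrative names) isolates the ExtraCost() term: symbol i + 2 carries i >> 1 extra bits, so each of its occurrences contributes that many bits.

#include <stdio.h>

/* Same summation as ExtraCost() above. */
static double ExtraCostSketch(const int* population, int length) {
  int i;
  double cost = 0.;
  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
  return cost;
}

int main(void) {
  /* Hypothetical counts for an 8-entry prefix-code histogram:
   * 1*3 + 1*1 + 2*2 + 2*1 = 10 extra bits. */
  const int population[8] = { 0, 0, 0, 0, 3, 1, 2, 1 };
  printf("extra bits = %.0f\n", ExtraCostSketch(population, 8));
  return 0;
}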
// -----------------------------------------------------------------------------
// Various histogram combine/cost-eval functions
// Adds 'in' histogram to 'out'
static void HistogramAdd(const VP8LHistogram* const in,
VP8LHistogram* const out) {
int i;
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
out->literal_[i] += in->literal_[i];
}
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
out->distance_[i] += in->distance_[i];
}
for (i = 0; i < 256; ++i) {
out->red_[i] += in->red_[i];
out->blue_[i] += in->blue_[i];
out->alpha_[i] += in->alpha_[i];
}
}
// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
// to the threshold value 'cost_threshold'. The score returned is
// Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
// Since the previous score passed is 'cost_threshold', we only need to compare
// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
// early.
static double HistogramAddEval(const VP8LHistogram* const a,
const VP8LHistogram* const b,
VP8LHistogram* const out,
double cost_threshold) {
double cost = 0;
const double sum_cost = a->bit_cost_ + b->bit_cost_;
int i;
cost_threshold += sum_cost;
// palette_code_bits_ is part of the cost evaluation for literal_.
// TODO(skal): remove/simplify this palette_code_bits_?
out->palette_code_bits_ =
(a->palette_code_bits_ > b->palette_code_bits_) ? a->palette_code_bits_ :
b->palette_code_bits_;
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
out->literal_[i] = a->literal_[i] + b->literal_[i];
}
cost += PopulationCost(out->literal_, VP8LHistogramNumCodes(out));
cost += ExtraCost(out->literal_ + 256, NUM_LENGTH_CODES);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) out->red_[i] = a->red_[i] + b->red_[i];
cost += PopulationCost(out->red_, 256);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) out->blue_[i] = a->blue_[i] + b->blue_[i];
cost += PopulationCost(out->blue_, 256);
if (cost > cost_threshold) return cost;
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
out->distance_[i] = a->distance_[i] + b->distance_[i];
}
cost += PopulationCost(out->distance_, NUM_DISTANCE_CODES);
cost += ExtraCost(out->distance_, NUM_DISTANCE_CODES);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
cost += PopulationCost(out->alpha_, 256);
out->bit_cost_ = cost;
return cost - sum_cost;
}
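The early bail-out above relies on a simple identity: since C(a) + C(b) is fixed, comparing the score C(a+b) - C(a) - C(b) against the best score so far is the same as comparing the running cost C(a+b) against best_score + C(a) + C(b), so the evaluation can stop as soon as the partial sum crosses that shifted threshold. A sketch with hypothetical costs:

#include <stdio.h>

int main(void) {
  const double cost_a = 1200.0, cost_b = 900.0;             /* made-up, in bits */
  const double best_score = -50.0;                          /* best C(a+b)-C(a)-C(b) so far */
  const double bail_out = best_score + cost_a + cost_b;     /* 2050 bits */
  const double partial[4] = { 800.0, 600.0, 400.0, 300.0 }; /* per-component costs of a+b */
  double cost = 0.0;
  int i;
  for (i = 0; i < 4; ++i) {
    cost += partial[i];
    if (cost > bail_out) {  /* the final score can only be worse than the best */
      printf("bail out after component %d (cost %.0f > %.0f)\n", i, cost, bail_out);
      return 0;
    }
  }
  printf("full cost %.0f, score %.0f\n", cost, cost - cost_a - cost_b);
  return 0;
}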
// Same as HistogramAddEval(), except that the resulting histogram
// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
// the term C(b) which is constant over all the evaluations.
static double HistogramAddThresh(const VP8LHistogram* const a,
const VP8LHistogram* const b,
double cost_threshold) {
int tmp[PIX_OR_COPY_CODES_MAX]; // <= max storage we'll need
int i;
double cost = -a->bit_cost_;
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
tmp[i] = a->literal_[i] + b->literal_[i];
}
// note that the tests are ordered so that the usually largest
// cost shares come first.
cost += PopulationCost(tmp, VP8LHistogramNumCodes(a));
cost += ExtraCost(tmp + 256, NUM_LENGTH_CODES);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) tmp[i] = a->red_[i] + b->red_[i];
cost += PopulationCost(tmp, 256);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) tmp[i] = a->blue_[i] + b->blue_[i];
cost += PopulationCost(tmp, 256);
if (cost > cost_threshold) return cost;
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
tmp[i] = a->distance_[i] + b->distance_[i];
}
cost += PopulationCost(tmp, NUM_DISTANCE_CODES);
cost += ExtraCost(tmp, NUM_DISTANCE_CODES);
if (cost > cost_threshold) return cost;
for (i = 0; i < 256; ++i) tmp[i] = a->alpha_[i] + b->alpha_[i];
cost += PopulationCost(tmp, 256);
return cost;
}
// -----------------------------------------------------------------------------
static void HistogramBuildImage(int xsize, int histo_bits,
const VP8LBackwardRefs* const backward_refs,
VP8LHistogramSet* const image) {
@ -249,14 +361,15 @@ static uint32_t MyRand(uint32_t *seed) {
}
static int HistogramCombine(const VP8LHistogramSet* const in,
VP8LHistogramSet* const out, int num_pairs) {
VP8LHistogramSet* const out, int iter_mult,
int num_pairs, int num_tries_no_success) {
int ok = 0;
int i, iter;
uint32_t seed = 0;
int tries_with_no_success = 0;
const int min_cluster_size = 2;
int out_size = in->size;
const int outer_iters = in->size * 3;
const int outer_iters = in->size * iter_mult;
const int min_cluster_size = 2;
VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
VP8LHistogram* cur_combo = histos + 0; // trial merged histogram
VP8LHistogram* best_combo = histos + 1; // best merged histogram so far
@ -271,29 +384,26 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
// Collapse similar histograms in 'out'.
for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
// We pick the best pair to be combined out of 'inner_iters' pairs.
double best_cost_diff = 0.;
int best_idx1 = 0, best_idx2 = 1;
int best_idx1 = -1, best_idx2 = 1;
int j;
const int num_tries = (num_pairs < out_size) ? num_pairs : out_size;
seed += iter;
for (j = 0; j < num_pairs; ++j) {
for (j = 0; j < num_tries; ++j) {
double curr_cost_diff;
// Choose two histograms at random and try to combine them.
const uint32_t idx1 = MyRand(&seed) % out_size;
const uint32_t tmp = ((j & 7) + 1) % (out_size - 1);
const uint32_t tmp = (j & 7) + 1;
const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
const uint32_t idx2 = (idx1 + diff + 1) % out_size;
if (idx1 == idx2) {
continue;
}
*cur_combo = *out->histograms[idx1];
VP8LHistogramAdd(cur_combo, out->histograms[idx2]);
cur_combo->bit_cost_ = VP8LHistogramEstimateBits(cur_combo);
// Calculate cost reduction on combining.
curr_cost_diff = cur_combo->bit_cost_
- out->histograms[idx1]->bit_cost_
- out->histograms[idx2]->bit_cost_;
if (best_cost_diff > curr_cost_diff) { // found a better pair?
curr_cost_diff = HistogramAddEval(out->histograms[idx1],
out->histograms[idx2],
cur_combo, best_cost_diff);
if (curr_cost_diff < best_cost_diff) { // found a better pair?
{ // swap cur/best combo histograms
VP8LHistogram* const tmp_histo = cur_combo;
cur_combo = best_combo;
@ -305,7 +415,7 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
}
}
if (best_cost_diff < 0.0) {
if (best_idx1 >= 0) {
*out->histograms[best_idx1] = *best_combo;
// swap best_idx2 slot with last one (which is now unused)
--out_size;
@ -315,7 +425,7 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
}
tries_with_no_success = 0;
}
if (++tries_with_no_success >= 50) {
if (++tries_with_no_success >= num_tries_no_success) {
break;
}
}
@ -330,20 +440,11 @@ static int HistogramCombine(const VP8LHistogramSet* const in,
// -----------------------------------------------------------------------------
// Histogram refinement
// What is the bit cost of moving square_histogram from
// cur_symbol to candidate_symbol.
// TODO(skal): we don't really need to copy the histogram and Add(). Instead
// we just need VP8LDualHistogramEstimateBits(A, B) estimation function.
// What is the bit cost of moving square_histogram from cur_symbol to candidate.
static double HistogramDistance(const VP8LHistogram* const square_histogram,
const VP8LHistogram* const candidate) {
const double previous_bit_cost = candidate->bit_cost_;
double new_bit_cost;
VP8LHistogram modified_histo;
modified_histo = *candidate;
VP8LHistogramAdd(&modified_histo, square_histogram);
new_bit_cost = VP8LHistogramEstimateBits(&modified_histo);
return new_bit_cost - previous_bit_cost;
const VP8LHistogram* const candidate,
double cost_threshold) {
return HistogramAddThresh(candidate, square_histogram, cost_threshold);
}
// Find the best 'out' histogram for each of the 'in' histograms.
@ -354,11 +455,12 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
int i;
for (i = 0; i < in->size; ++i) {
int best_out = 0;
double best_bits = HistogramDistance(in->histograms[i], out->histograms[0]);
double best_bits =
HistogramDistance(in->histograms[i], out->histograms[0], 1.e38);
int k;
for (k = 1; k < out->size; ++k) {
const double cur_bits =
HistogramDistance(in->histograms[i], out->histograms[k]);
HistogramDistance(in->histograms[i], out->histograms[k], best_bits);
if (cur_bits < best_bits) {
best_bits = cur_bits;
best_out = k;
@ -372,7 +474,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in,
HistogramClear(out->histograms[i]);
}
for (i = 0; i < in->size; ++i) {
VP8LHistogramAdd(out->histograms[symbols[i]], in->histograms[i]);
HistogramAdd(in->histograms[i], out->histograms[symbols[i]]);
}
}
@ -384,8 +486,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
int ok = 0;
const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
const int num_histo_pairs = 10 + quality / 2; // For HistogramCombine().
const int histo_image_raw_size = histo_xsize * histo_ysize;
// Heuristic params for HistogramCombine().
const int num_tries_no_success = 8 + (quality >> 1);
const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3;
VP8LHistogramSet* const image_out =
VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
if (image_out == NULL) return 0;
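The three heuristics above scale the histogram-combine effort with quality. A worked example of the arithmetic at quality 75:

#include <stdio.h>

int main(void) {
  const int quality = 75;
  const int num_tries_no_success = 8 + (quality >> 1);                   /* 45 */
  const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);  /* 4  */
  const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3;        /* 46 */
  printf("tries=%d iter_mult=%d pairs=%d\n",
         num_tries_no_success, iter_mult, num_pairs);
  return 0;
}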
@ -393,7 +500,8 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize,
// Build histogram image.
HistogramBuildImage(xsize, histo_bits, refs, image_out);
// Collapse similar histograms.
if (!HistogramCombine(image_out, image_in, num_histo_pairs)) {
if (!HistogramCombine(image_out, image_in, iter_mult, num_pairs,
num_tries_no_success)) {
goto Error;
}
// Find the optimal map from original histograms to the final ones.


@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Author: Jyrki Alakuijala (jyrki@google.com)
@ -80,22 +82,6 @@ double VP8LHistogramEstimateBits(const VP8LHistogram* const p);
// represent the entropy code itself.
double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p);
static WEBP_INLINE void VP8LHistogramAdd(VP8LHistogram* const p,
const VP8LHistogram* const a) {
int i;
for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
p->literal_[i] += a->literal_[i];
}
for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
p->distance_[i] += a->distance_[i];
}
for (i = 0; i < 256; ++i) {
p->red_[i] += a->red_[i];
p->blue_[i] += a->blue_[i];
p->alpha_[i] += a->alpha_[i];
}
}
static WEBP_INLINE int VP8LHistogramNumCodes(const VP8LHistogram* const p) {
return 256 + NUM_LENGTH_CODES +
((p->palette_code_bits_ > 0) ? (1 << p->palette_code_bits_) : 0);


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// VP8Iterator: block iterator


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Enhancement layer (for YUV444/422)


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebPPicture utils: colorspace conversion, crop, ...
@ -290,8 +292,11 @@ int WebPPictureView(const WebPPicture* src,
dst->y = src->y + top * src->y_stride + left;
dst->u = src->u + (top >> 1) * src->uv_stride + (left >> 1);
dst->v = src->v + (top >> 1) * src->uv_stride + (left >> 1);
dst->y_stride = src->y_stride;
dst->uv_stride = src->uv_stride;
if (src->a != NULL) {
dst->a = src->a + top * src->a_stride + left;
dst->a_stride = src->a_stride;
}
#ifdef WEBP_EXPERIMENTAL_FEATURES
if (src->u0 != NULL) {
@ -299,10 +304,12 @@ int WebPPictureView(const WebPPicture* src,
IS_YUV_CSP(dst->colorspace, WEBP_YUV422) ? (left >> 1) : left;
dst->u0 = src->u0 + top * src->uv0_stride + left_pos;
dst->v0 = src->v0 + top * src->uv0_stride + left_pos;
dst->uv0_stride = src->uv0_stride;
}
#endif
} else {
dst->argb = src->argb + top * src->argb_stride + left;
dst->argb_stride = src->argb_stride;
}
return 1;
}
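The hunk above makes the sub-view also inherit a_stride and argb_stride from its parent. A minimal, hedged usage sketch (the include path and sizes are illustrative, not taken from this commit):

#include "webp/encode.h"

// Crop a 100x100 view at (16, 16) out of an already-filled picture 'pic'.
static int crop_view_example(const WebPPicture* pic) {
  WebPPicture view;
  if (!WebPPictureInit(&view)) return 0;
  if (!WebPPictureView(pic, 16, 16, 100, 100, &view)) return 0;
  // 'view' shares pixel memory with 'pic'; with this fix its alpha/ARGB
  // strides now match the parent, so the view can be encoded directly.
  return 1;
}

The view owns no buffers of its own, so the parent picture must stay alive for as long as the view is used.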
@ -457,7 +464,6 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) {
(uint8_t*)tmp.argb, width, height,
tmp.argb_stride * 4,
work, 4);
}
WebPPictureFree(pic);
free(work);
@ -705,7 +711,7 @@ static int Import(WebPPicture* const picture,
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const int offset = step * x + y * rgb_stride;
const uint32_t argb = (a_ptr[offset] << 24) |
const uint32_t argb = ((uint32_t)a_ptr[offset] << 24) |
(r_ptr[offset] << 16) |
(g_ptr[offset] << 8) |
(b_ptr[offset]);
@ -801,11 +807,11 @@ int WebPPictureYUVAToARGB(WebPPicture* picture) {
// Insert alpha values if needed, in replacement for the default 0xff ones.
if (picture->colorspace & WEBP_CSP_ALPHA_BIT) {
for (y = 0; y < height; ++y) {
uint32_t* const dst = picture->argb + y * picture->argb_stride;
uint32_t* const argb_dst = picture->argb + y * picture->argb_stride;
const uint8_t* const src = picture->a + y * picture->a_stride;
int x;
for (x = 0; x < width; ++x) {
dst[x] = (dst[x] & 0x00ffffffu) | (src[x] << 24);
argb_dst[x] = (argb_dst[x] & 0x00ffffffu) | ((uint32_t)src[x] << 24);
}
}
}
@ -906,67 +912,135 @@ void WebPCleanupTransparentArea(WebPPicture* pic) {
#undef SIZE
#undef SIZE2
//------------------------------------------------------------------------------
// local-min distortion
//
// For every pixel in the *reference* picture, we search for the local best
// match in the compressed image. This is not a symmetrical measure.
// search radius. Shouldn't be too large.
#define RADIUS 2
static float AccumulateLSIM(const uint8_t* src, int src_stride,
const uint8_t* ref, int ref_stride,
int w, int h) {
int x, y;
double total_sse = 0.;
for (y = 0; y < h; ++y) {
const int y_0 = (y - RADIUS < 0) ? 0 : y - RADIUS;
const int y_1 = (y + RADIUS + 1 >= h) ? h : y + RADIUS + 1;
for (x = 0; x < w; ++x) {
const int x_0 = (x - RADIUS < 0) ? 0 : x - RADIUS;
const int x_1 = (x + RADIUS + 1 >= w) ? w : x + RADIUS + 1;
double best_sse = 255. * 255.;
const double value = (double)ref[y * ref_stride + x];
int i, j;
for (j = y_0; j < y_1; ++j) {
const uint8_t* s = src + j * src_stride;
for (i = x_0; i < x_1; ++i) {
const double sse = (double)(s[i] - value) * (s[i] - value);
if (sse < best_sse) best_sse = sse;
}
}
total_sse += best_sse;
}
}
return (float)total_sse;
}
#undef RADIUS
//------------------------------------------------------------------------------
// Distortion
// Max value returned in case of exact similarity.
static const double kMinDistortion_dB = 99.;
static float GetPSNR(const double v) {
return (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
: kMinDistortion_dB);
}
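The constant 4.3429448 above is 10 / ln(10), so for a mean squared error v the expression is just the usual peak signal-to-noise ratio against a 255 peak:

  -(10 / ln(10)) * ln(v / 255^2)  ==  10 * log10(255^2 / v)  ==  PSNR in dB,

returning kMinDistortion_dB (99 dB) when v is 0, i.e. exact similarity.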
int WebPPictureDistortion(const WebPPicture* pic1, const WebPPicture* pic2,
int WebPPictureDistortion(const WebPPicture* src, const WebPPicture* ref,
int type, float result[5]) {
int c;
DistoStats stats[5];
int has_alpha;
int uv_w, uv_h;
if (pic1 == NULL || pic2 == NULL ||
pic1->width != pic2->width || pic1->height != pic2->height ||
pic1->y == NULL || pic2->y == NULL ||
pic1->u == NULL || pic2->u == NULL ||
pic1->v == NULL || pic2->v == NULL ||
if (src == NULL || ref == NULL ||
src->width != ref->width || src->height != ref->height ||
src->y == NULL || ref->y == NULL ||
src->u == NULL || ref->u == NULL ||
src->v == NULL || ref->v == NULL ||
result == NULL) {
return 0;
}
// TODO(skal): provide distortion for ARGB too.
if (pic1->use_argb == 1 || pic1->use_argb != pic2->use_argb) {
if (src->use_argb == 1 || src->use_argb != ref->use_argb) {
return 0;
}
has_alpha = !!(pic1->colorspace & WEBP_CSP_ALPHA_BIT);
if (has_alpha != !!(pic2->colorspace & WEBP_CSP_ALPHA_BIT) ||
(has_alpha && (pic1->a == NULL || pic2->a == NULL))) {
has_alpha = !!(src->colorspace & WEBP_CSP_ALPHA_BIT);
if (has_alpha != !!(ref->colorspace & WEBP_CSP_ALPHA_BIT) ||
(has_alpha && (src->a == NULL || ref->a == NULL))) {
return 0;
}
memset(stats, 0, sizeof(stats));
VP8SSIMAccumulatePlane(pic1->y, pic1->y_stride,
pic2->y, pic2->y_stride,
pic1->width, pic1->height, &stats[0]);
VP8SSIMAccumulatePlane(pic1->u, pic1->uv_stride,
pic2->u, pic2->uv_stride,
(pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
&stats[1]);
VP8SSIMAccumulatePlane(pic1->v, pic1->uv_stride,
pic2->v, pic2->uv_stride,
(pic1->width + 1) >> 1, (pic1->height + 1) >> 1,
&stats[2]);
if (has_alpha) {
VP8SSIMAccumulatePlane(pic1->a, pic1->a_stride,
pic2->a, pic2->a_stride,
pic1->width, pic1->height, &stats[3]);
}
for (c = 0; c <= 4; ++c) {
if (type == 1) {
const double v = VP8SSIMGet(&stats[c]);
result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
: kMinDistortion_dB);
} else {
const double v = VP8SSIMGetSquaredError(&stats[c]);
result[c] = (float)((v > 0.) ? -4.3429448 * log(v / (255 * 255.))
: kMinDistortion_dB);
uv_w = HALVE(src->width);
uv_h = HALVE(src->height);
if (type >= 2) {
float sse[4];
sse[0] = AccumulateLSIM(src->y, src->y_stride,
ref->y, ref->y_stride, src->width, src->height);
sse[1] = AccumulateLSIM(src->u, src->uv_stride,
ref->u, ref->uv_stride, uv_w, uv_h);
sse[2] = AccumulateLSIM(src->v, src->uv_stride,
ref->v, ref->uv_stride, uv_w, uv_h);
sse[3] = has_alpha ? AccumulateLSIM(src->a, src->a_stride,
ref->a, ref->a_stride,
src->width, src->height)
: 0.f;
result[0] = GetPSNR(sse[0] / (src->width * src->height));
result[1] = GetPSNR(sse[1] / (uv_w * uv_h));
result[2] = GetPSNR(sse[2] / (uv_w * uv_h));
result[3] = GetPSNR(sse[3] / (src->width * src->height));
{
double total_sse = sse[0] + sse[1] + sse[2];
int total_pixels = src->width * src->height + 2 * uv_w * uv_h;
if (has_alpha) {
total_pixels += src->width * src->height;
total_sse += sse[3];
}
result[4] = GetPSNR(total_sse / total_pixels);
}
} else {
int c;
VP8SSIMAccumulatePlane(src->y, src->y_stride,
ref->y, ref->y_stride,
src->width, src->height, &stats[0]);
VP8SSIMAccumulatePlane(src->u, src->uv_stride,
ref->u, ref->uv_stride,
uv_w, uv_h, &stats[1]);
VP8SSIMAccumulatePlane(src->v, src->uv_stride,
ref->v, ref->uv_stride,
uv_w, uv_h, &stats[2]);
if (has_alpha) {
VP8SSIMAccumulatePlane(src->a, src->a_stride,
ref->a, ref->a_stride,
src->width, src->height, &stats[3]);
}
for (c = 0; c <= 4; ++c) {
if (type == 1) {
const double v = VP8SSIMGet(&stats[c]);
result[c] = (float)((v < 1.) ? -10.0 * log10(1. - v)
: kMinDistortion_dB);
} else {
const double v = VP8SSIMGetSquaredError(&stats[c]);
result[c] = GetPSNR(v);
}
// Accumulate forward
if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
}
// Accumulate forward
if (c < 4) VP8SSIMAddStats(&stats[c], &stats[4]);
}
return 1;
}
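With this change the 'type' argument selects the metric: 0 for PSNR, 1 for SSIM, and 2 (or more) for the new local-minimum LSIM measure above, with result[0..3] covering the Y/U/V/A planes and result[4] the overall value. A minimal, hedged calling sketch (picture setup is assumed to happen elsewhere):

#include <stdio.h>
#include "webp/encode.h"

// Print the overall distortion of 'pic' measured against 'ref'
// (both in YUV(A) mode and of identical size).
static void print_distortion(const WebPPicture* pic, const WebPPicture* ref) {
  float result[5];
  if (WebPPictureDistortion(pic, ref, 2 /* 0=PSNR, 1=SSIM, 2=LSIM */, result)) {
    printf("overall: %.2f dB\n", result[4]);
  }
}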


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Quantization
@ -27,6 +29,8 @@
#define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
// power-law modulation. Must be strictly less than 1.
#define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
#define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
#if defined(__cplusplus) || defined(c_plusplus)
@ -224,28 +228,90 @@ static void SetupFilterStrength(VP8Encoder* const enc) {
// We want to emulate jpeg-like behaviour where the expected "good" quality
// is around q=75. Internally, our "good" middle is around c=50. So we
// map accordingly using linear piece-wise function
static double QualityToCompression(double q) {
const double c = q / 100.;
return (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
static double QualityToCompression(double c) {
const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
// The file size roughly scales as pow(quantizer, 3.). Actually, the
// exponent is somewhere between 2.8 and 3.2, but we're mostly interested
// in the mid-quant range. So we scale the compressibility inversely to
// this power-law: quant ~= compression ^ 1/3. This law holds well for
// low quant. Finer modelling for high-quant would make use of kAcTable[]
// more explicitly.
const double v = pow(linear_c, 1 / 3.);
return v;
}
static double QualityToJPEGCompression(double c, double alpha) {
// We map the complexity 'alpha' and quality setting 'c' to a compression
// exponent empirically matched to the compression curve of libjpeg6b.
// On average, the WebP output size will be roughly similar to that of a
// JPEG file compressed with same quality factor.
const double amin = 0.30;
const double amax = 0.85;
const double exp_min = 0.4;
const double exp_max = 0.9;
const double slope = (exp_min - exp_max) / (amax - amin);
// Linearly interpolate 'expn' from exp_min to exp_max
// in the [amin, amax] range.
const double expn = (alpha > amax) ? exp_min
: (alpha < amin) ? exp_max
: exp_max + slope * (alpha - amin);
const double v = pow(c, expn);
return v;
}
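A quick worked example of the two mappings (numbers chosen here for illustration): for quality 75, Q = 0.75, so linear_c = 2 * 0.75 - 1 = 0.5 and QualityToCompression() returns 0.5^(1/3) ≈ 0.79. With emulate_jpeg_size and a mid-range complexity of, say, alpha = 0.5, the slope is (0.4 - 0.9) / (0.85 - 0.30) ≈ -0.91, so expn ≈ 0.9 - 0.91 * (0.5 - 0.30) ≈ 0.72 and QualityToJPEGCompression() returns 0.75^0.72 ≈ 0.81; the per-segment code below then raises this base value to a segment-specific exponent and maps it to a quantizer via q = 127 * (1 - c).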
static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
const VP8SegmentInfo* const S2) {
return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
}
static void SimplifySegments(VP8Encoder* const enc) {
int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
const int num_segments = enc->segment_hdr_.num_segments_;
int num_final_segments = 1;
int s1, s2;
for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
int found = 0;
// check if we already have similar segment
for (s2 = 0; s2 < num_final_segments; ++s2) {
const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
if (SegmentsAreEquivalent(S1, S2)) {
found = 1;
break;
}
}
map[s1] = s2;
if (!found) {
if (num_final_segments != s1) {
enc->dqm_[num_final_segments] = enc->dqm_[s1];
}
++num_final_segments;
}
}
if (num_final_segments < num_segments) { // Remap
int i = enc->mb_w_ * enc->mb_h_;
while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
enc->segment_hdr_.num_segments_ = num_final_segments;
// Replicate the trailing segment infos (it's mostly cosmetics)
for (i = num_final_segments; i < num_segments; ++i) {
enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
}
}
}
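As a small worked example of the remap (editor's numbers): with four segments where dqm_[2] happens to equal dqm_[0] and dqm_[3] equals dqm_[1], the loop leaves num_final_segments at 2 and builds map = {0, 1, 0, 1}; every mb_info_[].segment_ is then rewritten through that map, num_segments_ drops to 2, and the trailing dqm_[2..3] slots are overwritten with copies of dqm_[1] purely for consistency.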
void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
int i;
int dq_uv_ac, dq_uv_dc;
const int num_segments = enc->config_->segments;
const int num_segments = enc->segment_hdr_.num_segments_;
const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
const double c_base = QualityToCompression(quality);
const double Q = quality / 100.;
const double c_base = enc->config_->emulate_jpeg_size ?
QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
QualityToCompression(Q);
for (i = 0; i < num_segments; ++i) {
// The file size roughly scales as pow(quantizer, 3.). Actually, the
// exponent is somewhere between 2.8 and 3.2, but we're mostly interested
// in the mid-quant range. So we scale the compressibility inversely to
// this power-law: quant ~= compression ^ 1/3. This law holds well for
// low quant. Finer modelling for high-quant would make use of kAcTable[]
// more explicitely.
// Additionally, we modulate the base exponent 1/3 to accommodate for the
// quantization susceptibility and allow denser segments to be quantized
// more.
const double expn = (1. - amp * enc->dqm_[i].alpha_) / 3.;
// We modulate the base coefficient to accommodate for the quantization
// susceptibility and allow denser segments to be quantized more.
const double expn = 1. - amp * enc->dqm_[i].alpha_;
const double c = pow(c_base, expn);
const int q = (int)(127. * (1. - c));
assert(expn > 0.);
@ -281,9 +347,11 @@ void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
enc->dq_uv_dc_ = dq_uv_dc;
enc->dq_uv_ac_ = dq_uv_ac;
SetupMatrices(enc);
SetupFilterStrength(enc); // initialize segments' filtering, eventually
if (num_segments > 1) SimplifySegments(enc);
SetupMatrices(enc); // finalize quantization matrices
}
//------------------------------------------------------------------------------
@ -709,7 +777,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
int mode;
rd->mode_i16 = -1;
for (mode = 0; mode < 4; ++mode) {
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
int nz;
@ -838,7 +906,7 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->mode_uv = -1;
InitScore(&rd_best);
for (mode = 0; mode < 4; ++mode) {
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
VP8ModeScore rd_uv;
// Reconstruct
@ -867,10 +935,10 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
const VP8Encoder* const enc = it->enc_;
const int i16 = (it->mb_->type_ == 1);
const int is_i16 = (it->mb_->type_ == 1);
int nz = 0;
if (i16) {
if (is_i16) {
nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
} else {
VP8IteratorStartI4(it);
@ -889,11 +957,66 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
rd->nz = nz;
}
// Refine intra16/intra4 sub-modes based on distortion only (not rate).
static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
const int is_i16 = (it->mb_->type_ == 1);
score_t best_score = MAX_COST;
if (try_both_i4_i16 || is_i16) {
int mode;
int best_mode = -1;
for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
const uint8_t* const src = it->yuv_in_ + Y_OFF;
const score_t score = VP8SSE16x16(src, ref);
if (score < best_score) {
best_mode = mode;
best_score = score;
}
}
VP8SetIntra16Mode(it, best_mode);
}
if (try_both_i4_i16 || !is_i16) {
uint8_t modes_i4[16];
// We don't evaluate the rate here, but just account for it through a
// constant penalty (i4 mode usually needs more bits compared to i16).
score_t score_i4 = (score_t)I4_PENALTY;
VP8IteratorStartI4(it);
do {
int mode;
int best_sub_mode = -1;
score_t best_sub_score = MAX_COST;
const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
// TODO(skal): we don't really need the prediction pixels here,
// but just the distortion against 'src'.
VP8MakeIntra4Preds(it);
for (mode = 0; mode < NUM_BMODES; ++mode) {
const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
const score_t score = VP8SSE4x4(src, ref);
if (score < best_sub_score) {
best_sub_mode = mode;
best_sub_score = score;
}
}
modes_i4[it->i4_] = best_sub_mode;
score_i4 += best_sub_score;
if (score_i4 >= best_score) break;
} while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
if (score_i4 < best_score) {
VP8SetIntra4Mode(it, modes_i4);
}
}
}
//------------------------------------------------------------------------------
// Entry point
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
VP8RDLevel rd_opt) {
int is_skipped;
const int method = it->enc_->method_;
InitScore(rd);
@ -902,22 +1025,21 @@ int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt) {
VP8MakeLuma16Preds(it);
VP8MakeChroma8Preds(it);
// for rd_opt = 2, we perform trellis-quant on the final decision only.
// for rd_opt > 2, we use it for every scoring (=much slower).
if (rd_opt > 0) {
it->do_trellis_ = (rd_opt > 2);
if (rd_opt > RD_OPT_NONE) {
it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
PickBestIntra16(it, rd);
if (it->enc_->method_ >= 2) {
if (method >= 2) {
PickBestIntra4(it, rd);
}
PickBestUV(it, rd);
if (rd_opt == 2) {
if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
it->do_trellis_ = 1;
SimpleQuantize(it, rd);
}
} else {
// TODO: for method_ == 2, pick the best intra4/intra16 based on SSE
it->do_trellis_ = (it->enc_->method_ == 2);
// For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
// For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
DistoRefine(it, (method >= 2));
SimpleQuantize(it, rd);
}
is_skipped = (rd->nz == 0);


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Header syntax writing
@ -11,7 +13,9 @@
#include <assert.h>
#include "../webp/format_constants.h"
#include "../utils/utils.h"
#include "../webp/format_constants.h" // RIFF constants
#include "../webp/mux_types.h" // ALPHA_FLAG
#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -21,25 +25,12 @@ extern "C" {
//------------------------------------------------------------------------------
// Helper functions
// TODO(later): Move to webp/format_constants.h?
static void PutLE24(uint8_t* const data, uint32_t val) {
data[0] = (val >> 0) & 0xff;
data[1] = (val >> 8) & 0xff;
data[2] = (val >> 16) & 0xff;
}
static void PutLE32(uint8_t* const data, uint32_t val) {
PutLE24(data, val);
data[3] = (val >> 24) & 0xff;
}
static int IsVP8XNeeded(const VP8Encoder* const enc) {
return !!enc->has_alpha_; // Currently the only case when VP8X is needed.
// This could change in the future.
}
static int PutPaddingByte(const WebPPicture* const pic) {
const uint8_t pad_byte[1] = { 0 };
return !!pic->writer(pad_byte, 1, pic);
}
@ -73,14 +64,14 @@ static WebPEncodingError PutVP8XHeader(const VP8Encoder* const enc) {
assert(pic->width <= MAX_CANVAS_SIZE && pic->height <= MAX_CANVAS_SIZE);
if (enc->has_alpha_) {
flags |= ALPHA_FLAG_BIT;
flags |= ALPHA_FLAG;
}
PutLE32(vp8x + TAG_SIZE, VP8X_CHUNK_SIZE);
PutLE32(vp8x + CHUNK_HEADER_SIZE, flags);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 4, pic->width - 1);
PutLE24(vp8x + CHUNK_HEADER_SIZE + 7, pic->height - 1);
if(!pic->writer(vp8x, sizeof(vp8x), pic)) {
if (!pic->writer(vp8x, sizeof(vp8x), pic)) {
return VP8_ENC_ERROR_BAD_WRITE;
}
return VP8_ENC_OK;
@ -327,7 +318,9 @@ static size_t GeneratePartition0(VP8Encoder* const enc) {
PutSegmentHeader(bw, enc);
PutFilterHeader(bw, &enc->filter_hdr_);
VP8PutValue(bw, enc->config_->partitions, 2);
VP8PutValue(bw, enc->num_parts_ == 8 ? 3 :
enc->num_parts_ == 4 ? 2 :
enc->num_parts_ == 2 ? 1 : 0, 2);
PutQuant(bw, enc);
VP8PutBitUniform(bw, 0); // no proba update
VP8WriteProbas(bw, &enc->proba_);

256
3rdparty/libwebp/enc/token.c vendored Normal file

@ -0,0 +1,256 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Paginated token buffer
//
// A 'token' is a bit value associated with a probability, either fixed
// or a later-to-be-determined after statistics have been collected.
// For dynamic probability, we just record the slot id (idx) for the probability
// value in the final probability array (uint8_t* probas in VP8EmitTokens).
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "./vp8enci.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
#endif
#if !defined(DISABLE_TOKEN_BUFFER)
// we use pages to reduce the number of memcpy()
#define MAX_NUM_TOKEN 8192 // max number of token per page
#define FIXED_PROBA_BIT (1u << 14)
struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit
// bit #14: constant proba or idx
// bits 0..13: slot or constant proba
VP8Tokens* next_;
};
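For reference, a short sketch (not from the libwebp sources) of how one of these 16-bit token words is unpacked; it mirrors the layout in the comment above and the emission loop in VP8EmitTokens() further down:

#include <stdint.h>

// bit 15: coded bit value; bit 14: 'constant proba' flag;
// bits 0..13: either a fixed probability (0..255) or a slot index.
static void decode_token_example(uint16_t token) {
  const int bit = (token >> 15) & 1;
  if (token & (1u << 14)) {                 // same test as FIXED_PROBA_BIT
    const int proba = token & 0xffu;        // -> VP8PutBit(bw, bit, proba)
    (void)bit; (void)proba;
  } else {
    const int slot = token & 0x3fffu;       // -> VP8PutBit(bw, bit, probas[slot])
    (void)bit; (void)slot;
  }
}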
//------------------------------------------------------------------------------
void VP8TBufferInit(VP8TBuffer* const b) {
b->tokens_ = NULL;
b->pages_ = NULL;
b->last_page_ = &b->pages_;
b->left_ = 0;
b->error_ = 0;
}
void VP8TBufferClear(VP8TBuffer* const b) {
if (b != NULL) {
const VP8Tokens* p = b->pages_;
while (p != NULL) {
const VP8Tokens* const next = p->next_;
free((void*)p);
p = next;
}
VP8TBufferInit(b);
}
}
static int TBufferNewPage(VP8TBuffer* const b) {
VP8Tokens* const page = b->error_ ? NULL : (VP8Tokens*)malloc(sizeof(*page));
if (page == NULL) {
b->error_ = 1;
return 0;
}
*b->last_page_ = page;
b->last_page_ = &page->next_;
b->left_ = MAX_NUM_TOKEN;
b->tokens_ = page->tokens_;
page->next_ = NULL;
return 1;
}
//------------------------------------------------------------------------------
#define TOKEN_ID(t, b, ctx, p) \
((p) + NUM_PROBAS * ((ctx) + NUM_CTX * ((b) + NUM_BANDS * (t))))
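TOKEN_ID() simply flattens the (type, band, ctx, proba) coordinates into one index over the NUM_BANDS = 8, NUM_CTX = 3, NUM_PROBAS = 11 probability table declared in vp8enci.h; for example, TOKEN_ID(1, 2, 1, 3) = 3 + 11 * (1 + 3 * (2 + 8 * 1)) = 3 + 11 * 31 = 344.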
static WEBP_INLINE int AddToken(VP8TBuffer* const b,
int bit, uint32_t proba_idx) {
assert(proba_idx < FIXED_PROBA_BIT);
assert(bit == 0 || bit == 1);
if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | proba_idx;
}
return bit;
}
static WEBP_INLINE void AddConstantToken(VP8TBuffer* const b,
int bit, int proba) {
assert(proba < 256);
assert(bit == 0 || bit == 1);
if (b->left_ > 0 || TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | FIXED_PROBA_BIT | proba;
}
}
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens) {
int n = first;
uint32_t base_id = TOKEN_ID(coeff_type, n, ctx, 0);
if (!AddToken(tokens, last >= 0, base_id + 0)) {
return 0;
}
while (n < 16) {
const int c = coeffs[n++];
const int sign = c < 0;
int v = sign ? -c : c;
if (!AddToken(tokens, v != 0, base_id + 1)) {
ctx = 0;
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
continue;
}
if (!AddToken(tokens, v > 1, base_id + 2)) {
ctx = 1;
} else {
if (!AddToken(tokens, v > 4, base_id + 3)) {
if (AddToken(tokens, v != 2, base_id + 4))
AddToken(tokens, v == 4, base_id + 5);
} else if (!AddToken(tokens, v > 10, base_id + 6)) {
if (!AddToken(tokens, v > 6, base_id + 7)) {
AddConstantToken(tokens, v == 6, 159);
} else {
AddConstantToken(tokens, v >= 9, 165);
AddConstantToken(tokens, !(v & 1), 145);
}
} else {
int mask;
const uint8_t* tab;
if (v < 3 + (8 << 1)) { // VP8Cat3 (3b)
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 0, base_id + 9);
v -= 3 + (8 << 0);
mask = 1 << 2;
tab = VP8Cat3;
} else if (v < 3 + (8 << 2)) { // VP8Cat4 (4b)
AddToken(tokens, 0, base_id + 8);
AddToken(tokens, 1, base_id + 9);
v -= 3 + (8 << 1);
mask = 1 << 3;
tab = VP8Cat4;
} else if (v < 3 + (8 << 3)) { // VP8Cat5 (5b)
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 0, base_id + 10);
v -= 3 + (8 << 2);
mask = 1 << 4;
tab = VP8Cat5;
} else { // VP8Cat6 (11b)
AddToken(tokens, 1, base_id + 8);
AddToken(tokens, 1, base_id + 10);
v -= 3 + (8 << 3);
mask = 1 << 10;
tab = VP8Cat6;
}
while (mask) {
AddConstantToken(tokens, !!(v & mask), *tab++);
mask >>= 1;
}
}
ctx = 2;
}
AddConstantToken(tokens, sign, 128);
base_id = TOKEN_ID(coeff_type, VP8EncBands[n], ctx, 0);
if (n == 16 || !AddToken(tokens, n <= last, base_id + 0)) {
return 1; // EOB
}
}
return 1;
}
#undef TOKEN_ID
//------------------------------------------------------------------------------
// This function works, but isn't currently used. Saved for later.
#if 0
static void Record(int bit, proba_t* const stats) {
proba_t p = *stats;
if (p >= 0xffff0000u) { // an overflow is inbound.
p = ((p + 1u) >> 1) & 0x7fff7fffu; // -> divide the stats by 2.
}
// record bit count (lower 16 bits) and increment total count (upper 16 bits).
p += 0x00010000u + bit;
*stats = p;
}
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats) {
const VP8Tokens* p = b->pages_;
while (p != NULL) {
const int N = (p->next_ == NULL) ? b->left_ : 0;
int n = MAX_NUM_TOKEN;
while (n-- > N) {
const uint16_t token = p->tokens_[n];
if (!(token & FIXED_PROBA_BIT)) {
Record((token >> 15) & 1, stats + (token & 0x3fffu));
}
}
p = p->next_;
}
}
#endif // 0
//------------------------------------------------------------------------------
// Final coding pass, with known probabilities
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass) {
const VP8Tokens* p = b->pages_;
(void)final_pass;
if (b->error_) return 0;
while (p != NULL) {
const VP8Tokens* const next = p->next_;
const int N = (next == NULL) ? b->left_ : 0;
int n = MAX_NUM_TOKEN;
while (n-- > N) {
const uint16_t token = p->tokens_[n];
const int bit = (token >> 15) & 1;
if (token & FIXED_PROBA_BIT) {
VP8PutBit(bw, bit, token & 0xffu); // constant proba
} else {
VP8PutBit(bw, bit, probas[token & 0x3fffu]);
}
}
if (final_pass) free((void*)p);
p = next;
}
if (final_pass) b->pages_ = NULL;
return 1;
}
//------------------------------------------------------------------------------
#else // DISABLE_TOKEN_BUFFER
void VP8TBufferInit(VP8TBuffer* const b) {
(void)b;
}
void VP8TBufferClear(VP8TBuffer* const b) {
(void)b;
}
#endif // !DISABLE_TOKEN_BUFFER
#if defined(__cplusplus) || defined(c_plusplus)
} // extern "C"
#endif


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Token probabilities


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP encoder: internal header.
@ -16,6 +18,7 @@
#include "../webp/encode.h"
#include "../dsp/dsp.h"
#include "../utils/bit_writer.h"
#include "../utils/thread.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -26,12 +29,9 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 2
#define ENC_MIN_VERSION 3
#define ENC_REV_VERSION 1
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 64
// intra prediction modes
enum { B_DC_PRED = 0, // 4x4 modes
B_TM_PRED = 1,
@ -47,7 +47,8 @@ enum { B_DC_PRED = 0, // 4x4 modes
// Luma16 or UV modes
DC_PRED = B_DC_PRED, V_PRED = B_VE_PRED,
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED
H_PRED = B_HE_PRED, TM_PRED = B_TM_PRED,
NUM_PRED_MODES = 4
};
enum { NUM_MB_SEGMENTS = 4,
@ -56,10 +57,18 @@ enum { NUM_MB_SEGMENTS = 4,
NUM_BANDS = 8,
NUM_CTX = 3,
NUM_PROBAS = 11,
MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67 // last (inclusive) level with variable cost
MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost
MAX_LEVEL = 2047 // max level (note: max codable is 2047 + 67)
};
typedef enum { // Rate-distortion optimization levels
RD_OPT_NONE = 0, // no rd-opt
RD_OPT_BASIC = 1, // basic scoring (no trellis)
RD_OPT_TRELLIS = 2, // perform trellis-quant on the final decision only
RD_OPT_TRELLIS_ALL = 3 // trellis-quant for every scoring (much slower)
} VP8RDLevel;
// YUV-cache parameters. Cache is 16-pixels wide.
// The original or reconstructed samples can be accessed using VP8Scan[]
// The predicted blocks can be accessed using offsets to yuv_p_ and
@ -160,7 +169,17 @@ typedef int64_t score_t; // type used for scores, rate, distortion
static WEBP_INLINE int QUANTDIV(int n, int iQ, int B) {
return (n * iQ + B) >> QFIX;
}
extern const uint8_t VP8Zigzag[16];
// size of histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 31
typedef struct VP8Histogram VP8Histogram;
struct VP8Histogram {
// TODO(skal): we only need to store the max_value and last_non_zero actually.
int distribution[MAX_COEFF_THRESH + 1];
};
// Uncomment the following to remove token-buffer code:
// #define DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// Headers
@ -314,44 +333,37 @@ void VP8SetSegment(const VP8EncIterator* const it, int segment);
//------------------------------------------------------------------------------
// Paginated token buffer
// WIP: #define USE_TOKEN_BUFFER
#ifdef USE_TOKEN_BUFFER
#define MAX_NUM_TOKEN 2048
typedef struct VP8Tokens VP8Tokens;
struct VP8Tokens {
uint16_t tokens_[MAX_NUM_TOKEN]; // bit#15: bit, bits 0..14: slot
int left_;
VP8Tokens* next_;
};
typedef struct VP8Tokens VP8Tokens; // struct details in token.c
typedef struct {
VP8Tokens* rows_;
uint16_t* tokens_; // set to (*last_)->tokens_
VP8Tokens** last_;
int left_;
int error_; // true in case of malloc error
#if !defined(DISABLE_TOKEN_BUFFER)
VP8Tokens* pages_; // first page
VP8Tokens** last_page_; // last page
uint16_t* tokens_; // set to (*last_page_)->tokens_
int left_; // how many free tokens left before the page is full.
#endif
int error_; // true in case of malloc error
} VP8TBuffer;
void VP8TBufferInit(VP8TBuffer* const b); // initialize an empty buffer
int VP8TBufferNewPage(VP8TBuffer* const b); // allocate a new page
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate memory
void VP8TBufferClear(VP8TBuffer* const b); // de-allocate pages memory
int VP8EmitTokens(const VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas);
#if !defined(DISABLE_TOKEN_BUFFER)
static WEBP_INLINE int VP8AddToken(VP8TBuffer* const b,
int bit, int proba_idx) {
if (b->left_ > 0 || VP8TBufferNewPage(b)) {
const int slot = --b->left_;
b->tokens_[slot] = (bit << 15) | proba_idx;
}
return bit;
}
// Finalizes bitstream when probabilities are known.
// Deletes the allocated token memory if final_pass is true.
int VP8EmitTokens(VP8TBuffer* const b, VP8BitWriter* const bw,
const uint8_t* const probas, int final_pass);
#endif // USE_TOKEN_BUFFER
// record the coding of coefficients without knowing the probabilities yet
int VP8RecordCoeffTokens(int ctx, int coeff_type, int first, int last,
const int16_t* const coeffs,
VP8TBuffer* const tokens);
// unused for now
void VP8TokenToStats(const VP8TBuffer* const b, proba_t* const stats);
#endif // !DISABLE_TOKEN_BUFFER
//------------------------------------------------------------------------------
// VP8Encoder
@ -376,6 +388,7 @@ struct VP8Encoder {
// per-partition boolean decoders.
VP8BitWriter bw_; // part0
VP8BitWriter parts_[MAX_NUM_PARTITIONS]; // token partitions
VP8TBuffer tokens_; // token buffer
int percent_; // for progress
@ -383,6 +396,7 @@ struct VP8Encoder {
int has_alpha_;
uint8_t* alpha_data_; // non-NULL if transparency is present
uint32_t alpha_data_size_;
WebPWorker alpha_worker_;
// enhancement layer
int use_layer_;
@ -394,6 +408,7 @@ struct VP8Encoder {
VP8SegmentInfo dqm_[NUM_MB_SEGMENTS];
int base_quant_; // nominal quantizer value. Only used
// for relative coding of segments' quant.
int alpha_; // global susceptibility (<=> complexity)
int uv_alpha_; // U/V quantization susceptibility
// global offset of quantizers, shared by all segments
int dq_y1_dc_;
@ -409,9 +424,12 @@ struct VP8Encoder {
int block_count_[3];
// quality/speed settings
int method_; // 0=fastest, 6=best/slowest.
int rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor
int method_; // 0=fastest, 6=best/slowest.
VP8RDLevel rd_opt_level_; // Deduced from method_.
int max_i4_header_bits_; // partition #0 safeness factor
int thread_level_; // derived from config->thread_level
int do_search_; // derived from config->target_XXX
int use_tokens_; // if true, use token buffer
// Memory
VP8MBInfo* mb_info_; // contextual macroblock infos (mb_w_ + 1)
@ -455,6 +473,11 @@ void VP8EncFreeBitWriters(VP8Encoder* const enc);
// in frame.c
extern const uint8_t VP8EncBands[16 + 1];
extern const uint8_t VP8Cat3[];
extern const uint8_t VP8Cat4[];
extern const uint8_t VP8Cat5[];
extern const uint8_t VP8Cat6[];
// Form all the four Intra16x16 predictions in the yuv_p_ cache
void VP8MakeLuma16Preds(const VP8EncIterator* const it);
// Form all the four Chroma8x8 predictions in the yuv_p_ cache
@ -466,9 +489,9 @@ void VP8MakeIntra4Preds(const VP8EncIterator* const it);
int VP8GetCostLuma16(VP8EncIterator* const it, const VP8ModeScore* const rd);
int VP8GetCostLuma4(VP8EncIterator* const it, const int16_t levels[16]);
int VP8GetCostUV(VP8EncIterator* const it, const VP8ModeScore* const rd);
// Main stat / coding passes
// Main coding calls
int VP8EncLoop(VP8Encoder* const enc);
int VP8StatLoop(VP8Encoder* const enc);
int VP8EncTokenLoop(VP8Encoder* const enc);
// in webpenc.c
// Assign an error code to a picture. Return false for convenience.
@ -485,12 +508,14 @@ int VP8EncAnalyze(VP8Encoder* const enc);
// Sets up segment's quantization values, base_quant_ and filter strengths.
void VP8SetSegmentParams(VP8Encoder* const enc, float quality);
// Pick best modes and fills the levels. Returns true if skipped.
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, int rd_opt);
int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
VP8RDLevel rd_opt);
// in alpha.c
void VP8EncInitAlpha(VP8Encoder* const enc); // initialize alpha compression
int VP8EncStartAlpha(VP8Encoder* const enc); // start alpha coding process
int VP8EncFinishAlpha(VP8Encoder* const enc); // finalize compressed data
void VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
int VP8EncDeleteAlpha(VP8Encoder* const enc); // delete compressed data
// in layer.c
void VP8EncInitLayer(VP8Encoder* const enc); // init everything


@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// main entry for the lossless encoder.
@ -37,7 +39,8 @@ extern "C" {
static int CompareColors(const void* p1, const void* p2) {
const uint32_t a = *(const uint32_t*)p1;
const uint32_t b = *(const uint32_t*)p2;
return (a < b) ? -1 : (a > b) ? 1 : 0;
assert(a != b);
return (a < b) ? -1 : 1;
}
// If number of colors in the image is less than or equal to MAX_PALETTE_SIZE,
@ -85,7 +88,7 @@ static int AnalyzeAndCreatePalette(const WebPPicture* const pic,
argb += pic->argb_stride;
}
// TODO(skal): could we reuse in_use[] to speed up ApplyPalette()?
// TODO(skal): could we reuse in_use[] to speed up EncodePalette()?
num_colors = 0;
for (i = 0; i < (int)(sizeof(in_use) / sizeof(in_use[0])); ++i) {
if (in_use[i]) {
@ -220,7 +223,7 @@ static int GetHuffBitLengthsAndCodes(
}
// Create Huffman trees.
for (i = 0; i < histogram_image_size; ++i) {
for (i = 0; ok && (i < histogram_image_size); ++i) {
HuffmanTreeCode* const codes = &huffman_codes[5 * i];
VP8LHistogram* const histo = histogram_image->histograms[i];
ok = ok && VP8LCreateHuffmanTree(histo->literal_, 15, codes + 0);
@ -231,7 +234,11 @@ static int GetHuffBitLengthsAndCodes(
}
End:
if (!ok) free(mem_buf);
if (!ok) {
free(mem_buf);
// If one VP8LCreateHuffmanTree() above fails, we need to clean up behind.
memset(huffman_codes, 0, 5 * histogram_image_size * sizeof(*huffman_codes));
}
return ok;
}
@ -406,9 +413,10 @@ static int StoreHuffmanCode(VP8LBitWriter* const bw,
}
static void WriteHuffmanCode(VP8LBitWriter* const bw,
const HuffmanTreeCode* const code, int index) {
const int depth = code->code_lengths[index];
const int symbol = code->codes[index];
const HuffmanTreeCode* const code,
int code_index) {
const int depth = code->code_lengths[code_index];
const int symbol = code->codes[code_index];
VP8LWriteBits(bw, depth, symbol);
}
@ -557,6 +565,9 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) {
goto Error;
}
// Free combined histograms.
free(histogram_image);
histogram_image = NULL;
// Color Cache parameters.
VP8LWriteBits(bw, 1, use_color_cache);
@ -576,10 +587,10 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
uint32_t i;
if (histogram_argb == NULL) goto Error;
for (i = 0; i < histogram_image_xysize; ++i) {
const int index = histogram_symbols[i] & 0xffff;
histogram_argb[i] = 0xff000000 | (index << 8);
if (index >= max_index) {
max_index = index + 1;
const int symbol_index = histogram_symbols[i] & 0xffff;
histogram_argb[i] = 0xff000000 | (symbol_index << 8);
if (symbol_index >= max_index) {
max_index = symbol_index + 1;
}
}
histogram_image_size = max_index;
@ -603,9 +614,6 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
ClearHuffmanTreeIfOnlyOneSymbol(codes);
}
}
// Free combined histograms.
free(histogram_image);
histogram_image = NULL;
// Store actual literals.
StoreImageToBitMask(bw, width, histogram_bits, &refs,
@ -613,7 +621,7 @@ static int EncodeImageInternal(VP8LBitWriter* const bw,
ok = 1;
Error:
if (!ok) free(histogram_image);
free(histogram_image);
VP8LClearBackwardRefs(&refs);
if (huffman_codes != NULL) {
@ -711,13 +719,6 @@ static int ApplyCrossColorFilter(const VP8LEncoder* const enc,
// -----------------------------------------------------------------------------
static void PutLE32(uint8_t* const data, uint32_t val) {
data[0] = (val >> 0) & 0xff;
data[1] = (val >> 8) & 0xff;
data[2] = (val >> 16) & 0xff;
data[3] = (val >> 24) & 0xff;
}
static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic,
size_t riff_size, size_t vp8l_size) {
uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = {
@ -812,61 +813,94 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc,
return err;
}
// Bundles multiple (2, 4 or 8) pixels into a single pixel.
// Returns the new xsize.
static void BundleColorMap(const WebPPicture* const pic,
int xbits, uint32_t* bundled_argb, int xs) {
int y;
const int bit_depth = 1 << (3 - xbits);
uint32_t code = 0;
const uint32_t* argb = pic->argb;
const int width = pic->width;
const int height = pic->height;
for (y = 0; y < height; ++y) {
int x;
for (x = 0; x < width; ++x) {
const int mask = (1 << xbits) - 1;
const int xsub = x & mask;
if (xsub == 0) {
code = 0;
}
// TODO(vikasa): simplify the bundling logic.
code |= (argb[x] & 0xff00) << (bit_depth * xsub);
bundled_argb[y * xs + (x >> xbits)] = 0xff000000 | code;
static void ApplyPalette(uint32_t* src, uint32_t* dst,
uint32_t src_stride, uint32_t dst_stride,
const uint32_t* palette, int palette_size,
int width, int height, int xbits, uint8_t* row) {
int i, x, y;
int use_LUT = 1;
for (i = 0; i < palette_size; ++i) {
if ((palette[i] & 0xffff00ffu) != 0) {
use_LUT = 0;
break;
}
}
if (use_LUT) {
int inv_palette[MAX_PALETTE_SIZE] = { 0 };
for (i = 0; i < palette_size; ++i) {
const int color = (palette[i] >> 8) & 0xff;
inv_palette[color] = i;
}
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const int color = (src[x] >> 8) & 0xff;
row[x] = inv_palette[color];
}
VP8LBundleColorMap(row, width, xbits, dst);
src += src_stride;
dst += dst_stride;
}
} else {
// Use 1 pixel cache for ARGB pixels.
uint32_t last_pix = palette[0];
int last_idx = 0;
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const uint32_t pix = src[x];
if (pix != last_pix) {
for (i = 0; i < palette_size; ++i) {
if (pix == palette[i]) {
last_idx = i;
last_pix = pix;
break;
}
}
}
row[x] = last_idx;
}
VP8LBundleColorMap(row, width, xbits, dst);
src += src_stride;
dst += dst_stride;
}
argb += pic->argb_stride;
}
}
// Note: Expects "enc->palette_" to be set properly.
// Also, "enc->palette_" will be modified after this call and should not be used
// later.
static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
VP8LEncoder* const enc, int quality) {
static WebPEncodingError EncodePalette(VP8LBitWriter* const bw,
VP8LEncoder* const enc, int quality) {
WebPEncodingError err = VP8_ENC_OK;
int i, x, y;
int i;
const WebPPicture* const pic = enc->pic_;
uint32_t* argb = pic->argb;
uint32_t* src = pic->argb;
uint32_t* dst;
const int width = pic->width;
const int height = pic->height;
uint32_t* const palette = enc->palette_;
const int palette_size = enc->palette_size_;
uint8_t* row = NULL;
int xbits;
// Replace each input pixel by corresponding palette index.
for (y = 0; y < height; ++y) {
for (x = 0; x < width; ++x) {
const uint32_t pix = argb[x];
for (i = 0; i < palette_size; ++i) {
if (pix == palette[i]) {
argb[x] = 0xff000000u | (i << 8);
break;
}
}
}
argb += pic->argb_stride;
// This is done line by line.
if (palette_size <= 4) {
xbits = (palette_size <= 2) ? 3 : 2;
} else {
xbits = (palette_size <= 16) ? 1 : 0;
}
err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
if (err != VP8_ENC_OK) goto Error;
dst = enc->argb_;
row = WebPSafeMalloc((uint64_t)width, sizeof(*row));
if (row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY;
ApplyPalette(src, dst, pic->argb_stride, enc->current_width_,
palette, palette_size, width, height, xbits, row);
// Save palette to bitstream.
VP8LWriteBits(bw, 1, TRANSFORM_PRESENT);
VP8LWriteBits(bw, 2, COLOR_INDEXING_TRANSFORM);
@ -880,32 +914,17 @@ static WebPEncodingError ApplyPalette(VP8LBitWriter* const bw,
goto Error;
}
if (palette_size <= 16) {
// Image can be packed (multiple pixels per uint32_t).
int xbits = 1;
if (palette_size <= 2) {
xbits = 3;
} else if (palette_size <= 4) {
xbits = 2;
}
err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height);
if (err != VP8_ENC_OK) goto Error;
BundleColorMap(pic, xbits, enc->argb_, enc->current_width_);
}
Error:
free(row);
return err;
}
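A quick worked example of the packing above (sizes chosen for illustration): a 12-color palette gives xbits = 1, so two palette indices are bundled per output uint32_t and a 100-pixel row shrinks to VP8LSubSampleSize(100, 1) = 50 packed pixels; a 3-color palette gives xbits = 2 (four pixels per word) and a 2-color one xbits = 3 (eight per word). The per-row 'row' buffer only ever holds the un-bundled 8-bit indices.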
// -----------------------------------------------------------------------------
static int GetHistoBits(const WebPConfig* const config,
const WebPPicture* const pic) {
const int width = pic->width;
const int height = pic->height;
static int GetHistoBits(int method, int use_palette, int width, int height) {
const uint64_t hist_size = sizeof(VP8LHistogram);
// Make tile size a function of encoding method (Range: 0 to 6).
int histo_bits = 7 - config->method;
int histo_bits = (use_palette ? 9 : 7) - method;
while (1) {
const uint64_t huff_image_size = VP8LSubSampleSize(width, histo_bits) *
VP8LSubSampleSize(height, histo_bits) *
@ -917,13 +936,14 @@ static int GetHistoBits(const WebPConfig* const config,
(histo_bits > MAX_HUFFMAN_BITS) ? MAX_HUFFMAN_BITS : histo_bits;
}
static void InitEncParams(VP8LEncoder* const enc) {
static void FinishEncParams(VP8LEncoder* const enc) {
const WebPConfig* const config = enc->config_;
const WebPPicture* const picture = enc->pic_;
const WebPPicture* const pic = enc->pic_;
const int method = config->method;
const float quality = config->quality;
const int use_palette = enc->use_palette_;
enc->transform_bits_ = (method < 4) ? 5 : (method > 4) ? 3 : 4;
enc->histo_bits_ = GetHistoBits(config, picture);
enc->histo_bits_ = GetHistoBits(method, use_palette, pic->width, pic->height);
enc->cache_bits_ = (quality <= 25.f) ? 0 : 7;
}
@ -965,8 +985,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
goto Error;
}
InitEncParams(enc);
// ---------------------------------------------------------------------------
// Analyze image (entropy, num_palettes etc)
@ -975,8 +993,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config,
goto Error;
}
FinishEncParams(enc);
if (enc->use_palette_) {
err = ApplyPalette(bw, enc, quality);
err = EncodePalette(bw, enc, quality);
if (err != VP8_ENC_OK) goto Error;
// Color cache is disabled for palette.
enc->cache_bits_ = 0;


@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Lossless encoder: internal header.


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// WebP encoder: main entry point
@ -93,34 +95,53 @@ static void ResetBoundaryPredictions(VP8Encoder* const enc) {
enc->nz_[-1] = 0; // constant
}
// Map configured quality level to coding tools used.
//-------------+---+---+---+---+---+---+
// Quality | 0 | 1 | 2 | 3 | 4 | 5 +
//-------------+---+---+---+---+---+---+
// dynamic prob| ~ | x | x | x | x | x |
//-------------+---+---+---+---+---+---+
// rd-opt modes| | | x | x | x | x |
//-------------+---+---+---+---+---+---+
// fast i4/i16 | x | x | | | | |
//-------------+---+---+---+---+---+---+
// rd-opt i4/16| | | x | x | x | x |
//-------------+---+---+---+---+---+---+
// Trellis | | x | | | x | x |
//-------------+---+---+---+---+---+---+
// full-SNS | | | | | | x |
//-------------+---+---+---+---+---+---+
// Mapping from config->method_ to coding tools used.
//-------------------+---+---+---+---+---+---+---+
// Method | 0 | 1 | 2 | 3 |(4)| 5 | 6 |
//-------------------+---+---+---+---+---+---+---+
// fast probe | x | | | x | | | |
//-------------------+---+---+---+---+---+---+---+
// dynamic proba | ~ | x | x | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// fast mode analysis| | | | | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// basic rd-opt | | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// disto-score i4/16 | | | x | | | | |
//-------------------+---+---+---+---+---+---+---+
// rd-opt i4/16 | | | ~ | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// token buffer (opt)| | | | x | x | x | x |
//-------------------+---+---+---+---+---+---+---+
// Trellis | | | | | | x |Ful|
//-------------------+---+---+---+---+---+---+---+
// full-SNS | | | | | x | x | x |
//-------------------+---+---+---+---+---+---+---+
static void MapConfigToTools(VP8Encoder* const enc) {
const int method = enc->config_->method;
const int limit = 100 - enc->config_->partition_limit;
const WebPConfig* const config = enc->config_;
const int method = config->method;
const int limit = 100 - config->partition_limit;
enc->method_ = method;
enc->rd_opt_level_ = (method >= 6) ? 3
: (method >= 5) ? 2
: (method >= 3) ? 1
: 0;
enc->rd_opt_level_ = (method >= 6) ? RD_OPT_TRELLIS_ALL
: (method >= 5) ? RD_OPT_TRELLIS
: (method >= 3) ? RD_OPT_BASIC
: RD_OPT_NONE;
enc->max_i4_header_bits_ =
256 * 16 * 16 * // upper bound: up to 16bit per 4x4 block
(limit * limit) / (100 * 100); // ... modulated with a quadratic curve.
enc->thread_level_ = config->thread_level;
enc->do_search_ = (config->target_size > 0 || config->target_PSNR > 0);
if (!config->low_memory) {
#if !defined(DISABLE_TOKEN_BUFFER)
enc->use_tokens_ = (method >= 3) && !enc->do_search_;
#endif
if (enc->use_tokens_) {
enc->num_parts_ = 1; // doesn't work with multi-partition
}
}
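As a hedged illustration of how a caller lands on a given column of the table above (field names are the real WebPConfig ones; the comments just restate MapConfigToTools()):

#include "webp/encode.h"

static int config_example(void) {
  WebPConfig config;
  if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, 75.f)) return 0;
  config.method = 4;        // -> RD_OPT_BASIC, full-SNS, no trellis, and the
                            //    token buffer (unless a target-size search is on)
  config.thread_level = 1;  // lets the alpha plane be coded in parallel
  return WebPValidateConfig(&config);
}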
}
// Memory scaling with dimensions:
@ -259,17 +280,21 @@ static VP8Encoder* InitVP8Encoder(const WebPConfig* const config,
VP8EncInitLayer(enc);
#endif
VP8TBufferInit(&enc->tokens_);
return enc;
}
static void DeleteVP8Encoder(VP8Encoder* enc) {
static int DeleteVP8Encoder(VP8Encoder* enc) {
int ok = 1;
if (enc != NULL) {
VP8EncDeleteAlpha(enc);
ok = VP8EncDeleteAlpha(enc);
#ifdef WEBP_EXPERIMENTAL_FEATURES
VP8EncDeleteLayer(enc);
#endif
VP8TBufferClear(&enc->tokens_);
free(enc);
}
return ok;
}
//------------------------------------------------------------------------------
@ -332,7 +357,7 @@ int WebPReportProgress(const WebPPicture* const pic,
//------------------------------------------------------------------------------
int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
int ok;
int ok = 0;
if (pic == NULL)
return 0;
@ -351,32 +376,38 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) {
if (!config->lossless) {
VP8Encoder* enc = NULL;
if (pic->y == NULL || pic->u == NULL || pic->v == NULL) {
if (pic->argb != NULL) {
if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
} else {
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
}
// Make sure we have YUVA samples.
if (!WebPPictureARGBToYUVA(pic, WEBP_YUV420)) return 0;
}
enc = InitVP8Encoder(config, pic);
if (enc == NULL) return 0; // pic->error is already set.
// Note: each of the tasks below account for 20% in the progress report.
ok = VP8EncAnalyze(enc)
&& VP8StatLoop(enc)
&& VP8EncLoop(enc)
&& VP8EncFinishAlpha(enc)
ok = VP8EncAnalyze(enc);
// Analysis is done, proceed to actual coding.
ok = ok && VP8EncStartAlpha(enc); // possibly done in parallel
if (!enc->use_tokens_) {
ok = ok && VP8EncLoop(enc);
} else {
ok = ok && VP8EncTokenLoop(enc);
}
ok = ok && VP8EncFinishAlpha(enc);
#ifdef WEBP_EXPERIMENTAL_FEATURES
&& VP8EncFinishLayer(enc)
ok = ok && VP8EncFinishLayer(enc);
#endif
&& VP8EncWrite(enc);
ok = ok && VP8EncWrite(enc);
StoreStats(enc);
if (!ok) {
VP8EncFreeBitWriters(enc);
}
DeleteVP8Encoder(enc);
ok &= DeleteVP8Encoder(enc); // must always be called, even if !ok
} else {
if (pic->argb == NULL)
return WebPEncodingSetError(pic, VP8_ENC_ERROR_NULL_PARAMETER);
// Make sure we have ARGB samples.
if (pic->argb == NULL && !WebPPictureYUVAToARGB(pic)) {
return 0;
}
ok = VP8LEncodeImage(config, pic); // Sets pic->error in case of problem.
}
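For context, a minimal end-to-end call into this entry point (a sketch only: error handling is trimmed and the RGBA input is assumed to come from the caller):

#include <stdint.h>
#include <stdlib.h>
#include "webp/encode.h"

// Returns the size of the encoded WebP stream, or 0 on failure.
// On success, *out must be free()'d by the caller.
static size_t encode_rgba(const uint8_t* rgba, int width, int height,
                          float quality, uint8_t** out) {
  WebPConfig config;
  WebPPicture pic;
  WebPMemoryWriter wrt;
  size_t size = 0;

  *out = NULL;
  if (!WebPConfigPreset(&config, WEBP_PRESET_DEFAULT, quality)) return 0;
  if (!WebPPictureInit(&pic)) return 0;
  pic.width = width;
  pic.height = height;
  WebPMemoryWriterInit(&wrt);
  pic.writer = WebPMemoryWrite;
  pic.custom_ptr = &wrt;

  if (WebPPictureImportRGBA(&pic, rgba, width * 4) &&
      WebPEncode(&config, &pic)) {          // the function shown above
    *out = wrt.mem;
    size = wrt.size;
  } else {
    free(wrt.mem);                          // pic.error_code holds the reason
  }
  WebPPictureFree(&pic);                    // frees only the imported samples
  return size;
}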


@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Set and delete APIs for mux.
@ -12,6 +14,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -47,8 +50,9 @@ static void MuxRelease(WebPMux* const mux) {
MuxImageDeleteAll(&mux->images_);
DeleteAllChunks(&mux->vp8x_);
DeleteAllChunks(&mux->iccp_);
DeleteAllChunks(&mux->loop_);
DeleteAllChunks(&mux->meta_);
DeleteAllChunks(&mux->anim_);
DeleteAllChunks(&mux->exif_);
DeleteAllChunks(&mux->xmp_);
DeleteAllChunks(&mux->unknown_);
}
@ -81,13 +85,14 @@ static WebPMuxError MuxSet(WebPMux* const mux, CHUNK_INDEX idx, uint32_t nth,
ChunkInit(&chunk);
SWITCH_ID_LIST(IDX_VP8X, &mux->vp8x_);
SWITCH_ID_LIST(IDX_ICCP, &mux->iccp_);
SWITCH_ID_LIST(IDX_LOOP, &mux->loop_);
SWITCH_ID_LIST(IDX_META, &mux->meta_);
if (idx == IDX_UNKNOWN && data->size_ > TAG_SIZE) {
SWITCH_ID_LIST(IDX_ANIM, &mux->anim_);
SWITCH_ID_LIST(IDX_EXIF, &mux->exif_);
SWITCH_ID_LIST(IDX_XMP, &mux->xmp_);
if (idx == IDX_UNKNOWN && data->size > TAG_SIZE) {
// For raw-data unknown chunk, the first four bytes should be the tag to be
// used for the chunk.
const WebPData tmp = { data->bytes_ + TAG_SIZE, data->size_ - TAG_SIZE };
err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes_ + 0));
const WebPData tmp = { data->bytes + TAG_SIZE, data->size - TAG_SIZE };
err = ChunkAssignData(&chunk, &tmp, copy_data, GetLE32(data->bytes + 0));
if (err == WEBP_MUX_OK)
err = ChunkSetNth(&chunk, &mux->unknown_, nth);
}
@ -106,39 +111,40 @@ static WebPMuxError MuxAddChunk(WebPMux* const mux, uint32_t nth, uint32_t tag,
return MuxSet(mux, idx, nth, &chunk_data, copy_data);
}
// Create data for frame/tile given image data, offsets and duration.
static WebPMuxError CreateFrameTileData(const WebPData* const image,
int x_offset, int y_offset,
int duration, int is_lossless,
int is_frame,
WebPData* const frame_tile) {
// Create data for frame/fragment given image data, offsets and duration.
static WebPMuxError CreateFrameFragmentData(
const WebPData* const image, int x_offset, int y_offset, int duration,
WebPMuxAnimDispose dispose_method, int is_lossless, int is_frame,
WebPData* const frame_frgm) {
int width;
int height;
uint8_t* frame_tile_bytes;
const size_t frame_tile_size = kChunks[is_frame ? IDX_FRAME : IDX_TILE].size;
uint8_t* frame_frgm_bytes;
const size_t frame_frgm_size = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].size;
const int ok = is_lossless ?
VP8LGetInfo(image->bytes_, image->size_, &width, &height, NULL) :
VP8GetInfo(image->bytes_, image->size_, image->size_, &width, &height);
VP8LGetInfo(image->bytes, image->size, &width, &height, NULL) :
VP8GetInfo(image->bytes, image->size, image->size, &width, &height);
if (!ok) return WEBP_MUX_INVALID_ARGUMENT;
assert(width > 0 && height > 0 && duration > 0);
assert(width > 0 && height > 0 && duration >= 0);
assert(dispose_method == (dispose_method & 1));
// Note: assertion on upper bounds is done in PutLE24().
frame_tile_bytes = (uint8_t*)malloc(frame_tile_size);
if (frame_tile_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
frame_frgm_bytes = (uint8_t*)malloc(frame_frgm_size);
if (frame_frgm_bytes == NULL) return WEBP_MUX_MEMORY_ERROR;
PutLE24(frame_tile_bytes + 0, x_offset / 2);
PutLE24(frame_tile_bytes + 3, y_offset / 2);
PutLE24(frame_frgm_bytes + 0, x_offset / 2);
PutLE24(frame_frgm_bytes + 3, y_offset / 2);
if (is_frame) {
PutLE24(frame_tile_bytes + 6, width - 1);
PutLE24(frame_tile_bytes + 9, height - 1);
PutLE24(frame_tile_bytes + 12, duration - 1);
PutLE24(frame_frgm_bytes + 6, width - 1);
PutLE24(frame_frgm_bytes + 9, height - 1);
PutLE24(frame_frgm_bytes + 12, duration);
frame_frgm_bytes[15] = (dispose_method & 1);
}
frame_tile->bytes_ = frame_tile_bytes;
frame_tile->size_ = frame_tile_size;
frame_frgm->bytes = frame_frgm_bytes;
frame_frgm->size = frame_frgm_size;
return WEBP_MUX_OK;
}
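
For reference, a minimal standalone sketch (not part of the commit) of the 16-byte ANMF payload that CreateFrameFragmentData() above emits. The field offsets follow the PutLE24() calls in the diff; the 16-byte size is inferred from those offsets (kChunks[IDX_ANMF].size in the real code), and the local PutLE16()/PutLE24() helpers mirror the inline versions shown later in muxi.h.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void PutLE16(uint8_t* const data, int val) {
  data[0] = (uint8_t)(val >> 0);
  data[1] = (uint8_t)(val >> 8);
}
static void PutLE24(uint8_t* const data, int val) {
  PutLE16(data, val & 0xffff);
  data[2] = (uint8_t)(val >> 16);
}

int main(void) {
  uint8_t anmf[16];                          /* assumed ANMF payload size */
  const int x_offset = 32, y_offset = 16;    /* must be even: stored halved */
  const int width = 320, height = 240;
  const int duration = 100, dispose = 1;
  int i;
  memset(anmf, 0, sizeof(anmf));
  PutLE24(anmf + 0,  x_offset / 2);
  PutLE24(anmf + 3,  y_offset / 2);
  PutLE24(anmf + 6,  width - 1);             /* stored minus one */
  PutLE24(anmf + 9,  height - 1);
  PutLE24(anmf + 12, duration);
  anmf[15] = (uint8_t)(dispose & 1);         /* dispose-method bit */
  for (i = 0; i < 16; ++i) printf("%02x ", anmf[i]);
  printf("\n");
  return 0;
}
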
@ -149,8 +155,8 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
WebPData* const image, WebPData* const alpha,
int* const is_lossless) {
WebPDataInit(alpha); // Default: no alpha.
if (bitstream->size_ < TAG_SIZE ||
memcmp(bitstream->bytes_, "RIFF", TAG_SIZE)) {
if (bitstream->size < TAG_SIZE ||
memcmp(bitstream->bytes, "RIFF", TAG_SIZE)) {
// It is NOT webp file data. Return input data as is.
*image = *bitstream;
} else {
@ -166,7 +172,7 @@ static WebPMuxError GetImageData(const WebPData* const bitstream,
}
WebPMuxDelete(mux);
}
*is_lossless = VP8LCheckSignature(image->bytes_, image->size_);
*is_lossless = VP8LCheckSignature(image->bytes, image->size);
return WEBP_MUX_OK;
}
@ -185,203 +191,96 @@ static WebPMuxError DeleteChunks(WebPChunk** chunk_list, uint32_t tag) {
return err;
}
static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, CHUNK_INDEX idx) {
const WebPChunkId id = kChunks[idx].id;
static WebPMuxError MuxDeleteAllNamedData(WebPMux* const mux, uint32_t tag) {
const WebPChunkId id = ChunkGetIdFromTag(tag);
WebPChunk** chunk_list;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
assert(mux != NULL);
if (IsWPI(id)) return WEBP_MUX_INVALID_ARGUMENT;
chunk_list = MuxGetChunkListFromId(mux, id);
if (chunk_list == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return DeleteChunks(chunk_list, kChunks[idx].tag);
}
static WebPMuxError DeleteLoopCount(WebPMux* const mux) {
return MuxDeleteAllNamedData(mux, IDX_LOOP);
return DeleteChunks(chunk_list, tag);
}
//------------------------------------------------------------------------------
// Set API(s).
WebPMuxError WebPMuxSetImage(WebPMux* mux,
const WebPData* bitstream, int copy_data) {
WebPMuxError WebPMuxSetChunk(WebPMux* mux, const char fourcc[4],
const WebPData* chunk_data, int copy_data) {
CHUNK_INDEX idx;
uint32_t tag;
WebPMuxError err;
WebPChunk chunk;
WebPMuxImage wpi;
WebPData image;
WebPData alpha;
int is_lossless;
int image_tag;
if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
bitstream->size_ > MAX_CHUNK_PAYLOAD) {
if (mux == NULL || fourcc == NULL || chunk_data == NULL ||
chunk_data->bytes == NULL || chunk_data->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
idx = ChunkGetIndexFromFourCC(fourcc);
tag = ChunkGetTagFromFourCC(fourcc);
// If given data is for a whole webp file,
// extract only the VP8/VP8L data from it.
err = GetImageData(bitstream, &image, &alpha, &is_lossless);
if (err != WEBP_MUX_OK) return err;
image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
// Delete existing chunk(s) with the same 'fourcc'.
err = MuxDeleteAllNamedData(mux, tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Delete the existing images.
MuxImageDeleteAll(&mux->images_);
// Add the given chunk.
return MuxSet(mux, idx, 1, chunk_data, copy_data);
}
MuxImageInit(&wpi);
if (alpha.bytes_ != NULL) { // Add alpha chunk.
ChunkInit(&chunk);
err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
if (err != WEBP_MUX_OK) goto Err;
}
// Add image chunk.
// Creates a chunk from given 'data' and sets it as 1st chunk in 'chunk_list'.
static WebPMuxError AddDataToChunkList(
const WebPData* const data, int copy_data, uint32_t tag,
WebPChunk** chunk_list) {
WebPChunk chunk;
WebPMuxError err;
ChunkInit(&chunk);
err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
err = ChunkAssignData(&chunk, data, copy_data, tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.img_, 1);
err = ChunkSetNth(&chunk, chunk_list, 1);
if (err != WEBP_MUX_OK) goto Err;
// Add this image to mux.
err = MuxImagePush(&wpi, &mux->images_);
if (err != WEBP_MUX_OK) goto Err;
// All OK.
return WEBP_MUX_OK;
Err:
// Something bad happened.
ChunkRelease(&chunk);
MuxImageRelease(&wpi);
return err;
}
WebPMuxError WebPMuxSetMetadata(WebPMux* mux, const WebPData* metadata,
int copy_data) {
WebPMuxError err;
if (mux == NULL || metadata == NULL || metadata->bytes_ == NULL ||
metadata->size_ > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
// Extracts image & alpha data from the given bitstream and then sets wpi.alpha_
// and wpi.img_ appropriately.
static WebPMuxError SetAlphaAndImageChunks(
const WebPData* const bitstream, int copy_data, WebPMuxImage* const wpi) {
int is_lossless = 0;
WebPData image, alpha;
WebPMuxError err = GetImageData(bitstream, &image, &alpha, &is_lossless);
const int image_tag =
is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
if (err != WEBP_MUX_OK) return err;
if (alpha.bytes != NULL) {
err = AddDataToChunkList(&alpha, copy_data, kChunks[IDX_ALPHA].tag,
&wpi->alpha_);
if (err != WEBP_MUX_OK) return err;
}
// Delete the existing metadata chunk(s).
err = WebPMuxDeleteMetadata(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given metadata chunk.
return MuxSet(mux, IDX_META, 1, metadata, copy_data);
return AddDataToChunkList(&image, copy_data, image_tag, &wpi->img_);
}
WebPMuxError WebPMuxSetColorProfile(WebPMux* mux, const WebPData* color_profile,
int copy_data) {
WebPMuxError err;
if (mux == NULL || color_profile == NULL || color_profile->bytes_ == NULL ||
color_profile->size_ > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Delete the existing ICCP chunk(s).
err = WebPMuxDeleteColorProfile(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given ICCP chunk.
return MuxSet(mux, IDX_ICCP, 1, color_profile, copy_data);
}
WebPMuxError WebPMuxSetLoopCount(WebPMux* mux, int loop_count) {
WebPMuxError err;
uint8_t* data = NULL;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
if (loop_count >= MAX_LOOP_COUNT) return WEBP_MUX_INVALID_ARGUMENT;
// Delete the existing LOOP chunk(s).
err = DeleteLoopCount(mux);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Add the given loop count.
data = (uint8_t*)malloc(kChunks[IDX_LOOP].size);
if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
PutLE16(data, loop_count);
err = MuxAddChunk(mux, 1, kChunks[IDX_LOOP].tag, data,
kChunks[IDX_LOOP].size, 1);
free(data);
return err;
}
static WebPMuxError MuxPushFrameTileInternal(
WebPMux* const mux, const WebPData* const bitstream, int x_offset,
int y_offset, int duration, int copy_data, uint32_t tag) {
WebPChunk chunk;
WebPData image;
WebPData alpha;
WebPMuxError WebPMuxSetImage(WebPMux* mux, const WebPData* bitstream,
int copy_data) {
WebPMuxImage wpi;
WebPMuxError err;
WebPData frame_tile;
const int is_frame = (tag == kChunks[IDX_FRAME].tag) ? 1 : 0;
int is_lossless;
int image_tag;
// Sanity checks.
if (mux == NULL || bitstream == NULL || bitstream->bytes_ == NULL ||
bitstream->size_ > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
duration <= 0 || duration > MAX_DURATION) {
if (mux == NULL || bitstream == NULL || bitstream->bytes == NULL ||
bitstream->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Snap offsets to even positions.
x_offset &= ~1;
y_offset &= ~1;
if (mux->images_ != NULL) {
// Only one 'simple image' can be added in mux. So, remove present images.
MuxImageDeleteAll(&mux->images_);
}
// If given data is for a whole webp file,
// extract only the VP8/VP8L data from it.
err = GetImageData(bitstream, &image, &alpha, &is_lossless);
if (err != WEBP_MUX_OK) return err;
image_tag = is_lossless ? kChunks[IDX_VP8L].tag : kChunks[IDX_VP8].tag;
WebPDataInit(&frame_tile);
ChunkInit(&chunk);
MuxImageInit(&wpi);
if (alpha.bytes_ != NULL) {
// Add alpha chunk.
err = ChunkAssignData(&chunk, &alpha, copy_data, kChunks[IDX_ALPHA].tag);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.alpha_, 1);
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.alpha_ now.
}
// Add image chunk.
err = ChunkAssignData(&chunk, &image, copy_data, image_tag);
err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
if (err != WEBP_MUX_OK) goto Err;
err = ChunkSetNth(&chunk, &wpi.img_, 1);
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.img_ now.
// Create frame/tile data.
err = CreateFrameTileData(&image, x_offset, y_offset, duration, is_lossless,
is_frame, &frame_tile);
if (err != WEBP_MUX_OK) goto Err;
// Add frame/tile chunk (with copy_data = 1).
err = ChunkAssignData(&chunk, &frame_tile, 1, tag);
if (err != WEBP_MUX_OK) goto Err;
WebPDataClear(&frame_tile);
err = ChunkSetNth(&chunk, &wpi.header_, 1);
if (err != WEBP_MUX_OK) goto Err;
ChunkInit(&chunk); // chunk owned by wpi.header_ now.
// Add this WebPMuxImage to mux.
err = MuxImagePush(&wpi, &mux->images_);
@ -391,86 +290,137 @@ static WebPMuxError MuxPushFrameTileInternal(
return WEBP_MUX_OK;
Err: // Something bad happened.
WebPDataClear(&frame_tile);
ChunkRelease(&chunk);
MuxImageRelease(&wpi);
return err;
}
WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset,
int duration, int copy_data) {
return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
duration, copy_data, kChunks[IDX_FRAME].tag);
WebPMuxError WebPMuxPushFrame(WebPMux* mux, const WebPMuxFrameInfo* frame,
int copy_data) {
WebPMuxImage wpi;
WebPMuxError err;
int is_frame;
const WebPData* const bitstream = &frame->bitstream;
// Sanity checks.
if (mux == NULL || frame == NULL) return WEBP_MUX_INVALID_ARGUMENT;
is_frame = (frame->id == WEBP_CHUNK_ANMF);
if (!(is_frame || (frame->id == WEBP_CHUNK_FRGM))) {
return WEBP_MUX_INVALID_ARGUMENT;
}
#ifndef WEBP_EXPERIMENTAL_FEATURES
if (frame->id == WEBP_CHUNK_FRGM) { // disabled for now.
return WEBP_MUX_INVALID_ARGUMENT;
}
#endif
if (bitstream->bytes == NULL || bitstream->size > MAX_CHUNK_PAYLOAD) {
return WEBP_MUX_INVALID_ARGUMENT;
}
if (mux->images_ != NULL) {
const WebPMuxImage* const image = mux->images_;
const uint32_t image_id = (image->header_ != NULL) ?
ChunkGetIdFromTag(image->header_->tag_) : WEBP_CHUNK_IMAGE;
if (image_id != frame->id) {
return WEBP_MUX_INVALID_ARGUMENT; // Conflicting frame types.
}
}
MuxImageInit(&wpi);
err = SetAlphaAndImageChunks(bitstream, copy_data, &wpi);
if (err != WEBP_MUX_OK) goto Err;
assert(wpi.img_ != NULL); // As SetAlphaAndImageChunks() was successful.
{
const int is_lossless = (wpi.img_->tag_ == kChunks[IDX_VP8L].tag);
const int x_offset = frame->x_offset & ~1; // Snap offsets to even.
const int y_offset = frame->y_offset & ~1;
const int duration = is_frame ? frame->duration : 1 /* unused */;
const WebPMuxAnimDispose dispose_method =
is_frame ? frame->dispose_method : 0 /* unused */;
const uint32_t tag = kChunks[is_frame ? IDX_ANMF : IDX_FRGM].tag;
WebPData frame_frgm;
if (x_offset < 0 || x_offset >= MAX_POSITION_OFFSET ||
y_offset < 0 || y_offset >= MAX_POSITION_OFFSET ||
(duration < 0 || duration >= MAX_DURATION) ||
dispose_method != (dispose_method & 1)) {
err = WEBP_MUX_INVALID_ARGUMENT;
goto Err;
}
err = CreateFrameFragmentData(&wpi.img_->data_, x_offset, y_offset,
duration, dispose_method, is_lossless,
is_frame, &frame_frgm);
if (err != WEBP_MUX_OK) goto Err;
// Add frame/fragment chunk (with copy_data = 1).
err = AddDataToChunkList(&frame_frgm, 1, tag, &wpi.header_);
WebPDataClear(&frame_frgm); // frame_frgm owned by wpi.header_ now.
if (err != WEBP_MUX_OK) goto Err;
}
// Add this WebPMuxImage to mux.
err = MuxImagePush(&wpi, &mux->images_);
if (err != WEBP_MUX_OK) goto Err;
// All is well.
return WEBP_MUX_OK;
Err: // Something bad happened.
MuxImageRelease(&wpi);
return err;
}
WebPMuxError WebPMuxPushTile(WebPMux* mux, const WebPData* bitstream,
int x_offset, int y_offset,
int copy_data) {
return MuxPushFrameTileInternal(mux, bitstream, x_offset, y_offset,
1 /* unused duration */, copy_data,
kChunks[IDX_TILE].tag);
WebPMuxError WebPMuxSetAnimationParams(WebPMux* mux,
const WebPMuxAnimParams* params) {
WebPMuxError err;
uint8_t data[ANIM_CHUNK_SIZE];
if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
if (params->loop_count < 0 || params->loop_count >= MAX_LOOP_COUNT) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Delete any existing ANIM chunk(s).
err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Set the animation parameters.
PutLE32(data, params->bgcolor);
PutLE16(data + 4, params->loop_count);
return MuxAddChunk(mux, 1, kChunks[IDX_ANIM].tag, data, sizeof(data), 1);
}
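
A small caller-side sketch (not part of the commit) of the new WebPMuxSetAnimationParams() entry point above; the resulting ANIM payload is the 6 bytes written by the PutLE32()/PutLE16() calls in the function. SetLoopForever is a hypothetical helper name, and the include path may differ per build; a loop_count of 0 requests infinite looping.

#include "webp/mux.h"

/* Hypothetical helper, not a library function. */
static WebPMuxError SetLoopForever(WebPMux* const mux) {
  WebPMuxAnimParams params;
  params.bgcolor = 0xffffffffu;   /* opaque white canvas */
  params.loop_count = 0;          /* 0 = loop forever */
  return WebPMuxSetAnimationParams(mux, &params);
}
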
//------------------------------------------------------------------------------
// Delete API(s).
WebPMuxError WebPMuxDeleteImage(WebPMux* mux) {
WebPMuxError err;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
err = MuxValidateForImage(mux);
if (err != WEBP_MUX_OK) return err;
// All well, delete image.
MuxImageDeleteAll(&mux->images_);
return WEBP_MUX_OK;
}
WebPMuxError WebPMuxDeleteMetadata(WebPMux* mux) {
return MuxDeleteAllNamedData(mux, IDX_META);
}
WebPMuxError WebPMuxDeleteColorProfile(WebPMux* mux) {
return MuxDeleteAllNamedData(mux, IDX_ICCP);
}
static WebPMuxError DeleteFrameTileInternal(WebPMux* const mux, uint32_t nth,
CHUNK_INDEX idx) {
const WebPChunkId id = kChunks[idx].id;
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
assert(idx == IDX_FRAME || idx == IDX_TILE);
return MuxImageDeleteNth(&mux->images_, nth, id);
WebPMuxError WebPMuxDeleteChunk(WebPMux* mux, const char fourcc[4]) {
if (mux == NULL || fourcc == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxDeleteAllNamedData(mux, ChunkGetTagFromFourCC(fourcc));
}
WebPMuxError WebPMuxDeleteFrame(WebPMux* mux, uint32_t nth) {
return DeleteFrameTileInternal(mux, nth, IDX_FRAME);
}
WebPMuxError WebPMuxDeleteTile(WebPMux* mux, uint32_t nth) {
return DeleteFrameTileInternal(mux, nth, IDX_TILE);
if (mux == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxImageDeleteNth(&mux->images_, nth);
}
//------------------------------------------------------------------------------
// Assembly of the WebP RIFF file.
static WebPMuxError GetFrameTileInfo(const WebPChunk* const frame_tile_chunk,
int* const x_offset, int* const y_offset,
int* const duration) {
const uint32_t tag = frame_tile_chunk->tag_;
const int is_frame = (tag == kChunks[IDX_FRAME].tag);
const WebPData* const data = &frame_tile_chunk->data_;
static WebPMuxError GetFrameFragmentInfo(
const WebPChunk* const frame_frgm_chunk,
int* const x_offset, int* const y_offset, int* const duration) {
const uint32_t tag = frame_frgm_chunk->tag_;
const int is_frame = (tag == kChunks[IDX_ANMF].tag);
const WebPData* const data = &frame_frgm_chunk->data_;
const size_t expected_data_size =
is_frame ? FRAME_CHUNK_SIZE : TILE_CHUNK_SIZE;
assert(frame_tile_chunk != NULL);
assert(tag == kChunks[IDX_FRAME].tag || tag == kChunks[IDX_TILE].tag);
if (data->size_ != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
is_frame ? ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
assert(frame_frgm_chunk != NULL);
assert(tag == kChunks[IDX_ANMF].tag || tag == kChunks[IDX_FRGM].tag);
if (data->size != expected_data_size) return WEBP_MUX_INVALID_ARGUMENT;
*x_offset = 2 * GetLE24(data->bytes_ + 0);
*y_offset = 2 * GetLE24(data->bytes_ + 3);
if (is_frame) *duration = 1 + GetLE24(data->bytes_ + 12);
*x_offset = 2 * GetLE24(data->bytes + 0);
*y_offset = 2 * GetLE24(data->bytes + 3);
if (is_frame) *duration = GetLE24(data->bytes + 12);
return WEBP_MUX_OK;
}
@ -483,8 +433,8 @@ WebPMuxError MuxGetImageWidthHeight(const WebPChunk* const image_chunk,
assert(image_chunk != NULL);
assert(tag == kChunks[IDX_VP8].tag || tag == kChunks[IDX_VP8L].tag);
ok = (tag == kChunks[IDX_VP8].tag) ?
VP8GetInfo(data->bytes_, data->size_, data->size_, &w, &h) :
VP8LGetInfo(data->bytes_, data->size_, &w, &h, NULL);
VP8GetInfo(data->bytes, data->size, data->size, &w, &h) :
VP8LGetInfo(data->bytes, data->size, &w, &h, NULL);
if (ok) {
*width = w;
*height = h;
@ -499,11 +449,11 @@ static WebPMuxError GetImageInfo(const WebPMuxImage* const wpi,
int* const duration,
int* const width, int* const height) {
const WebPChunk* const image_chunk = wpi->img_;
const WebPChunk* const frame_tile_chunk = wpi->header_;
const WebPChunk* const frame_frgm_chunk = wpi->header_;
// Get offsets and duration from FRM/TILE chunk.
// Get offsets and duration from ANMF/FRGM chunk.
const WebPMuxError err =
GetFrameTileInfo(frame_tile_chunk, x_offset, y_offset, duration);
GetFrameFragmentInfo(frame_frgm_chunk, x_offset, y_offset, duration);
if (err != WEBP_MUX_OK) return err;
// Get width and height from VP8/VP8L chunk.
@ -525,9 +475,9 @@ static WebPMuxError GetImageCanvasWidthHeight(
int max_x = 0;
int max_y = 0;
int64_t image_area = 0;
// Aggregate the bounding box for animation frames & tiled images.
// Aggregate the bounding box for animation frames & fragmented images.
for (; wpi != NULL; wpi = wpi->next_) {
int x_offset, y_offset, duration, w, h;
int x_offset = 0, y_offset = 0, duration = 0, w = 0, h = 0;
const WebPMuxError err = GetImageInfo(wpi, &x_offset, &y_offset,
&duration, &w, &h);
const int max_x_pos = x_offset + w;
@ -542,11 +492,11 @@ static WebPMuxError GetImageCanvasWidthHeight(
}
*width = max_x;
*height = max_y;
// Crude check to validate that there are no image overlaps/holes for tile
// images. Check that the aggregated image area for individual tiles exactly
// matches the image area of the constructed canvas. However, the area-match
// is necessary but not sufficient condition.
if ((flags & TILE_FLAG) && (image_area != (max_x * max_y))) {
// Crude check to validate that there are no image overlaps/holes for
// fragmented images. Check that the aggregated image area for individual
// fragments exactly matches the image area of the constructed canvas.
// However, the area-match is necessary but not sufficient condition.
if ((flags & FRAGMENTS_FLAG) && (image_area != (max_x * max_y))) {
*width = 0;
*height = 0;
return WEBP_MUX_INVALID_ARGUMENT;
@ -580,34 +530,34 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
assert(mux != NULL);
images = mux->images_; // First image.
if (images == NULL || images->img_ == NULL ||
images->img_->data_.bytes_ == NULL) {
images->img_->data_.bytes == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// If VP8X chunk(s) is(are) already present, remove them (and later add new
// VP8X chunk with updated flags).
err = MuxDeleteAllNamedData(mux, IDX_VP8X);
err = MuxDeleteAllNamedData(mux, kChunks[IDX_VP8X].tag);
if (err != WEBP_MUX_OK && err != WEBP_MUX_NOT_FOUND) return err;
// Set flags.
if (mux->iccp_ != NULL && mux->iccp_->data_.bytes_ != NULL) {
if (mux->iccp_ != NULL && mux->iccp_->data_.bytes != NULL) {
flags |= ICCP_FLAG;
}
if (mux->meta_ != NULL && mux->meta_->data_.bytes_ != NULL) {
flags |= META_FLAG;
if (mux->exif_ != NULL && mux->exif_->data_.bytes != NULL) {
flags |= EXIF_FLAG;
}
if (mux->xmp_ != NULL && mux->xmp_->data_.bytes != NULL) {
flags |= XMP_FLAG;
}
if (images->header_ != NULL) {
if (images->header_->tag_ == kChunks[IDX_TILE].tag) {
// This is a tiled image.
flags |= TILE_FLAG;
} else if (images->header_->tag_ == kChunks[IDX_FRAME].tag) {
if (images->header_->tag_ == kChunks[IDX_FRGM].tag) {
// This is a fragmented image.
flags |= FRAGMENTS_FLAG;
} else if (images->header_->tag_ == kChunks[IDX_ANMF].tag) {
// This is an image with animation.
flags |= ANIMATION_FLAG;
}
}
if (MuxImageCount(images, WEBP_CHUNK_ALPHA) > 0) {
flags |= ALPHA_FLAG; // Some images have an alpha channel.
}
@ -643,39 +593,63 @@ static WebPMuxError CreateVP8XChunk(WebPMux* const mux) {
return err;
}
// Cleans up 'mux' by removing any unnecessary chunks.
static WebPMuxError MuxCleanup(WebPMux* const mux) {
int num_frames;
int num_fragments;
int num_anim_chunks;
// If we have an image with single fragment or frame, convert it to a
// non-animated non-fragmented image (to avoid writing FRGM/ANMF chunk
// unnecessarily).
WebPMuxError err = WebPMuxNumChunks(mux, kChunks[IDX_ANMF].id, &num_frames);
if (err != WEBP_MUX_OK) return err;
err = WebPMuxNumChunks(mux, kChunks[IDX_FRGM].id, &num_fragments);
if (err != WEBP_MUX_OK) return err;
if (num_frames == 1 || num_fragments == 1) {
WebPMuxImage* frame_frag;
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, &frame_frag);
assert(err == WEBP_MUX_OK); // We know that one frame/fragment does exist.
if (frame_frag->header_ != NULL) {
assert(frame_frag->header_->tag_ == kChunks[IDX_ANMF].tag ||
frame_frag->header_->tag_ == kChunks[IDX_FRGM].tag);
ChunkDelete(frame_frag->header_); // Removes ANMF/FRGM chunk.
frame_frag->header_ = NULL;
}
num_frames = 0;
num_fragments = 0;
}
// Remove ANIM chunk if this is a non-animated image.
err = WebPMuxNumChunks(mux, kChunks[IDX_ANIM].id, &num_anim_chunks);
if (err != WEBP_MUX_OK) return err;
if (num_anim_chunks >= 1 && num_frames == 0) {
err = MuxDeleteAllNamedData(mux, kChunks[IDX_ANIM].tag);
if (err != WEBP_MUX_OK) return err;
}
return WEBP_MUX_OK;
}
WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
size_t size = 0;
uint8_t* data = NULL;
uint8_t* dst = NULL;
int num_frames;
int num_loop_chunks;
WebPMuxError err;
if (mux == NULL || assembled_data == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Remove LOOP chunk if unnecessary.
err = WebPMuxNumChunks(mux, kChunks[IDX_LOOP].id, &num_loop_chunks);
// Finalize mux.
err = MuxCleanup(mux);
if (err != WEBP_MUX_OK) return err;
if (num_loop_chunks >= 1) {
err = WebPMuxNumChunks(mux, kChunks[IDX_FRAME].id, &num_frames);
if (err != WEBP_MUX_OK) return err;
if (num_frames == 0) {
err = DeleteLoopCount(mux);
if (err != WEBP_MUX_OK) return err;
}
}
// Create VP8X chunk.
err = CreateVP8XChunk(mux);
if (err != WEBP_MUX_OK) return err;
// Allocate data.
size = ChunksListDiskSize(mux->vp8x_) + ChunksListDiskSize(mux->iccp_)
+ ChunksListDiskSize(mux->loop_) + MuxImageListDiskSize(mux->images_)
+ ChunksListDiskSize(mux->meta_) + ChunksListDiskSize(mux->unknown_)
+ RIFF_HEADER_SIZE;
+ ChunksListDiskSize(mux->anim_) + MuxImageListDiskSize(mux->images_)
+ ChunksListDiskSize(mux->exif_) + ChunksListDiskSize(mux->xmp_)
+ ChunksListDiskSize(mux->unknown_) + RIFF_HEADER_SIZE;
data = (uint8_t*)malloc(size);
if (data == NULL) return WEBP_MUX_MEMORY_ERROR;
@ -684,9 +658,10 @@ WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
dst = MuxEmitRiffHeader(data, size);
dst = ChunkListEmit(mux->vp8x_, dst);
dst = ChunkListEmit(mux->iccp_, dst);
dst = ChunkListEmit(mux->loop_, dst);
dst = ChunkListEmit(mux->anim_, dst);
dst = MuxImageListEmit(mux->images_, dst);
dst = ChunkListEmit(mux->meta_, dst);
dst = ChunkListEmit(mux->exif_, dst);
dst = ChunkListEmit(mux->xmp_, dst);
dst = ChunkListEmit(mux->unknown_, dst);
assert(dst == data + size);
@ -698,9 +673,9 @@ WebPMuxError WebPMuxAssemble(WebPMux* mux, WebPData* assembled_data) {
size = 0;
}
// Finalize.
assembled_data->bytes_ = data;
assembled_data->size_ = size;
// Finalize data.
assembled_data->bytes = data;
assembled_data->size = size;
return err;
}
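
Taken together, the edit API reworked above replaces the dedicated WebPMuxSetMetadata()/WebPMuxSetColorProfile() entry points with the fourcc-keyed WebPMuxSetChunk(). A hedged caller-side sketch (not part of the commit): AttachXMP is a placeholder name, the image and XMP payloads are supplied by the caller, and the include path may differ per build.

#include "webp/mux.h"

static int AttachXMP(const WebPData* const image, const WebPData* const xmp,
                     WebPData* const assembled) {
  WebPMuxError err = WEBP_MUX_OK;
  WebPMux* const mux = WebPMuxNew();
  if (mux == NULL) return 0;
  err = WebPMuxSetImage(mux, image, 1);            /* copy_data = 1 */
  if (err == WEBP_MUX_OK) {
    err = WebPMuxSetChunk(mux, "XMP ", xmp, 1);    /* note trailing space */
  }
  if (err == WEBP_MUX_OK) {
    err = WebPMuxAssemble(mux, assembled);         /* free with WebPDataClear() */
  }
  WebPMuxDelete(mux);
  return (err == WEBP_MUX_OK);
}
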

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Internal header for mux library.
@ -15,7 +17,6 @@
#include <stdlib.h>
#include "../dec/vp8i.h"
#include "../dec/vp8li.h"
#include "../webp/format_constants.h"
#include "../webp/mux.h"
#if defined(__cplusplus) || defined(c_plusplus)
@ -25,22 +26,26 @@ extern "C" {
//------------------------------------------------------------------------------
// Defines and constants.
#define MUX_MAJ_VERSION 0
#define MUX_MIN_VERSION 1
#define MUX_REV_VERSION 1
// Chunk object.
typedef struct WebPChunk WebPChunk;
struct WebPChunk {
uint32_t tag_;
int owner_; // True if *data_ memory is owned internally.
// VP8X, Loop, and other internally created chunks
// like frame/tile are always owned.
// VP8X, ANIM, and other internally created chunks
// like ANMF/FRGM are always owned.
WebPData data_;
WebPChunk* next_;
};
// MuxImage object. Store a full webp image (including frame/tile chunk, alpha
// MuxImage object. Store a full WebP image (including ANMF/FRGM chunk, ALPH
// chunk and VP8/VP8L chunk),
typedef struct WebPMuxImage WebPMuxImage;
struct WebPMuxImage {
WebPChunk* header_; // Corresponds to WEBP_CHUNK_FRAME/WEBP_CHUNK_TILE.
WebPChunk* header_; // Corresponds to WEBP_CHUNK_ANMF/WEBP_CHUNK_FRGM.
WebPChunk* alpha_; // Corresponds to WEBP_CHUNK_ALPHA.
WebPChunk* img_; // Corresponds to WEBP_CHUNK_IMAGE.
int is_partial_; // True if only some of the chunks are filled.
@ -51,8 +56,9 @@ struct WebPMuxImage {
struct WebPMux {
WebPMuxImage* images_;
WebPChunk* iccp_;
WebPChunk* meta_;
WebPChunk* loop_;
WebPChunk* exif_;
WebPChunk* xmp_;
WebPChunk* anim_;
WebPChunk* vp8x_;
WebPChunk* unknown_;
@ -65,13 +71,14 @@ struct WebPMux {
typedef enum {
IDX_VP8X = 0,
IDX_ICCP,
IDX_LOOP,
IDX_FRAME,
IDX_TILE,
IDX_ANIM,
IDX_ANMF,
IDX_FRGM,
IDX_ALPHA,
IDX_VP8,
IDX_VP8L,
IDX_META,
IDX_EXIF,
IDX_XMP,
IDX_UNKNOWN,
IDX_NIL,
@ -80,8 +87,6 @@ typedef enum {
#define NIL_TAG 0x00000000u // To signal void chunk.
#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)
typedef struct {
uint32_t tag;
WebPChunkId id;
@ -90,44 +95,6 @@ typedef struct {
extern const ChunkInfo kChunks[IDX_LAST_CHUNK];
//------------------------------------------------------------------------------
// Helper functions.
// Read 16, 24 or 32 bits stored in little-endian order.
static WEBP_INLINE int GetLE16(const uint8_t* const data) {
return (int)(data[0] << 0) | (data[1] << 8);
}
static WEBP_INLINE int GetLE24(const uint8_t* const data) {
return GetLE16(data) | (data[2] << 16);
}
static WEBP_INLINE uint32_t GetLE32(const uint8_t* const data) {
return (uint32_t)GetLE16(data) | (GetLE16(data + 2) << 16);
}
// Store 16, 24 or 32 bits in little-endian order.
static WEBP_INLINE void PutLE16(uint8_t* const data, int val) {
assert(val < (1 << 16));
data[0] = (val >> 0);
data[1] = (val >> 8);
}
static WEBP_INLINE void PutLE24(uint8_t* const data, int val) {
assert(val < (1 << 24));
PutLE16(data, val & 0xffff);
data[2] = (val >> 16);
}
static WEBP_INLINE void PutLE32(uint8_t* const data, uint32_t val) {
PutLE16(data, (int)(val & 0xffff));
PutLE16(data + 2, (int)(val >> 16));
}
static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
}
//------------------------------------------------------------------------------
// Chunk object management.
@ -140,6 +107,12 @@ CHUNK_INDEX ChunkGetIndexFromTag(uint32_t tag);
// Get chunk id from chunk tag. Returns WEBP_CHUNK_NIL if not found.
WebPChunkId ChunkGetIdFromTag(uint32_t tag);
// Convert a fourcc string to a tag.
uint32_t ChunkGetTagFromFourCC(const char fourcc[4]);
// Get chunk index from fourcc. Returns IDX_UNKNOWN if given fourcc is unknown.
CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]);
// Search for nth chunk with given 'tag' in the chunk list.
// nth = 0 means "last of the list".
WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag);
@ -150,7 +123,8 @@ WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
// Sets 'chunk' at nth position in the 'chunk_list'.
// nth = 0 has the special meaning "last of the list".
WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
// On success ownership is transferred from 'chunk' to the 'chunk_list'.
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth);
// Releases chunk and returns chunk->next_.
@ -159,9 +133,14 @@ WebPChunk* ChunkRelease(WebPChunk* const chunk);
// Deletes given chunk & returns chunk->next_.
WebPChunk* ChunkDelete(WebPChunk* const chunk);
// Returns size of the chunk including chunk header and padding byte (if any).
static WEBP_INLINE size_t SizeWithPadding(size_t chunk_size) {
return CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~1U);
}
// Size of a chunk including header and padding.
static WEBP_INLINE size_t ChunkDiskSize(const WebPChunk* chunk) {
const size_t data_size = chunk->data_.size_;
const size_t data_size = chunk->data_.size;
assert(data_size < MAX_CHUNK_PAYLOAD);
return SizeWithPadding(data_size);
}
@ -193,13 +172,14 @@ WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi);
void MuxImageDeleteAll(WebPMuxImage** const wpi_list);
// Count number of images matching the given tag id in the 'wpi_list'.
// If id == WEBP_CHUNK_NIL, all images will be matched.
int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id);
// Check if given ID corresponds to an image related chunk.
static WEBP_INLINE int IsWPI(WebPChunkId id) {
switch (id) {
case WEBP_CHUNK_FRAME:
case WEBP_CHUNK_TILE:
case WEBP_CHUNK_ANMF:
case WEBP_CHUNK_FRGM:
case WEBP_CHUNK_ALPHA:
case WEBP_CHUNK_IMAGE: return 1;
default: return 0;
@ -211,8 +191,8 @@ static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
const WebPMuxImage* const wpi, WebPChunkId id) {
assert(wpi != NULL);
switch (id) {
case WEBP_CHUNK_FRAME:
case WEBP_CHUNK_TILE: return (WebPChunk**)&wpi->header_;
case WEBP_CHUNK_ANMF:
case WEBP_CHUNK_FRGM: return (WebPChunk**)&wpi->header_;
case WEBP_CHUNK_ALPHA: return (WebPChunk**)&wpi->alpha_;
case WEBP_CHUNK_IMAGE: return (WebPChunk**)&wpi->img_;
default: return NULL;
@ -222,13 +202,12 @@ static WEBP_INLINE WebPChunk** MuxImageGetListFromId(
// Pushes 'wpi' at the end of 'wpi_list'.
WebPMuxError MuxImagePush(const WebPMuxImage* wpi, WebPMuxImage** wpi_list);
// Delete nth image in the image list with given tag id.
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id);
// Delete nth image in the image list.
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth);
// Get nth image in the image list with given tag id.
// Get nth image in the image list.
WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id, WebPMuxImage** wpi);
WebPMuxImage** wpi);
// Total size of the given image.
size_t MuxImageDiskSize(const WebPMuxImage* const wpi);
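
The header keeps SizeWithPadding()/ChunkDiskSize() while the little-endian helpers now come from ../utils/utils.h. A standalone sketch (not part of the commit) of the even-padding rule they encode; LOCAL_CHUNK_HEADER_SIZE is a stand-in for the 8-byte tag + size header constant from format_constants.h.

#include <stddef.h>
#include <stdio.h>

#define LOCAL_CHUNK_HEADER_SIZE 8

static size_t SizeWithPadding(size_t chunk_size) {
  return LOCAL_CHUNK_HEADER_SIZE + ((chunk_size + 1) & ~(size_t)1);
}

int main(void) {
  /* An odd-sized payload gets one padding byte so chunks stay 2-aligned. */
  printf("%zu\n", SizeWithPadding(7));   /* 8 + 8  = 16 */
  printf("%zu\n", SizeWithPadding(8));   /* 8 + 8  = 16 */
  printf("%zu\n", SizeWithPadding(9));   /* 8 + 10 = 18 */
  return 0;
}
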

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Internal objects and utils for mux.
@ -12,6 +14,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -22,18 +25,25 @@ extern "C" {
const ChunkInfo kChunks[] = {
{ MKFOURCC('V', 'P', '8', 'X'), WEBP_CHUNK_VP8X, VP8X_CHUNK_SIZE },
{ MKFOURCC('I', 'C', 'C', 'P'), WEBP_CHUNK_ICCP, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('L', 'O', 'O', 'P'), WEBP_CHUNK_LOOP, LOOP_CHUNK_SIZE },
{ MKFOURCC('F', 'R', 'M', ' '), WEBP_CHUNK_FRAME, FRAME_CHUNK_SIZE },
{ MKFOURCC('T', 'I', 'L', 'E'), WEBP_CHUNK_TILE, TILE_CHUNK_SIZE },
{ MKFOURCC('A', 'N', 'I', 'M'), WEBP_CHUNK_ANIM, ANIM_CHUNK_SIZE },
{ MKFOURCC('A', 'N', 'M', 'F'), WEBP_CHUNK_ANMF, ANMF_CHUNK_SIZE },
{ MKFOURCC('F', 'R', 'G', 'M'), WEBP_CHUNK_FRGM, FRGM_CHUNK_SIZE },
{ MKFOURCC('A', 'L', 'P', 'H'), WEBP_CHUNK_ALPHA, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('V', 'P', '8', ' '), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('V', 'P', '8', 'L'), WEBP_CHUNK_IMAGE, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('M', 'E', 'T', 'A'), WEBP_CHUNK_META, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('E', 'X', 'I', 'F'), WEBP_CHUNK_EXIF, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('X', 'M', 'P', ' '), WEBP_CHUNK_XMP, UNDEFINED_CHUNK_SIZE },
{ MKFOURCC('U', 'N', 'K', 'N'), WEBP_CHUNK_UNKNOWN, UNDEFINED_CHUNK_SIZE },
{ NIL_TAG, WEBP_CHUNK_NIL, UNDEFINED_CHUNK_SIZE }
{ NIL_TAG, WEBP_CHUNK_NIL, UNDEFINED_CHUNK_SIZE }
};
//------------------------------------------------------------------------------
int WebPGetMuxVersion(void) {
return (MUX_MAJ_VERSION << 16) | (MUX_MIN_VERSION << 8) | MUX_REV_VERSION;
}
//------------------------------------------------------------------------------
// Life of a chunk object.
@ -73,12 +83,22 @@ WebPChunkId ChunkGetIdFromTag(uint32_t tag) {
return WEBP_CHUNK_NIL;
}
uint32_t ChunkGetTagFromFourCC(const char fourcc[4]) {
return MKFOURCC(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
}
CHUNK_INDEX ChunkGetIndexFromFourCC(const char fourcc[4]) {
const uint32_t tag = ChunkGetTagFromFourCC(fourcc);
const CHUNK_INDEX idx = ChunkGetIndexFromTag(tag);
return (idx == IDX_NIL) ? IDX_UNKNOWN : idx;
}
//------------------------------------------------------------------------------
// Chunk search methods.
// Returns next chunk in the chunk list with the given tag.
static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
while (chunk && chunk->tag_ != tag) {
while (chunk != NULL && chunk->tag_ != tag) {
chunk = chunk->next_;
}
return chunk;
@ -87,7 +107,7 @@ static WebPChunk* ChunkSearchNextInList(WebPChunk* chunk, uint32_t tag) {
WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
uint32_t iter = nth;
first = ChunkSearchNextInList(first, tag);
if (!first) return NULL;
if (first == NULL) return NULL;
while (--iter != 0) {
WebPChunk* next_chunk = ChunkSearchNextInList(first->next_, tag);
@ -99,14 +119,14 @@ WebPChunk* ChunkSearchList(WebPChunk* first, uint32_t nth, uint32_t tag) {
// Outputs a pointer to 'prev_chunk->next_',
// where 'prev_chunk' is the pointer to the chunk at position (nth - 1).
// Returns 1 if nth chunk was found, 0 otherwise.
// Returns true if nth chunk was found.
static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
WebPChunk*** const location) {
uint32_t count = 0;
assert(chunk_list);
assert(chunk_list != NULL);
*location = chunk_list;
while (*chunk_list) {
while (*chunk_list != NULL) {
WebPChunk* const cur_chunk = *chunk_list;
++count;
if (count == nth) return 1; // Found.
@ -124,34 +144,25 @@ static int ChunkSearchListToSet(WebPChunk** chunk_list, uint32_t nth,
WebPMuxError ChunkAssignData(WebPChunk* chunk, const WebPData* const data,
int copy_data, uint32_t tag) {
// For internally allocated chunks, always copy data & make it owner of data.
if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_LOOP].tag) {
if (tag == kChunks[IDX_VP8X].tag || tag == kChunks[IDX_ANIM].tag) {
copy_data = 1;
}
ChunkRelease(chunk);
if (data != NULL) {
if (copy_data) {
// Copy data.
chunk->data_.bytes_ = (uint8_t*)malloc(data->size_);
if (chunk->data_.bytes_ == NULL) return WEBP_MUX_MEMORY_ERROR;
memcpy((uint8_t*)chunk->data_.bytes_, data->bytes_, data->size_);
chunk->data_.size_ = data->size_;
// Chunk is owner of data.
chunk->owner_ = 1;
} else {
// Don't copy data.
if (copy_data) { // Copy data.
if (!WebPDataCopy(data, &chunk->data_)) return WEBP_MUX_MEMORY_ERROR;
chunk->owner_ = 1; // Chunk is owner of data.
} else { // Don't copy data.
chunk->data_ = *data;
}
}
chunk->tag_ = tag;
return WEBP_MUX_OK;
}
WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
WebPMuxError ChunkSetNth(WebPChunk* chunk, WebPChunk** chunk_list,
uint32_t nth) {
WebPChunk* new_chunk;
@ -162,6 +173,7 @@ WebPMuxError ChunkSetNth(const WebPChunk* chunk, WebPChunk** chunk_list,
new_chunk = (WebPChunk*)malloc(sizeof(*new_chunk));
if (new_chunk == NULL) return WEBP_MUX_MEMORY_ERROR;
*new_chunk = *chunk;
chunk->owner_ = 0;
new_chunk->next_ = *chunk_list;
*chunk_list = new_chunk;
return WEBP_MUX_OK;
@ -181,7 +193,7 @@ WebPChunk* ChunkDelete(WebPChunk* const chunk) {
size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
size_t size = 0;
while (chunk_list) {
while (chunk_list != NULL) {
size += ChunkDiskSize(chunk_list);
chunk_list = chunk_list->next_;
}
@ -189,55 +201,26 @@ size_t ChunksListDiskSize(const WebPChunk* chunk_list) {
}
static uint8_t* ChunkEmit(const WebPChunk* const chunk, uint8_t* dst) {
const size_t chunk_size = chunk->data_.size_;
const size_t chunk_size = chunk->data_.size;
assert(chunk);
assert(chunk->tag_ != NIL_TAG);
PutLE32(dst + 0, chunk->tag_);
PutLE32(dst + TAG_SIZE, (uint32_t)chunk_size);
assert(chunk_size == (uint32_t)chunk_size);
memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes_, chunk_size);
memcpy(dst + CHUNK_HEADER_SIZE, chunk->data_.bytes, chunk_size);
if (chunk_size & 1)
dst[CHUNK_HEADER_SIZE + chunk_size] = 0; // Add padding.
return dst + ChunkDiskSize(chunk);
}
uint8_t* ChunkListEmit(const WebPChunk* chunk_list, uint8_t* dst) {
while (chunk_list) {
while (chunk_list != NULL) {
dst = ChunkEmit(chunk_list, dst);
chunk_list = chunk_list->next_;
}
return dst;
}
//------------------------------------------------------------------------------
// Manipulation of a WebPData object.
void WebPDataInit(WebPData* webp_data) {
if (webp_data != NULL) {
memset(webp_data, 0, sizeof(*webp_data));
}
}
void WebPDataClear(WebPData* webp_data) {
if (webp_data != NULL) {
free((void*)webp_data->bytes_);
WebPDataInit(webp_data);
}
}
int WebPDataCopy(const WebPData* src, WebPData* dst) {
if (src == NULL || dst == NULL) return 0;
WebPDataInit(dst);
if (src->bytes_ != NULL && src->size_ != 0) {
dst->bytes_ = (uint8_t*)malloc(src->size_);
if (dst->bytes_ == NULL) return 0;
memcpy((void*)dst->bytes_, src->bytes_, src->size_);
dst->size_ = src->size_;
}
return 1;
}
//------------------------------------------------------------------------------
// Life of a MuxImage object.
@ -265,10 +248,14 @@ int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
int count = 0;
const WebPMuxImage* current;
for (current = wpi_list; current != NULL; current = current->next_) {
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(current, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) ++count;
if (id == WEBP_CHUNK_NIL) {
++count; // Special case: count all images.
} else {
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(current, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) ++count; // Count images with a matching 'id'.
}
}
}
return count;
@ -276,34 +263,22 @@ int MuxImageCount(const WebPMuxImage* wpi_list, WebPChunkId id) {
// Outputs a pointer to 'prev_wpi->next_',
// where 'prev_wpi' is the pointer to the image at position (nth - 1).
// Returns 1 if nth image with given id was found, 0 otherwise.
// Returns true if nth image was found.
static int SearchImageToGetOrDelete(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id,
WebPMuxImage*** const location) {
uint32_t count = 0;
assert(wpi_list);
*location = wpi_list;
// Search makes sense only for the following.
assert(id == WEBP_CHUNK_FRAME || id == WEBP_CHUNK_TILE ||
id == WEBP_CHUNK_IMAGE);
assert(id != WEBP_CHUNK_IMAGE || nth == 1);
if (nth == 0) {
nth = MuxImageCount(*wpi_list, id);
nth = MuxImageCount(*wpi_list, WEBP_CHUNK_NIL);
if (nth == 0) return 0; // Not found.
}
while (*wpi_list) {
while (*wpi_list != NULL) {
WebPMuxImage* const cur_wpi = *wpi_list;
const WebPChunk* const wpi_chunk = *MuxImageGetListFromId(cur_wpi, id);
if (wpi_chunk != NULL) {
const WebPChunkId wpi_chunk_id = ChunkGetIdFromTag(wpi_chunk->tag_);
if (wpi_chunk_id == id) {
++count;
if (count == nth) return 1; // Found.
}
}
++count;
if (count == nth) return 1; // Found.
wpi_list = &cur_wpi->next_;
*location = wpi_list;
}
@ -346,15 +321,14 @@ WebPMuxImage* MuxImageDelete(WebPMuxImage* const wpi) {
}
void MuxImageDeleteAll(WebPMuxImage** const wpi_list) {
while (*wpi_list) {
while (*wpi_list != NULL) {
*wpi_list = MuxImageDelete(*wpi_list);
}
}
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id) {
WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth) {
assert(wpi_list);
if (!SearchImageToGetOrDelete(wpi_list, nth, id, &wpi_list)) {
if (!SearchImageToGetOrDelete(wpi_list, nth, &wpi_list)) {
return WEBP_MUX_NOT_FOUND;
}
*wpi_list = MuxImageDelete(*wpi_list);
@ -365,10 +339,10 @@ WebPMuxError MuxImageDeleteNth(WebPMuxImage** wpi_list, uint32_t nth,
// MuxImage reader methods.
WebPMuxError MuxImageGetNth(const WebPMuxImage** wpi_list, uint32_t nth,
WebPChunkId id, WebPMuxImage** wpi) {
WebPMuxImage** wpi) {
assert(wpi_list);
assert(wpi);
if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth, id,
if (!SearchImageToGetOrDelete((WebPMuxImage**)wpi_list, nth,
(WebPMuxImage***)&wpi_list)) {
return WEBP_MUX_NOT_FOUND;
}
@ -390,27 +364,46 @@ size_t MuxImageDiskSize(const WebPMuxImage* const wpi) {
size_t MuxImageListDiskSize(const WebPMuxImage* wpi_list) {
size_t size = 0;
while (wpi_list) {
while (wpi_list != NULL) {
size += MuxImageDiskSize(wpi_list);
wpi_list = wpi_list->next_;
}
return size;
}
// Special case as ANMF/FRGM chunk encapsulates other image chunks.
static uint8_t* ChunkEmitSpecial(const WebPChunk* const header,
size_t total_size, uint8_t* dst) {
const size_t header_size = header->data_.size;
const size_t offset_to_next = total_size - CHUNK_HEADER_SIZE;
assert(header->tag_ == kChunks[IDX_ANMF].tag ||
header->tag_ == kChunks[IDX_FRGM].tag);
PutLE32(dst + 0, header->tag_);
PutLE32(dst + TAG_SIZE, (uint32_t)offset_to_next);
assert(header_size == (uint32_t)header_size);
memcpy(dst + CHUNK_HEADER_SIZE, header->data_.bytes, header_size);
if (header_size & 1) {
dst[CHUNK_HEADER_SIZE + header_size] = 0; // Add padding.
}
return dst + ChunkDiskSize(header);
}
uint8_t* MuxImageEmit(const WebPMuxImage* const wpi, uint8_t* dst) {
// Ordering of chunks to be emitted is strictly as follows:
// 1. Frame/Tile chunk (if present).
// 2. Alpha chunk (if present).
// 1. ANMF/FRGM chunk (if present).
// 2. ALPH chunk (if present).
// 3. VP8/VP8L chunk.
assert(wpi);
if (wpi->header_ != NULL) dst = ChunkEmit(wpi->header_, dst);
if (wpi->header_ != NULL) {
dst = ChunkEmitSpecial(wpi->header_, MuxImageDiskSize(wpi), dst);
}
if (wpi->alpha_ != NULL) dst = ChunkEmit(wpi->alpha_, dst);
if (wpi->img_ != NULL) dst = ChunkEmit(wpi->img_, dst);
return dst;
}
uint8_t* MuxImageListEmit(const WebPMuxImage* wpi_list, uint8_t* dst) {
while (wpi_list) {
while (wpi_list != NULL) {
dst = MuxImageEmit(wpi_list, dst);
wpi_list = wpi_list->next_;
}
@ -441,11 +434,12 @@ uint8_t* MuxEmitRiffHeader(uint8_t* const data, size_t size) {
WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
assert(mux != NULL);
switch(id) {
switch (id) {
case WEBP_CHUNK_VP8X: return (WebPChunk**)&mux->vp8x_;
case WEBP_CHUNK_ICCP: return (WebPChunk**)&mux->iccp_;
case WEBP_CHUNK_LOOP: return (WebPChunk**)&mux->loop_;
case WEBP_CHUNK_META: return (WebPChunk**)&mux->meta_;
case WEBP_CHUNK_ANIM: return (WebPChunk**)&mux->anim_;
case WEBP_CHUNK_EXIF: return (WebPChunk**)&mux->exif_;
case WEBP_CHUNK_XMP: return (WebPChunk**)&mux->xmp_;
case WEBP_CHUNK_UNKNOWN: return (WebPChunk**)&mux->unknown_;
default: return NULL;
}
@ -453,17 +447,17 @@ WebPChunk** MuxGetChunkListFromId(const WebPMux* mux, WebPChunkId id) {
WebPMuxError MuxValidateForImage(const WebPMux* const mux) {
const int num_images = MuxImageCount(mux->images_, WEBP_CHUNK_IMAGE);
const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_FRAME);
const int num_tiles = MuxImageCount(mux->images_, WEBP_CHUNK_TILE);
const int num_frames = MuxImageCount(mux->images_, WEBP_CHUNK_ANMF);
const int num_fragments = MuxImageCount(mux->images_, WEBP_CHUNK_FRGM);
if (num_images == 0) {
// No images in mux.
return WEBP_MUX_NOT_FOUND;
} else if (num_images == 1 && num_frames == 0 && num_tiles == 0) {
} else if (num_images == 1 && num_frames == 0 && num_fragments == 0) {
// Valid case (single image).
return WEBP_MUX_OK;
} else {
// Frame/Tile case OR an invalid mux.
// Frame/Fragment case OR an invalid mux.
return WEBP_MUX_INVALID_ARGUMENT;
}
}
@ -494,10 +488,11 @@ static WebPMuxError ValidateChunk(const WebPMux* const mux, CHUNK_INDEX idx,
WebPMuxError MuxValidate(const WebPMux* const mux) {
int num_iccp;
int num_meta;
int num_loop_chunks;
int num_exif;
int num_xmp;
int num_anim;
int num_frames;
int num_tiles;
int num_fragments;
int num_vp8x;
int num_images;
int num_alpha;
@ -517,29 +512,33 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
err = ValidateChunk(mux, IDX_ICCP, ICCP_FLAG, flags, 1, &num_iccp);
if (err != WEBP_MUX_OK) return err;
// At most one XMP metadata.
err = ValidateChunk(mux, IDX_META, META_FLAG, flags, 1, &num_meta);
// At most one EXIF metadata.
err = ValidateChunk(mux, IDX_EXIF, EXIF_FLAG, flags, 1, &num_exif);
if (err != WEBP_MUX_OK) return err;
// Animation: ANIMATION_FLAG, loop chunk and frame chunk(s) are consistent.
// At most one loop chunk.
err = ValidateChunk(mux, IDX_LOOP, NO_FLAG, flags, 1, &num_loop_chunks);
// At most one XMP metadata.
err = ValidateChunk(mux, IDX_XMP, XMP_FLAG, flags, 1, &num_xmp);
if (err != WEBP_MUX_OK) return err;
err = ValidateChunk(mux, IDX_FRAME, NO_FLAG, flags, -1, &num_frames);
// Animation: ANIMATION_FLAG, ANIM chunk and ANMF chunk(s) are consistent.
// At most one ANIM chunk.
err = ValidateChunk(mux, IDX_ANIM, NO_FLAG, flags, 1, &num_anim);
if (err != WEBP_MUX_OK) return err;
err = ValidateChunk(mux, IDX_ANMF, NO_FLAG, flags, -1, &num_frames);
if (err != WEBP_MUX_OK) return err;
{
const int has_animation = !!(flags & ANIMATION_FLAG);
if (has_animation && (num_loop_chunks == 0 || num_frames == 0)) {
if (has_animation && (num_anim == 0 || num_frames == 0)) {
return WEBP_MUX_INVALID_ARGUMENT;
}
if (!has_animation && (num_loop_chunks == 1 || num_frames > 0)) {
if (!has_animation && (num_anim == 1 || num_frames > 0)) {
return WEBP_MUX_INVALID_ARGUMENT;
}
}
// Tiling: TILE_FLAG and tile chunk(s) are consistent.
err = ValidateChunk(mux, IDX_TILE, TILE_FLAG, flags, -1, &num_tiles);
// Fragmentation: FRAGMENTS_FLAG and FRGM chunk(s) are consistent.
err = ValidateChunk(mux, IDX_FRGM, FRAGMENTS_FLAG, flags, -1, &num_fragments);
if (err != WEBP_MUX_OK) return err;
// Verify either VP8X chunk is present OR there is only one elem in
@ -551,16 +550,18 @@ WebPMuxError MuxValidate(const WebPMux* const mux) {
if (num_vp8x == 0 && num_images != 1) return WEBP_MUX_INVALID_ARGUMENT;
// ALPHA_FLAG & alpha chunk(s) are consistent.
if (num_vp8x > 0 && MuxHasLosslessImages(mux->images_)) {
// Special case: we have a VP8X chunk as well as some lossless images.
if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
if (MuxHasLosslessImages(mux->images_)) {
if (num_vp8x > 0) {
// Special case: we have a VP8X chunk as well as some lossless images.
if (!(flags & ALPHA_FLAG)) return WEBP_MUX_INVALID_ARGUMENT;
}
} else {
err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
if (err != WEBP_MUX_OK) return err;
err = ValidateChunk(mux, IDX_ALPHA, ALPHA_FLAG, flags, -1, &num_alpha);
if (err != WEBP_MUX_OK) return err;
}
// num_tiles & num_images are consistent.
if (num_tiles > 0 && num_images != num_tiles) {
// num_fragments & num_images are consistent.
if (num_fragments > 0 && num_images != num_fragments) {
return WEBP_MUX_INVALID_ARGUMENT;
}
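
Chunk identity throughout these files is the little-endian fourcc tag. A standalone sketch (not part of the commit) of the mapping performed by MKFOURCC() and ChunkGetTagFromFourCC() above; TagFromFourCC is a local stand-in and the macro body is copied from the header diff.

#include <stdint.h>
#include <stdio.h>

#define MKFOURCC(a, b, c, d) ((uint32_t)(a) | (b) << 8 | (c) << 16 | (d) << 24)

static uint32_t TagFromFourCC(const char fourcc[4]) {
  return MKFOURCC(fourcc[0], fourcc[1], fourcc[2], fourcc[3]);
}

int main(void) {
  /* 'A' lands in the lowest byte, so the tag reads back as "ANMF" in memory. */
  printf("0x%08x\n", TagFromFourCC("ANMF"));  /* prints 0x464d4e41 */
  return 0;
}
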

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Read APIs for mux.
@ -12,6 +14,7 @@
#include <assert.h>
#include "./muxi.h"
#include "../utils/utils.h"
#if defined(__cplusplus) || defined(c_plusplus)
extern "C" {
@ -41,8 +44,9 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
SWITCH_ID_LIST(IDX_VP8X, mux->vp8x_);
SWITCH_ID_LIST(IDX_ICCP, mux->iccp_);
SWITCH_ID_LIST(IDX_LOOP, mux->loop_);
SWITCH_ID_LIST(IDX_META, mux->meta_);
SWITCH_ID_LIST(IDX_ANIM, mux->anim_);
SWITCH_ID_LIST(IDX_EXIF, mux->exif_);
SWITCH_ID_LIST(IDX_XMP, mux->xmp_);
SWITCH_ID_LIST(IDX_UNKNOWN, mux->unknown_);
return WEBP_MUX_NOT_FOUND;
}
@ -50,10 +54,9 @@ static WebPMuxError MuxGet(const WebPMux* const mux, CHUNK_INDEX idx,
// Fill the chunk with the given data (includes chunk header bytes), after some
// verifications.
static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
const uint8_t* data,
size_t data_size, size_t riff_size,
int copy_data) {
static WebPMuxError ChunkVerifyAndAssign(WebPChunk* chunk,
const uint8_t* data, size_t data_size,
size_t riff_size, int copy_data) {
uint32_t chunk_size;
WebPData chunk_data;
@ -68,11 +71,74 @@ static WebPMuxError ChunkVerifyAndAssignData(WebPChunk* chunk,
}
// Data assignment.
chunk_data.bytes_ = data + CHUNK_HEADER_SIZE;
chunk_data.size_ = chunk_size;
chunk_data.bytes = data + CHUNK_HEADER_SIZE;
chunk_data.size = chunk_size;
return ChunkAssignData(chunk, &chunk_data, copy_data, GetLE32(data + 0));
}
static int MuxImageParse(const WebPChunk* const chunk, int copy_data,
WebPMuxImage* const wpi) {
const uint8_t* bytes = chunk->data_.bytes;
size_t size = chunk->data_.size;
const uint8_t* const last = bytes + size;
WebPChunk subchunk;
size_t subchunk_size;
ChunkInit(&subchunk);
assert(chunk->tag_ == kChunks[IDX_ANMF].tag ||
chunk->tag_ == kChunks[IDX_FRGM].tag);
assert(!wpi->is_partial_);
// ANMF/FRGM.
{
const size_t hdr_size = (chunk->tag_ == kChunks[IDX_ANMF].tag) ?
ANMF_CHUNK_SIZE : FRGM_CHUNK_SIZE;
const WebPData temp = { bytes, hdr_size };
// Each of ANMF and FRGM chunk contain a header at the beginning. So, its
// size should at least be 'hdr_size'.
if (size < hdr_size) goto Fail;
ChunkAssignData(&subchunk, &temp, copy_data, chunk->tag_);
}
ChunkSetNth(&subchunk, &wpi->header_, 1);
wpi->is_partial_ = 1; // Waiting for ALPH and/or VP8/VP8L chunks.
// Rest of the chunks.
subchunk_size = ChunkDiskSize(&subchunk) - CHUNK_HEADER_SIZE;
bytes += subchunk_size;
size -= subchunk_size;
while (bytes != last) {
ChunkInit(&subchunk);
if (ChunkVerifyAndAssign(&subchunk, bytes, size, size,
copy_data) != WEBP_MUX_OK) {
goto Fail;
}
switch (ChunkGetIdFromTag(subchunk.tag_)) {
case WEBP_CHUNK_ALPHA:
if (wpi->alpha_ != NULL) goto Fail; // Consecutive ALPH chunks.
if (ChunkSetNth(&subchunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Fail;
wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
break;
case WEBP_CHUNK_IMAGE:
if (ChunkSetNth(&subchunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Fail;
wpi->is_partial_ = 0; // wpi is completely filled.
break;
default:
goto Fail;
break;
}
subchunk_size = ChunkDiskSize(&subchunk);
bytes += subchunk_size;
size -= subchunk_size;
}
if (wpi->is_partial_) goto Fail;
return 1;
Fail:
ChunkRelease(&subchunk);
return 0;
}
//------------------------------------------------------------------------------
// Create a mux object from WebP-RIFF data.
@ -94,8 +160,8 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
}
if (bitstream == NULL) return NULL;
data = bitstream->bytes_;
size = bitstream->size_;
data = bitstream->bytes;
size = bitstream->size;
if (data == NULL) return NULL;
if (size < RIFF_HEADER_SIZE) return NULL;
@ -135,42 +201,48 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
// Loop over chunks.
while (data != end) {
size_t data_size;
WebPChunkId id;
WebPMuxError err;
err = ChunkVerifyAndAssignData(&chunk, data, size, riff_size, copy_data);
if (err != WEBP_MUX_OK) goto Err;
WebPChunk** chunk_list;
if (ChunkVerifyAndAssign(&chunk, data, size, riff_size,
copy_data) != WEBP_MUX_OK) {
goto Err;
}
data_size = ChunkDiskSize(&chunk);
id = ChunkGetIdFromTag(chunk.tag_);
if (IsWPI(id)) { // An image chunk (frame/tile/alpha/vp8).
WebPChunk** wpi_chunk_ptr =
MuxImageGetListFromId(wpi, id); // Image chunk to set.
assert(wpi_chunk_ptr != NULL);
if (*wpi_chunk_ptr != NULL) goto Err; // Consecutive alpha chunks or
// consecutive frame/tile chunks.
if (ChunkSetNth(&chunk, wpi_chunk_ptr, 1) != WEBP_MUX_OK) goto Err;
if (id == WEBP_CHUNK_IMAGE) {
switch (id) {
case WEBP_CHUNK_ALPHA:
if (wpi->alpha_ != NULL) goto Err; // Consecutive ALPH chunks.
if (ChunkSetNth(&chunk, &wpi->alpha_, 1) != WEBP_MUX_OK) goto Err;
wpi->is_partial_ = 1; // Waiting for a VP8 chunk.
break;
case WEBP_CHUNK_IMAGE:
if (ChunkSetNth(&chunk, &wpi->img_, 1) != WEBP_MUX_OK) goto Err;
wpi->is_partial_ = 0; // wpi is completely filled.
PushImage:
// Add this to mux->images_ list.
if (MuxImagePush(wpi, &mux->images_) != WEBP_MUX_OK) goto Err;
MuxImageInit(wpi); // Reset for reading next image.
} else {
wpi->is_partial_ = 1; // wpi is only partially filled.
}
} else { // A non-image chunk.
WebPChunk** chunk_list;
if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before
// getting all chunks of an image.
chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
if (chunk_list == NULL) chunk_list = &mux->unknown_;
if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
}
{
const size_t data_size = ChunkDiskSize(&chunk);
data += data_size;
size -= data_size;
break;
case WEBP_CHUNK_ANMF:
#ifdef WEBP_EXPERIMENTAL_FEATURES
case WEBP_CHUNK_FRGM:
#endif
if (wpi->is_partial_) goto Err; // Previous wpi is still incomplete.
if (!MuxImageParse(&chunk, copy_data, wpi)) goto Err;
ChunkRelease(&chunk);
goto PushImage;
break;
default: // A non-image chunk.
if (wpi->is_partial_) goto Err; // Encountered a non-image chunk before
// getting all chunks of an image.
chunk_list = MuxGetChunkListFromId(mux, id); // List to add this chunk.
if (chunk_list == NULL) chunk_list = &mux->unknown_;
if (ChunkSetNth(&chunk, chunk_list, 0) != WEBP_MUX_OK) goto Err;
break;
}
data += data_size;
size -= data_size;
ChunkInit(&chunk);
}
@ -192,26 +264,30 @@ WebPMux* WebPMuxCreateInternal(const WebPData* bitstream, int copy_data,
WebPMuxError WebPMuxGetFeatures(const WebPMux* mux, uint32_t* flags) {
WebPData data;
WebPMuxError err;
if (mux == NULL || flags == NULL) return WEBP_MUX_INVALID_ARGUMENT;
*flags = 0;
// Check if VP8X chunk is present.
err = MuxGet(mux, IDX_VP8X, 1, &data);
if (err == WEBP_MUX_NOT_FOUND) {
// Check if VP8/VP8L chunk is present.
err = WebPMuxGetImage(mux, &data);
WebPDataClear(&data);
return err;
} else if (err != WEBP_MUX_OK) {
return err;
if (MuxGet(mux, IDX_VP8X, 1, &data) == WEBP_MUX_OK) {
if (data.size < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
*flags = GetLE32(data.bytes); // All OK. Fill up flags.
} else {
WebPMuxError err = MuxValidateForImage(mux); // Check for single image.
if (err != WEBP_MUX_OK) return err;
if (MuxHasLosslessImages(mux->images_)) {
const WebPData* const vp8l_data = &mux->images_->img_->data_;
int has_alpha = 0;
if (!VP8LGetInfo(vp8l_data->bytes, vp8l_data->size, NULL, NULL,
&has_alpha)) {
return WEBP_MUX_BAD_DATA;
}
if (has_alpha) {
*flags = ALPHA_FLAG;
}
}
}
if (data.size_ < CHUNK_SIZE_BYTES) return WEBP_MUX_BAD_DATA;
// All OK. Fill up flags.
*flags = GetLE32(data.bytes_);
return WEBP_MUX_OK;
}
@ -230,7 +306,7 @@ static uint8_t* EmitVP8XChunk(uint8_t* const dst, int width,
}
// Assemble a single image WebP bitstream from 'wpi'.
static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
static WebPMuxError SynthesizeBitstream(const WebPMuxImage* const wpi,
WebPData* const bitstream) {
uint8_t* dst;
@ -238,7 +314,7 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
const int need_vp8x = (wpi->alpha_ != NULL);
const size_t vp8x_size = need_vp8x ? CHUNK_HEADER_SIZE + VP8X_CHUNK_SIZE : 0;
const size_t alpha_size = need_vp8x ? ChunkDiskSize(wpi->alpha_) : 0;
// Note: No need to output FRM/TILE chunk for a single image.
// Note: No need to output ANMF/FRGM chunk for a single image.
const size_t size = RIFF_HEADER_SIZE + vp8x_size + alpha_size +
ChunkDiskSize(wpi->img_);
uint8_t* const data = (uint8_t*)malloc(size);
@ -265,100 +341,101 @@ static WebPMuxError SynthesizeBitstream(WebPMuxImage* const wpi,
assert(dst == data + size);
// Output.
bitstream->bytes_ = data;
bitstream->size_ = size;
bitstream->bytes = data;
bitstream->size = size;
return WEBP_MUX_OK;
}
WebPMuxError WebPMuxGetImage(const WebPMux* mux, WebPData* bitstream) {
WebPMuxError err;
WebPMuxImage* wpi = NULL;
if (mux == NULL || bitstream == NULL) {
WebPMuxError WebPMuxGetChunk(const WebPMux* mux, const char fourcc[4],
WebPData* chunk_data) {
CHUNK_INDEX idx;
if (mux == NULL || fourcc == NULL || chunk_data == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
err = MuxValidateForImage(mux);
if (err != WEBP_MUX_OK) return err;
// All well. Get the image.
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, 1, WEBP_CHUNK_IMAGE,
&wpi);
assert(err == WEBP_MUX_OK); // Already tested above.
return SynthesizeBitstream(wpi, bitstream);
idx = ChunkGetIndexFromFourCC(fourcc);
if (IsWPI(kChunks[idx].id)) { // An image chunk.
return WEBP_MUX_INVALID_ARGUMENT;
} else if (idx != IDX_UNKNOWN) { // A known chunk type.
return MuxGet(mux, idx, 1, chunk_data);
} else { // An unknown chunk type.
const WebPChunk* const chunk =
ChunkSearchList(mux->unknown_, 1, ChunkGetTagFromFourCC(fourcc));
if (chunk == NULL) return WEBP_MUX_NOT_FOUND;
*chunk_data = chunk->data_;
return WEBP_MUX_OK;
}
}
WebPMuxError WebPMuxGetMetadata(const WebPMux* mux, WebPData* metadata) {
if (mux == NULL || metadata == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxGet(mux, IDX_META, 1, metadata);
static WebPMuxError MuxGetImageInternal(const WebPMuxImage* const wpi,
WebPMuxFrameInfo* const info) {
// Set some defaults for unrelated fields.
info->x_offset = 0;
info->y_offset = 0;
info->duration = 1;
// Extract data for related fields.
info->id = ChunkGetIdFromTag(wpi->img_->tag_);
return SynthesizeBitstream(wpi, &info->bitstream);
}
WebPMuxError WebPMuxGetColorProfile(const WebPMux* mux,
WebPData* color_profile) {
if (mux == NULL || color_profile == NULL) return WEBP_MUX_INVALID_ARGUMENT;
return MuxGet(mux, IDX_ICCP, 1, color_profile);
static WebPMuxError MuxGetFrameFragmentInternal(const WebPMuxImage* const wpi,
WebPMuxFrameInfo* const frame) {
const int is_frame = (wpi->header_->tag_ == kChunks[IDX_ANMF].tag);
const CHUNK_INDEX idx = is_frame ? IDX_ANMF : IDX_FRGM;
const WebPData* frame_frgm_data;
#ifndef WEBP_EXPERIMENTAL_FEATURES
if (!is_frame) return WEBP_MUX_INVALID_ARGUMENT;
#endif
assert(wpi->header_ != NULL); // Already checked by WebPMuxGetFrame().
// Get frame/fragment chunk.
frame_frgm_data = &wpi->header_->data_;
if (frame_frgm_data->size < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
// Extract info.
frame->x_offset = 2 * GetLE24(frame_frgm_data->bytes + 0);
frame->y_offset = 2 * GetLE24(frame_frgm_data->bytes + 3);
frame->duration = is_frame ? GetLE24(frame_frgm_data->bytes + 12) : 1;
frame->dispose_method =
is_frame ? (WebPMuxAnimDispose)(frame_frgm_data->bytes[15] & 1)
: WEBP_MUX_DISPOSE_NONE;
frame->id = ChunkGetIdFromTag(wpi->header_->tag_);
return SynthesizeBitstream(wpi, &frame->bitstream);
}
WebPMuxError WebPMuxGetLoopCount(const WebPMux* mux, int* loop_count) {
WebPData image;
WebPMuxError err;
if (mux == NULL || loop_count == NULL) return WEBP_MUX_INVALID_ARGUMENT;
err = MuxGet(mux, IDX_LOOP, 1, &image);
if (err != WEBP_MUX_OK) return err;
if (image.size_ < kChunks[WEBP_CHUNK_LOOP].size) return WEBP_MUX_BAD_DATA;
*loop_count = GetLE16(image.bytes_);
return WEBP_MUX_OK;
}
static WebPMuxError MuxGetFrameTileInternal(
const WebPMux* const mux, uint32_t nth, WebPData* const bitstream,
int* const x_offset, int* const y_offset, int* const duration,
uint32_t tag) {
const WebPData* frame_tile_data;
WebPMuxError WebPMuxGetFrame(
const WebPMux* mux, uint32_t nth, WebPMuxFrameInfo* frame) {
WebPMuxError err;
WebPMuxImage* wpi;
const int is_frame = (tag == kChunks[WEBP_CHUNK_FRAME].tag) ? 1 : 0;
const CHUNK_INDEX idx = is_frame ? IDX_FRAME : IDX_TILE;
const WebPChunkId id = kChunks[idx].id;
if (mux == NULL || bitstream == NULL ||
x_offset == NULL || y_offset == NULL || (is_frame && duration == NULL)) {
// Sanity checks.
if (mux == NULL || frame == NULL) {
return WEBP_MUX_INVALID_ARGUMENT;
}
// Get the nth WebPMuxImage.
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, id, &wpi);
err = MuxImageGetNth((const WebPMuxImage**)&mux->images_, nth, &wpi);
if (err != WEBP_MUX_OK) return err;
// Get frame chunk.
assert(wpi->header_ != NULL); // As MuxImageGetNth() already checked header_.
frame_tile_data = &wpi->header_->data_;
if (frame_tile_data->size_ < kChunks[idx].size) return WEBP_MUX_BAD_DATA;
*x_offset = 2 * GetLE24(frame_tile_data->bytes_ + 0);
*y_offset = 2 * GetLE24(frame_tile_data->bytes_ + 3);
if (is_frame) *duration = 1 + GetLE24(frame_tile_data->bytes_ + 12);
return SynthesizeBitstream(wpi, bitstream);
// Get frame info.
if (wpi->header_ == NULL) {
return MuxGetImageInternal(wpi, frame);
} else {
return MuxGetFrameFragmentInternal(wpi, frame);
}
}
WebPMuxError WebPMuxGetFrame(const WebPMux* mux, uint32_t nth,
WebPData* bitstream,
int* x_offset, int* y_offset, int* duration) {
return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset,
duration, kChunks[IDX_FRAME].tag);
}
WebPMuxError WebPMuxGetAnimationParams(const WebPMux* mux,
WebPMuxAnimParams* params) {
WebPData anim;
WebPMuxError err;
WebPMuxError WebPMuxGetTile(const WebPMux* mux, uint32_t nth,
WebPData* bitstream,
int* x_offset, int* y_offset) {
return MuxGetFrameTileInternal(mux, nth, bitstream, x_offset, y_offset, NULL,
kChunks[IDX_TILE].tag);
if (mux == NULL || params == NULL) return WEBP_MUX_INVALID_ARGUMENT;
err = MuxGet(mux, IDX_ANIM, 1, &anim);
if (err != WEBP_MUX_OK) return err;
if (anim.size < kChunks[WEBP_CHUNK_ANIM].size) return WEBP_MUX_BAD_DATA;
params->bgcolor = GetLE32(anim.bytes);
params->loop_count = GetLE16(anim.bytes + 4);
return WEBP_MUX_OK;
}
// Get chunk index from chunk id. Returns IDX_NIL if not found.
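
The hunks above replace the old per-field getters (WebPMuxGetFrame with explicit x/y/duration out-parameters, WebPMuxGetTile, WebPMuxGetLoopCount, WebPMuxGetMetadata) with the WebPMuxFrameInfo- and fourcc-based API. A minimal caller sketch against that new API — illustrative only, not part of this diff; it assumes a mux object obtained elsewhere and the public webp/mux.h header:

    #include <stdio.h>
    #include "webp/mux.h"

    // Fetch frame #1 through the WebPMuxFrameInfo-based getter and release
    // the bitstream that WebPMuxGetFrame() synthesizes for the caller.
    static void print_first_frame(const WebPMux* mux) {
      WebPMuxFrameInfo frame;
      if (WebPMuxGetFrame(mux, 1, &frame) != WEBP_MUX_OK) return;
      printf("frame #1 at (%d,%d), duration %d, %u bytes\n",
             frame.x_offset, frame.y_offset, frame.duration,
             (unsigned)frame.bitstream.size);
      WebPDataClear(&frame.bitstream);  // getter allocates a fresh copy
    }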

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Boolean decoder
@ -15,7 +17,11 @@
extern "C" {
#endif
#define MK(X) (((bit_t)(X) << (BITS)) | (MASK))
#ifndef USE_RIGHT_JUSTIFY
#define MK(X) (((range_t)(X) << (BITS)) | (MASK))
#else
#define MK(X) ((range_t)(X))
#endif
//------------------------------------------------------------------------------
// VP8BitReader
@ -29,7 +35,7 @@ void VP8InitBitReader(VP8BitReader* const br,
br->buf_ = start;
br->buf_end_ = end;
br->value_ = 0;
br->missing_ = 8; // to load the very first 8bits
br->bits_ = -8; // to load the very first 8bits
br->eof_ = 0;
}
@ -46,7 +52,7 @@ const uint8_t kVP8Log2Range[128] = {
};
// range = (range << kVP8Log2Range[range]) + trailing 1's
const bit_t kVP8NewRange[128] = {
const range_t kVP8NewRange[128] = {
MK(127), MK(127), MK(191), MK(127), MK(159), MK(191), MK(223), MK(127),
MK(143), MK(159), MK(175), MK(191), MK(207), MK(223), MK(239), MK(127),
MK(135), MK(143), MK(151), MK(159), MK(167), MK(175), MK(183), MK(191),
@ -71,9 +77,19 @@ void VP8LoadFinalBytes(VP8BitReader* const br) {
assert(br != NULL && br->buf_ != NULL);
// Only read 8bits at a time
if (br->buf_ < br->buf_end_) {
br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 + br->missing_);
br->missing_ -= 8;
} else {
#ifndef USE_RIGHT_JUSTIFY
br->value_ |= (bit_t)(*br->buf_++) << ((BITS) - 8 - br->bits_);
#else
br->value_ = (bit_t)(*br->buf_++) | (br->value_ << 8);
#endif
br->bits_ += 8;
} else if (!br->eof_) {
#ifdef USE_RIGHT_JUSTIFY
// These are not strictly needed, but it makes the behaviour
// consistent for both USE_RIGHT_JUSTIFY and !USE_RIGHT_JUSTIFY.
br->value_ <<= 8;
br->bits_ += 8;
#endif
br->eof_ = 1;
}
}
@ -99,6 +115,10 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int bits) {
#define MAX_NUM_BIT_READ 25
#define LBITS 64 // Number of bits prefetched.
#define WBITS 32 // Minimum number of bits needed after VP8LFillBitWindow.
#define LOG8_WBITS 4 // Number of bytes needed to store WBITS bits.
static const uint32_t kBitMask[MAX_NUM_BIT_READ] = {
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767,
65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215
@ -120,7 +140,7 @@ void VP8LInitBitReader(VP8LBitReader* const br,
br->eos_ = 0;
br->error_ = 0;
for (i = 0; i < sizeof(br->val_) && i < br->len_; ++i) {
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << (8 * i);
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (8 * i);
++br->pos_;
}
}
@ -135,91 +155,56 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
br->len_ = len;
}
// If not at EOS, reload up to LBITS byte-by-byte
static void ShiftBytes(VP8LBitReader* const br) {
while (br->bit_pos_ >= 8 && br->pos_ < br->len_) {
br->val_ >>= 8;
br->val_ |= ((uint64_t)br->buf_[br->pos_]) << 56;
br->val_ |= ((vp8l_val_t)br->buf_[br->pos_]) << (LBITS - 8);
++br->pos_;
br->bit_pos_ -= 8;
}
}
void VP8LFillBitWindow(VP8LBitReader* const br) {
if (br->bit_pos_ >= 32) {
#if defined(__x86_64__) || defined(_M_X64)
if (br->pos_ + 8 < br->len_) {
br->val_ >>= 32;
if (br->bit_pos_ >= WBITS) {
#if (defined(__x86_64__) || defined(_M_X64))
if (br->pos_ + sizeof(br->val_) < br->len_) {
br->val_ >>= WBITS;
br->bit_pos_ -= WBITS;
// The expression below needs a little-endian arch to work correctly.
// This gives a large speedup for decoding speed.
br->val_ |= *(const uint64_t *)(br->buf_ + br->pos_) << 32;
br->pos_ += 4;
br->bit_pos_ -= 32;
} else {
// Slow path.
ShiftBytes(br);
br->val_ |= *(const vp8l_val_t*)(br->buf_ + br->pos_) << (LBITS - WBITS);
br->pos_ += LOG8_WBITS;
return;
}
#else
// Always the slow path.
ShiftBytes(br);
#endif
}
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
br->eos_ = 1;
}
}
uint32_t VP8LReadOneBit(VP8LBitReader* const br) {
const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
// Flag an error at end_of_stream.
if (!br->eos_) {
++br->bit_pos_;
if (br->bit_pos_ >= 32) {
ShiftBytes(br);
}
// After this last bit is read, check if eos needs to be flagged.
if (br->pos_ == br->len_ && br->bit_pos_ == 64) {
ShiftBytes(br); // Slow path.
if (br->pos_ == br->len_ && br->bit_pos_ == LBITS) {
br->eos_ = 1;
}
} else {
br->error_ = 1;
}
return val;
}
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits) {
uint32_t val = 0;
assert(n_bits >= 0);
// Flag an error if end_of_stream or n_bits is more than allowed limit.
if (!br->eos_ && n_bits < MAX_NUM_BIT_READ) {
const uint32_t val =
(uint32_t)(br->val_ >> br->bit_pos_) & kBitMask[n_bits];
const int new_bits = br->bit_pos_ + n_bits;
br->bit_pos_ = new_bits;
// If this read is going to cross the read buffer, set the eos flag.
if (br->pos_ == br->len_) {
if ((br->bit_pos_ + n_bits) >= 64) {
if (new_bits >= LBITS) {
br->eos_ = 1;
if ((br->bit_pos_ + n_bits) > 64) return val;
}
}
val = (br->val_ >> br->bit_pos_) & kBitMask[n_bits];
br->bit_pos_ += n_bits;
if (br->bit_pos_ >= 40) {
if (br->pos_ + 5 < br->len_) {
br->val_ >>= 40;
br->val_ |=
(((uint64_t)br->buf_[br->pos_ + 0]) << 24) |
(((uint64_t)br->buf_[br->pos_ + 1]) << 32) |
(((uint64_t)br->buf_[br->pos_ + 2]) << 40) |
(((uint64_t)br->buf_[br->pos_ + 3]) << 48) |
(((uint64_t)br->buf_[br->pos_ + 4]) << 56);
br->pos_ += 5;
br->bit_pos_ -= 40;
}
if (br->bit_pos_ >= 8) {
ShiftBytes(br);
}
}
ShiftBytes(br);
return val;
} else {
br->error_ = 1;
return 0;
}
return val;
}
//------------------------------------------------------------------------------
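
The reworked VP8LReadBits() above serves reads LSB-first out of a prefetched 64-bit cache and masks with kBitMask[n_bits]. A toy stand-alone sketch of that masking step follows — hypothetical names, no refill or end-of-stream handling, purely to illustrate the cache layout:

    #include <stdint.h>

    // Toy model of the lossless reader's cache: 'val' holds prefetched bits,
    // 'bit_pos' counts how many of them were already consumed (LSB-first).
    typedef struct { uint64_t val; int bit_pos; } ToyLosslessReader;

    static uint32_t toy_read_bits(ToyLosslessReader* br, int n_bits) {
      const uint32_t mask = (n_bits >= 32) ? 0xffffffffu : ((1u << n_bits) - 1u);
      const uint32_t bits = (uint32_t)(br->val >> br->bit_pos) & mask;
      br->bit_pos += n_bits;  // the real code refills or flags eos near 64
      return bits;
    }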

View File

@ -1,8 +1,10 @@
// Copyright 2010 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Boolean decoder
@ -24,11 +26,80 @@
extern "C" {
#endif
#define BITS 32 // can be 32, 16 or 8
#define MASK ((((bit_t)1) << (BITS)) - 1)
#if (BITS == 32)
typedef uint64_t bit_t; // natural register type
typedef uint32_t lbit_t; // natural type for memory I/O
// The Boolean decoder needs to maintain infinite precision on the value_ field.
// However, since range_ is only 8bit, we only need an active window of 8 bits
// for value_. Left bits (MSB) get zeroed and shifted away when value_ falls
// below 128, range_ is updated, and fresh bits read from the bitstream are
// brought in as LSB.
// To avoid reading the fresh bits one by one (slow), we cache a few of them
// ahead (actually, we cache BITS of them ahead. See below). There are two
// strategies regarding how to shift these looked-ahead fresh bits into the
// 8bit window of value_: either we shift them in, while keeping the position of
// the window fixed. Or we slide the window to the right while keeping the cache
// bits at a fixed, right-justified, position.
//
// Example, for BITS=16: here is the content of value_ for both strategies:
//
// !USE_RIGHT_JUSTIFY || USE_RIGHT_JUSTIFY
// ||
// <- 8b -><- 8b -><- BITS bits -> || <- 8b+3b -><- 8b -><- 13 bits ->
// [unused][value_][cached bits][0] || [unused...][value_][cached bits]
// [........00vvvvvvBBBBBBBBBBBBB000]LSB || [...........00vvvvvvBBBBBBBBBBBBB]
// ||
// After calling VP8Shift(), where we need to shift away two zeros:
// [........vvvvvvvvBBBBBBBBBBB00000]LSB || [.............vvvvvvvvBBBBBBBBBBB]
// ||
// Just before we need to call VP8LoadNewBytes(), the situation is:
// [........vvvvvv000000000000000000]LSB || [..........................vvvvvv]
// ||
// And just after calling VP8LoadNewBytes():
// [........vvvvvvvvBBBBBBBBBBBBBBBB]LSB || [........vvvvvvvvBBBBBBBBBBBBBBBB]
//
// -> we're back to eight active 'value_' bits (marked 'v') and BITS cached
// bits (marked 'B')
//
// The right-justify strategy tends to use fewer shifts and is often faster.
//------------------------------------------------------------------------------
// BITS can be any multiple of 8 from 8 to 56 (inclusive).
// Pick values that fit natural register size.
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
#define USE_RIGHT_JUSTIFY
#if defined(__i386__) || defined(_M_IX86) // x86 32bit
#define BITS 16
#elif defined(__x86_64__) || defined(_M_X64) // x86 64bit
#define BITS 56
#elif defined(__arm__) || defined(_M_ARM) // ARM
#define BITS 24
#else // reasonable default
#define BITS 24
#endif
#else // reference choices
#define USE_RIGHT_JUSTIFY
#define BITS 8
#endif
//------------------------------------------------------------------------------
// Derived types and constants
// bit_t = natural register type
// lbit_t = natural type for memory I/O
#if (BITS > 32)
typedef uint64_t bit_t;
typedef uint64_t lbit_t;
#elif (BITS == 32)
typedef uint64_t bit_t;
typedef uint32_t lbit_t;
#elif (BITS == 24)
typedef uint32_t bit_t;
typedef uint32_t lbit_t;
#elif (BITS == 16)
typedef uint32_t bit_t;
typedef uint16_t lbit_t;
@ -37,8 +108,15 @@ typedef uint32_t bit_t;
typedef uint8_t lbit_t;
#endif
#ifndef USE_RIGHT_JUSTIFY
typedef bit_t range_t; // type for storing range_
#define MASK ((((bit_t)1) << (BITS)) - 1)
#else
typedef uint32_t range_t; // range_ only uses 8bits here. No need for bit_t.
#endif
//------------------------------------------------------------------------------
// Bitreader and code-tree reader
// Bitreader
typedef struct VP8BitReader VP8BitReader;
struct VP8BitReader {
@ -47,9 +125,9 @@ struct VP8BitReader {
int eof_; // true if input is exhausted
// boolean decoder
bit_t range_; // current range minus 1. In [127, 254] interval.
bit_t value_; // current value
int missing_; // number of missing bits in value_ (8bit)
range_t range_; // current range minus 1. In [127, 254] interval.
bit_t value_; // current value
int bits_; // number of valid bits left
};
// Initialize the bit reader and the boolean decoder.
@ -67,12 +145,12 @@ int32_t VP8GetSignedValue(VP8BitReader* const br, int num_bits);
// Read a bit with proba 'prob'. Speed-critical function!
extern const uint8_t kVP8Log2Range[128];
extern const bit_t kVP8NewRange[128];
extern const range_t kVP8NewRange[128];
void VP8LoadFinalBytes(VP8BitReader* const br); // special case for the tail
static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
assert(br && br->buf_);
assert(br != NULL && br->buf_ != NULL);
// Read 'BITS' bits at a time if possible.
if (br->buf_ + sizeof(lbit_t) <= br->buf_end_) {
// convert memory type to register type (with some zero'ing!)
@ -80,70 +158,125 @@ static WEBP_INLINE void VP8LoadNewBytes(VP8BitReader* const br) {
lbit_t in_bits = *(lbit_t*)br->buf_;
br->buf_ += (BITS) >> 3;
#if !defined(__BIG_ENDIAN__)
#if (BITS == 32)
#if (BITS > 32)
// gcc 4.3 has builtin functions for swap32/swap64
#if defined(__GNUC__) && \
(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
bits = (bit_t)__builtin_bswap64(in_bits);
#elif defined(_MSC_VER)
bits = (bit_t)_byteswap_uint64(in_bits);
#elif defined(__x86_64__)
__asm__ volatile("bswapq %0" : "=r"(bits) : "0"(in_bits));
#else // generic code for swapping 64-bit values (suggested by bdb@)
bits = (bit_t)in_bits;
bits = ((bits & 0xffffffff00000000ull) >> 32) |
((bits & 0x00000000ffffffffull) << 32);
bits = ((bits & 0xffff0000ffff0000ull) >> 16) |
((bits & 0x0000ffff0000ffffull) << 16);
bits = ((bits & 0xff00ff00ff00ff00ull) >> 8) |
((bits & 0x00ff00ff00ff00ffull) << 8);
#endif
bits >>= 64 - BITS;
#elif (BITS >= 24)
#if defined(__i386__) || defined(__x86_64__)
__asm__ volatile("bswap %k0" : "=r"(in_bits) : "0"(in_bits));
bits = (bit_t)in_bits; // 32b -> 64b zero-extension
bits = (bit_t)in_bits; // 24b/32b -> 32b/64b zero-extension
#elif defined(_MSC_VER)
bits = _byteswap_ulong(in_bits);
bits = (bit_t)_byteswap_ulong(in_bits);
#else
bits = (bit_t)(in_bits >> 24) | ((in_bits >> 8) & 0xff00)
| ((in_bits << 8) & 0xff0000) | (in_bits << 24);
#endif // x86
bits >>= (32 - BITS);
#elif (BITS == 16)
// gcc will recognize a 'rorw $8, ...' here:
bits = (bit_t)(in_bits >> 8) | ((in_bits & 0xff) << 8);
#else // BITS == 8
bits = (bit_t)in_bits;
#endif
#else // LITTLE_ENDIAN
#else // BIG_ENDIAN
bits = (bit_t)in_bits;
if (BITS != 8 * sizeof(bit_t)) bits >>= (8 * sizeof(bit_t) - BITS);
#endif
br->value_ |= bits << br->missing_;
br->missing_ -= (BITS);
#ifndef USE_RIGHT_JUSTIFY
br->value_ |= bits << (-br->bits_);
#else
br->value_ = bits | (br->value_ << (BITS));
#endif
br->bits_ += (BITS);
} else {
VP8LoadFinalBytes(br); // no need to be inlined
}
}
static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, bit_t split) {
const bit_t value_split = split | (MASK);
if (br->missing_ > 0) { // Make sure we have at least BITS bits in 'value_'
static WEBP_INLINE int VP8BitUpdate(VP8BitReader* const br, range_t split) {
if (br->bits_ < 0) { // Make sure we have at least BITS bits in 'value_'
VP8LoadNewBytes(br);
}
if (br->value_ > value_split) {
br->range_ -= value_split + 1;
br->value_ -= value_split + 1;
#ifndef USE_RIGHT_JUSTIFY
split |= (MASK);
if (br->value_ > split) {
br->range_ -= split + 1;
br->value_ -= split + 1;
return 1;
} else {
br->range_ = value_split;
br->range_ = split;
return 0;
}
#else
{
const int pos = br->bits_;
const range_t value = (range_t)(br->value_ >> pos);
if (value > split) {
br->range_ -= split + 1;
br->value_ -= (bit_t)(split + 1) << pos;
return 1;
} else {
br->range_ = split;
return 0;
}
}
#endif
}
static WEBP_INLINE void VP8Shift(VP8BitReader* const br) {
#ifndef USE_RIGHT_JUSTIFY
// range_ is in [0..127] interval here.
const int idx = br->range_ >> (BITS);
const bit_t idx = br->range_ >> (BITS);
const int shift = kVP8Log2Range[idx];
br->range_ = kVP8NewRange[idx];
br->value_ <<= shift;
br->missing_ += shift;
br->bits_ -= shift;
#else
const int shift = kVP8Log2Range[br->range_];
assert(br->range_ < (range_t)128);
br->range_ = kVP8NewRange[br->range_];
br->bits_ -= shift;
#endif
}
static WEBP_INLINE int VP8GetBit(VP8BitReader* const br, int prob) {
#ifndef USE_RIGHT_JUSTIFY
// It's important to avoid generating a 64bit x 64bit multiply here.
// We just need an 8b x 8b after all.
const bit_t split =
(bit_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
const range_t split =
(range_t)((uint32_t)(br->range_ >> (BITS)) * prob) << ((BITS) - 8);
const int bit = VP8BitUpdate(br, split);
if (br->range_ <= (((bit_t)0x7e << (BITS)) | (MASK))) {
if (br->range_ <= (((range_t)0x7e << (BITS)) | (MASK))) {
VP8Shift(br);
}
return bit;
#else
const range_t split = (br->range_ * prob) >> 8;
const int bit = VP8BitUpdate(br, split);
if (br->range_ <= (range_t)0x7e) {
VP8Shift(br);
}
return bit;
#endif
}
static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
const bit_t split = (br->range_ >> 1);
const range_t split = (br->range_ >> 1);
const int bit = VP8BitUpdate(br, split);
VP8Shift(br);
return bit ? -v : v;
@ -151,16 +284,18 @@ static WEBP_INLINE int VP8GetSigned(VP8BitReader* const br, int v) {
// -----------------------------------------------------------------------------
// Bitreader
// Bitreader for lossless format
typedef uint64_t vp8l_val_t; // right now, this bit-reader can only use 64bit.
typedef struct {
uint64_t val_;
const uint8_t* buf_;
size_t len_;
size_t pos_;
int bit_pos_;
int eos_;
int error_;
vp8l_val_t val_; // pre-fetched bits
const uint8_t* buf_; // input byte buffer
size_t len_; // buffer length
size_t pos_; // byte position in buf_
int bit_pos_; // current bit-reading position in val_
int eos_; // bitstream is finished
int error_; // an error occurred (buffer overflow attempt...)
} VP8LBitReader;
void VP8LInitBitReader(VP8LBitReader* const br,
@ -176,17 +311,14 @@ void VP8LBitReaderSetBuffer(VP8LBitReader* const br,
// Flags eos if this read attempt is going to cross the read buffer.
uint32_t VP8LReadBits(VP8LBitReader* const br, int n_bits);
// Reads one bit from Read Buffer. Flags an error in case end_of_stream.
// Flags eos after reading last bit from the buffer.
uint32_t VP8LReadOneBit(VP8LBitReader* const br);
// Return the prefetched bits, so they can be looked up.
static WEBP_INLINE uint32_t VP8LPrefetchBits(VP8LBitReader* const br) {
return (uint32_t)(br->val_ >> br->bit_pos_);
}
// VP8LReadOneBitUnsafe is faster than VP8LReadOneBit, but it can be called only
// 32 times after the last VP8LFillBitWindow. Any subsequent calls
// (without VP8LFillBitWindow) will return invalid data.
static WEBP_INLINE uint32_t VP8LReadOneBitUnsafe(VP8LBitReader* const br) {
const uint32_t val = (br->val_ >> br->bit_pos_) & 1;
++br->bit_pos_;
return val;
// Discard 'num_bits' bits from the cache.
static WEBP_INLINE void VP8LDiscardBits(VP8LBitReader* const br, int num_bits) {
br->bit_pos_ += num_bits;
}
// Advances the Read buffer by 4 bytes to make room for reading next 32 bits.
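
The new VP8LPrefetchBits()/VP8LDiscardBits() pair above replaces the removed VP8LReadOneBitUnsafe(): callers peek at the cached window, decode from it, then discard only the bits actually consumed. A hedged usage sketch — the table type and its fields are hypothetical, not from libwebp; only the peek/decode/discard flow mirrors the API introduced in this diff:

    // Hypothetical prefix-code table entry used for the lookup below.
    typedef struct { int symbol; int length; } ToyCode;

    static int toy_decode_symbol(VP8LBitReader* const br,
                                 const ToyCode table[256]) {
      const uint32_t window = VP8LPrefetchBits(br);     // peek at cached bits
      const ToyCode* const code = &table[window & 0xff];
      VP8LDiscardBits(br, code->length);                // consume what was used
      return code->symbol;
    }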

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Bit writing and boolean coder

View File

@ -1,8 +1,10 @@
// Copyright 2011 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Bit writing and boolean coder

View File

@ -1,8 +1,10 @@
// Copyright 2012 Google Inc. All Rights Reserved.
//
// This code is licensed under the same terms as WebM:
// Software License Agreement: http://www.webmproject.org/license/software/
// Additional IP Rights Grant: http://www.webmproject.org/license/additional/
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Color Cache for WebP Lossless

Some files were not shown because too many files have changed in this diff