diff --git a/AUTHORS b/AUTHORS index 9686ac13e..4ab68811d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -2,3 +2,6 @@ # Name or Organization Google Inc. +The Mozilla Foundation +Timothy B. Terriberry +The Xiph.Org Foundation diff --git a/examples.mk b/examples.mk index f05fd818e..0054f295d 100644 --- a/examples.mk +++ b/examples.mk @@ -19,8 +19,9 @@ ivfdec.SRCS += args.c args.h vpx_ports/config.h ivfdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 ivfdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += ivfenc.c -ivfenc.SRCS += args.c args.h vpx_ports/config.h -ivfenc.SRCS += vpx_ports/mem_ops.h vpx_ports/mem_ops_aligned.h +ivfenc.SRCS += args.c args.h y4minput.c y4minput.h +ivfenc.SRCS += vpx_ports/config.h vpx_ports/mem_ops.h +ivfenc.SRCS += vpx_ports/mem_ops_aligned.h ivfenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 ivfenc.DESCRIPTION = Full featured encoder diff --git a/ivfenc.c b/ivfenc.c index 4dc708251..4d96d468a 100644 --- a/ivfenc.c +++ b/ivfenc.c @@ -32,6 +32,7 @@ #include "vpx/vp8cx.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/vpx_timer.h" +#include "y4minput.h" static const char *exec_name; @@ -217,49 +218,65 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats) return stats->buf; } +enum video_file_type +{ + FILE_TYPE_RAW, + FILE_TYPE_IVF, + FILE_TYPE_Y4M +}; + #define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */ -static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf) +static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type, + y4m_input *y4m) { int plane = 0; - if (is_ivf) + if (file_type == FILE_TYPE_Y4M) { - char junk[IVF_FRAME_HDR_SZ]; - - /* Skip the frame header. We know how big the frame should be. See - * write_ivf_frame_header() for documentation on the frame header - * layout. - */ - fread(junk, 1, IVF_FRAME_HDR_SZ, f); + if (y4m_input_fetch_frame(y4m, f, img) < 0) + return 0; } - - for (plane = 0; plane < 3; plane++) + else { - unsigned char *ptr; - int w = (plane ? 
(1 + img->d_w) / 2 : img->d_w); - int h = (plane ? (1 + img->d_h) / 2 : img->d_h); - int r; - - /* Determine the correct plane based on the image format. The for-loop - * always counts in Y,U,V order, but this may not match the order of - * the data on disk. - */ - switch (plane) + if (file_type == FILE_TYPE_IVF) { - case 1: - ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U]; - break; - case 2: - ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V]; - break; - default: - ptr = img->planes[plane]; + char junk[IVF_FRAME_HDR_SZ]; + + /* Skip the frame header. We know how big the frame should be. See + * write_ivf_frame_header() for documentation on the frame header + * layout. + */ + fread(junk, 1, IVF_FRAME_HDR_SZ, f); } - for (r = 0; r < h; r++) + for (plane = 0; plane < 3; plane++) { - fread(ptr, 1, w, f); - ptr += img->stride[plane]; + unsigned char *ptr; + int w = (plane ? (1 + img->d_w) / 2 : img->d_w); + int h = (plane ? (1 + img->d_h) / 2 : img->d_h); + int r; + + /* Determine the correct plane based on the image format. The for-loop + * always counts in Y,U,V order, but this may not match the order of + * the data on disk. + */ + switch (plane) + { + case 1: + ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? 
VPX_PLANE_V : VPX_PLANE_U]; + break; + case 2: + ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V]; + break; + default: + ptr = img->planes[plane]; + } + + for (r = 0; r < h; r++) + { + fread(ptr, 1, w, f); + ptr += img->stride[plane]; + } } } @@ -267,6 +284,20 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf) } +unsigned int file_is_y4m(FILE *infile, + y4m_input *y4m) +{ + char raw_hdr[4]; + if (fread(raw_hdr, 1, 4, infile) == 4 && + memcmp(raw_hdr, "YUV4", 4) == 0 && + y4m_input_open(y4m, infile, raw_hdr, 4) >= 0) + { + return 1; + } + rewind(infile); + return 0; +} + #define IVF_FILE_HDR_SZ (32) unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc, @@ -568,8 +599,10 @@ int main(int argc, const char **argv_) static const int *ctrl_args_map = NULL; int verbose = 0, show_psnr = 0; int arg_use_i420 = 1; + int arg_have_timebase = 0; unsigned long cx_time = 0; - unsigned int is_ivf, fourcc; + unsigned int file_type, fourcc; + y4m_input y4m; exec_name = argv_[0]; @@ -686,7 +719,10 @@ int main(int argc, const char **argv_) else if (arg_match(&arg, &height, argi)) cfg.g_h = arg_parse_uint(&arg); else if (arg_match(&arg, &timebase, argi)) + { cfg.g_timebase = arg_parse_rational(&arg); + arg_have_timebase = 1; + } else if (arg_match(&arg, &error_resilient, argi)) cfg.g_error_resilient = arg_parse_uint(&arg); else if (arg_match(&arg, &lag_in_frames, argi)) @@ -808,10 +844,24 @@ int main(int argc, const char **argv_) return EXIT_FAILURE; } - is_ivf = file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h); - - if (is_ivf) + if (file_is_y4m(infile, &y4m)) { + file_type = FILE_TYPE_Y4M; + cfg.g_w = y4m.pic_w; + cfg.g_h = y4m.pic_h; + /* Use the frame rate from the file only if none was specified on the + * command-line. 
+ */ + if (!arg_have_timebase) + { + cfg.g_timebase.num = y4m.fps_d; + cfg.g_timebase.den = y4m.fps_n; + } + arg_use_i420 = 0; + } + else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h)) + { + file_type = FILE_TYPE_IVF; switch (fourcc) { case 0x32315659: @@ -825,6 +875,8 @@ int main(int argc, const char **argv_) return EXIT_FAILURE; } } + else + file_type = FILE_TYPE_RAW; fclose(infile); @@ -869,8 +921,14 @@ int main(int argc, const char **argv_) SHOW(kf_max_dist); } - vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12, - cfg.g_w, cfg.g_h, 1); + if (file_type == FILE_TYPE_Y4M) + /*The Y4M reader does its own allocation. + Just initialize this here to avoid problems if we never read any + frames.*/ + memset(&raw, 0, sizeof(raw)); + else + vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12, + cfg.g_w, cfg.g_h, 1); // This was added so that ivfenc will create monotically increasing // timestamps. Since we create new timestamps for alt-reference frames @@ -894,6 +952,18 @@ int main(int argc, const char **argv_) return EXIT_FAILURE; } + /*Skip the file header.*/ + if (file_type == FILE_TYPE_IVF) + { + char raw_hdr[IVF_FILE_HDR_SZ]; + (void)fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile); + } + else if(file_type == FILE_TYPE_Y4M) + { + char buffer[80]; + (void)fgets(buffer, sizeof(buffer)/sizeof(*buffer) - 1, infile); + } + outfile = fopen(out_fn, "wb"); if (!outfile) @@ -966,7 +1036,7 @@ int main(int argc, const char **argv_) if (!arg_limit || frames_in < arg_limit) { - frame_avail = read_frame(infile, &raw, is_ivf); + frame_avail = read_frame(infile, &raw, file_type, &y4m); if (frame_avail) frames_in++; diff --git a/y4minput.c b/y4minput.c new file mode 100644 index 000000000..f1f50bc79 --- /dev/null +++ b/y4minput.c @@ -0,0 +1,880 @@ +/* + * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license and patent + * grant that can be found in the LICENSE file in the root of the source + * tree. All contributing project authors may be found in the AUTHORS + * file in the root of the source tree. + * + * Based on code from the OggTheora software codec source code, + * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. + */ +#include <stdlib.h> +#include <string.h> +#include "y4minput.h" + +static int y4m_parse_tags(y4m_input *_y4m,char *_tags){ + int got_w; + int got_h; + int got_fps; + int got_interlace; + int got_par; + int got_chroma; + char *p; + char *q; + got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0; + for(p=_tags;;p=q){ + /*Skip any leading spaces.*/ + while(*p==' ')p++; + /*If that's all we have, stop.*/ + if(p[0]=='\0')break; + /*Find the end of this tag.*/ + for(q=p+1;*q!='\0'&&*q!=' ';q++); + /*Process the tag.*/ + switch(p[0]){ + case 'W':{ + if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1; + got_w=1; + }break; + case 'H':{ + if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1; + got_h=1; + }break; + case 'F':{ + if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){ + return -1; + } + got_fps=1; + }break; + case 'I':{ + _y4m->interlace=p[1]; + got_interlace=1; + }break; + case 'A':{ + if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){ + return -1; + } + got_par=1; + }break; + case 'C':{ + if(q-p>16)return -1; + memcpy(_y4m->chroma_type,p+1,q-p-1); + _y4m->chroma_type[q-p-1]='\0'; + got_chroma=1; + }break; + /*Ignore unknown tags.*/ + } + } + if(!got_w||!got_h||!got_fps)return -1; + if(!got_interlace)_y4m->interlace='?'; + if(!got_par)_y4m->par_n=_y4m->par_d=0; + /*Chroma-type is not specified in older files, e.g., those generated by + mplayer.*/ + if(!got_chroma)strcpy(_y4m->chroma_type,"420"); + return 0; +} + + + +/*All anti-aliasing filters in the following conversion functions are based on + one of two window functions: + The 6-tap Lanczos window (for down-sampling and shifts): + 
sinc(\pi*t)*sinc(\pi*t/3), |t|<3 (sinc(t)==sin(t)/t) + 0, |t|>=3 + The 4-tap Mitchell window (for up-sampling): + 7|t|^3-12|t|^2+16/3, |t|<1 + -(7/3)|x|^3+12|x|^2-20|x|+32/3, |t|<2 + 0, |t|>=2 + The number of taps is intentionally kept small to reduce computational + overhead and limit ringing. + + The taps from these filters are scaled so that their sum is 1, and the result + is scaled by 128 and rounded to integers to create a filter whose + intermediate values fit inside 16 bits. + Coefficients are rounded in such a way as to ensure their sum is still 128, + which is usually equivalent to normal rounding. + + Conversions which require both horizontal and vertical filtering could + have these steps pipelined, for less memory consumption and better cache + performance, but we do them separately for simplicity.*/ + +#define OC_MINI(_a,_b) ((_a)>(_b)?(_b):(_a)) +#define OC_MAXI(_a,_b) ((_a)<(_b)?(_b):(_a)) +#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c))) + +/*420jpeg chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + 420mpeg2 chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + BR | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + BR | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + We use a resampling filter to shift the site locations one quarter pixel (at + the chroma plane's resolution) to the right. 
+ The 4:2:2 modes look exactly the same, except there are twice as many chroma + lines, and they are vertically co-sited with the luma samples in both the + mpeg2 and jpeg cases (thus requiring no vertical resampling).*/ +static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst, + const unsigned char *_src,int _c_w,int _c_h){ + int pli; + int y; + int x; + for(y=0;y<_c_h;y++){ + /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos + window.*/ + for(x=0;x>7,255); + } + for(;x<_c_w-3;x++){ + _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+ + 114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255); + } + for(;x<_c_w;x++){ + _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+ + 114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+ + _src[_c_w-1]+64)>>7,255); + } + _dst+=_c_w; + _src+=_c_w; + } +} + +/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/ +static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + int c_w; + int c_h; + int c_sz; + int pli; + int y; + int x; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + for(pli=1;pli<3;pli++){ + y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h); + _dst+=c_sz; + _aux+=c_sz; + } +} + +/*This format is only used for interlaced content, but is included for + completeness. 
+ + 420jpeg chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + 420paldv chroma samples are sited like: + YR------Y-------YR------Y------- + | | | | + | | | | + | | | | + YB------Y-------YB------Y------- + | | | | + | | | | + | | | | + YR------Y-------YR------Y------- + | | | | + | | | | + | | | | + YB------Y-------YB------Y------- + | | | | + | | | | + | | | | + + We use a resampling filter to shift the site locations one quarter pixel (at + the chroma plane's resolution) to the right. + Then we use another filter to move the C_r location down one quarter pixel, + and the C_b location up one quarter pixel.*/ +static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + unsigned char *tmp; + int c_w; + int c_h; + int c_sz; + int pli; + int y; + int x; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+1)/2; + c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + c_sz=c_w*c_h; + tmp=_aux+2*c_sz; + for(pli=1;pli<3;pli++){ + /*First do the horizontal re-sampling. + This is the same as the mpeg2 case, except that after the horizontal + case, we need to apply a second vertical filter.*/ + y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h); + _aux+=c_sz; + switch(pli){ + case 1:{ + /*Slide C_b up a quarter-pel. + This is the same filter used above, but in the other order.*/ + for(x=0;x>7,255); + } + for(;y>7,255); + } + for(;y>7,255); + } + _dst++; + tmp++; + } + _dst+=c_sz-c_w; + tmp-=c_w; + }break; + case 2:{ + /*Slide C_r down a quarter-pel. 
+ This is the same as the horizontal filter.*/ + for(x=0;x>7,255); + } + for(;y>7,255); + } + for(;y>7,255); + } + _dst++; + tmp++; + } + }break; + } + /*For actual interlaced material, this would have to be done separately on + each field, and the shift amounts would be different. + C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8, + C_b up 1/8 in the bottom field. + The corresponding filters would be: + Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128 + Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/ + } +} + +/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0. + This is used as a helper by several converation routines.*/ +static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst, + const unsigned char *_src,int _c_w,int _c_h){ + int y; + int x; + /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/ + for(x=0;x<_c_w;x++){ + for(y=0;y>1)*_c_w]=OC_CLAMPI(0,(64*_src[0] + +78*_src[OC_MINI(1,_c_h-1)*_c_w] + -17*_src[OC_MINI(2,_c_h-1)*_c_w] + +3*_src[OC_MINI(3,_c_h-1)*_c_w]+64)>>7,255); + } + for(;y<_c_h-3;y+=2){ + _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]+_src[(y+3)*_c_w]) + -17*(_src[(y-1)*_c_w]+_src[(y+2)*_c_w]) + +78*(_src[y*_c_w]+_src[(y+1)*_c_w])+64)>>7,255); + } + for(;y<_c_h;y+=2){ + _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w] + +_src[(_c_h-1)*_c_w])-17*(_src[(y-1)*_c_w] + +_src[OC_MINI(y+2,_c_h-1)*_c_w]) + +78*(_src[y*_c_w]+_src[OC_MINI(y+1,_c_h-1)*_c_w])+64)>>7,255); + } + _src++; + _dst++; + } +} + +/*420jpeg chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + 422jpeg chroma samples are sited like: + Y---BR--Y-------Y---BR--Y------- + | | | | + | | | | + | | | | + Y---BR--Y-------Y---BR--Y------- + | | | | 
+ | | | | + | | | | + Y---BR--Y-------Y---BR--Y------- + | | | | + | | | | + | | | | + Y---BR--Y-------Y---BR--Y------- + | | | | + | | | | + | | | | + + We use a resampling filter to decimate the chroma planes by two in the + vertical direction.*/ +static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + int c_w; + int c_h; + int c_sz; + int dst_c_w; + int dst_c_h; + int dst_c_sz; + int tmp_sz; + int pic_sz; + int pli; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h; + c_h=_y4m->pic_h; + dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + dst_c_sz=dst_c_w*dst_c_h; + for(pli=1;pli<3;pli++){ + y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h); + _aux+=c_sz; + _dst+=dst_c_sz; + } +} + +/*420jpeg chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + 422 chroma samples are sited like: + YBR-----Y-------YBR-----Y------- + | | | | + | | | | + | | | | + YBR-----Y-------YBR-----Y------- + | | | | + | | | | + | | | | + YBR-----Y-------YBR-----Y------- + | | | | + | | | | + | | | | + YBR-----Y-------YBR-----Y------- + | | | | + | | | | + | | | | + + We use a resampling filter to shift the original site locations one quarter + pixel (at the original chroma resolution) to the right. 
+ Then we use a second resampling filter to decimate the chroma planes by two + in the vertical direction.*/ +static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + unsigned char *tmp; + int c_w; + int c_h; + int c_sz; + int dst_c_w; + int dst_c_h; + int dst_c_sz; + int pli; + int y; + int x; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h; + c_h=_y4m->pic_h; + dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + dst_c_sz=c_w*dst_c_h; + tmp=_aux+2*c_sz; + for(pli=1;pli<3;pli++){ + /*In reality, the horizontal and vertical steps could be pipelined, for + less memory consumption and better cache performance, but we do them + separately for simplicity.*/ + /*First do horizontal filtering (convert to 422jpeg)*/ + y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h); + /*Now do the vertical filtering.*/ + y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h); + _aux+=c_sz; + _dst+=dst_c_sz; + } +} + +/*420jpeg chroma samples are sited like: + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | BR | | BR | + | | | | + Y-------Y-------Y-------Y------- + | | | | + | | | | + | | | | + + 411 chroma samples are sited like: + YBR-----Y-------Y-------Y------- + | | | | + | | | | + | | | | + YBR-----Y-------Y-------Y------- + | | | | + | | | | + | | | | + YBR-----Y-------Y-------Y------- + | | | | + | | | | + | | | | + YBR-----Y-------Y-------Y------- + | | | | + | | | | + | | | | + + We use a filter to resample at site locations one eighth pixel (at the source + chroma plane's horizontal resolution) and five eighths of a pixel to the + right. 
+ Then we use another filter to decimate the planes by 2 in the vertical + direction.*/ +static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + unsigned char *tmp; + int c_w; + int c_h; + int c_sz; + int dst_c_w; + int dst_c_h; + int dst_c_sz; + int tmp_sz; + int pli; + int y; + int x; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h; + c_h=_y4m->pic_h; + dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + dst_c_sz=dst_c_w*dst_c_h; + tmp_sz=dst_c_w*c_h; + tmp=_aux+2*c_sz; + for(pli=1;pli<3;pli++){ + /*In reality, the horizontal and vertical steps could be pipelined, for + less memory consumption and better cache performance, but we do them + separately for simplicity.*/ + /*First do horizontal filtering (convert to 422jpeg)*/ + for(y=0;y>7,255); + tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0] + +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255); + } + for(;x>7,255); + tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x] + +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255); + } + for(;x>7,255); + if((x<<1|1)>7,255); + } + } + tmp+=dst_c_w; + _aux+=c_w; + } + tmp-=tmp_sz; + /*Now do the vertical filtering.*/ + y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h); + _dst+=dst_c_sz; + } +} + +/*Convert 444 to 420jpeg.*/ +static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + unsigned char *tmp; + int c_w; + int c_h; + int c_sz; + int dst_c_w; + int dst_c_h; + int dst_c_sz; + int tmp_sz; + int pli; + int y; + int x; + /*Skip past the luma data.*/ + _dst+=_y4m->pic_w*_y4m->pic_h; + /*Compute the size of each chroma plane.*/ + c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h; + c_h=_y4m->pic_h; + dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + 
dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + dst_c_sz=dst_c_w*dst_c_h; + tmp_sz=dst_c_w*c_h; + tmp=_aux+2*c_sz; + for(pli=1;pli<3;pli++){ + /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/ + for(y=0;y>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)] + -17*_aux[OC_MINI(2,c_w-1)] + +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255); + } + for(;x>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3]) + -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255); + } + for(;x>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])- + 17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+ + 78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255); + } + tmp+=dst_c_w; + _aux+=c_w; + } + tmp-=tmp_sz; + /*Now do the vertical filtering.*/ + y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h); + _dst+=dst_c_sz; + } +} + +/*The image is padded with empty chroma components at 4:2:0.*/ +static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ + int c_sz; + _dst+=_y4m->pic_w*_y4m->pic_h; + c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)* + ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v); + memset(_dst,128,c_sz*2); +} + +/*No conversion function needed.*/ +static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst, + unsigned char *_aux){ +} + +int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){ + char buffer[80]; + int ret; + int i; + /*Read until newline, or 80 cols, whichever happens first.*/ + for(i=0;i<79;i++){ + if(_nskip>0){ + buffer[i]=*_skip++; + _nskip--; + } + else{ + ret=fread(buffer+i,1,1,_fin); + if(ret<1)return -1; + } + if(buffer[i]=='\n')break; + } + /*We skipped too much header data.*/ + if(_nskip>0)return -1; + if(i==79){ + fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n"); + return -1; + } + buffer[i]='\0'; + if(memcmp(buffer,"YUV4MPEG",8)){ + fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n"); + return -1; + } + if(buffer[8]!='2'){ + fprintf(stderr,"Incorrect 
YUV input file version; YUV4MPEG2 required.\n"); + } + ret=y4m_parse_tags(_y4m,buffer+5); + if(ret<0){ + fprintf(stderr,"Error parsing YUV4MPEG2 header.\n"); + return ret; + } + if(_y4m->interlace=='?'){ + fprintf(stderr,"Warning: Input video interlacing format unknown; " + "assuming progressive scan.\n"); + } + else if(_y4m->interlace!='p'){ + fprintf(stderr,"Input video is interlaced; " + "Only progressive scan handled.\n"); + return -1; + } + if(strcmp(_y4m->chroma_type,"420")==0|| + strcmp(_y4m->chroma_type,"420jpeg")==0){ + _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h + +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2); + /*Natively supported: no conversion required.*/ + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0; + _y4m->convert=y4m_convert_null; + } + else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){ + _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first.*/ + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz= + 2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2); + _y4m->convert=y4m_convert_42xmpeg2_42xjpeg; + } + else if(strcmp(_y4m->chroma_type,"420paldv")==0){ + _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first. 
+ We need to make two filter passes, so we need some extra space in the + aux buffer.*/ + _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2); + _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2); + _y4m->convert=y4m_convert_42xpaldv_42xjpeg; + } + else if(strcmp(_y4m->chroma_type,"422jpeg")==0){ + _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2; + _y4m->src_c_dec_v=1; + _y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first.*/ + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h; + _y4m->convert=y4m_convert_422jpeg_420jpeg; + } + else if(strcmp(_y4m->chroma_type,"422")==0){ + _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2; + _y4m->src_c_dec_v=1; + _y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first. + We need to make two filter passes, so we need some extra space in the + aux buffer.*/ + _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h; + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h; + _y4m->convert=y4m_convert_422_420jpeg; + } + else if(strcmp(_y4m->chroma_type,"411")==0){ + _y4m->src_c_dec_h=4; + _y4m->dst_c_dec_h=2; + _y4m->src_c_dec_v=1; + _y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first. + We need to make two filter passes, so we need some extra space in the + aux buffer.*/ + _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h; + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h; + _y4m->convert=y4m_convert_411_420jpeg; + } + else if(strcmp(_y4m->chroma_type,"444")==0){ + _y4m->src_c_dec_h=1; + _y4m->dst_c_dec_h=2; + _y4m->src_c_dec_v=1; + _y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first. 
+ We need to make two filter passes, so we need some extra space in the + aux buffer.*/ + _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h; + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h; + _y4m->convert=y4m_convert_444_420jpeg; + } + else if(strcmp(_y4m->chroma_type,"444alpha")==0){ + _y4m->src_c_dec_h=1; + _y4m->dst_c_dec_h=2; + _y4m->src_c_dec_v=1; + _y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*Chroma filter required: read into the aux buf first. + We need to make two filter passes, so we need some extra space in the + aux buffer. + The extra plane also gets read into the aux buf. + It will be discarded.*/ + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h; + _y4m->convert=y4m_convert_444_420jpeg; + } + else if(strcmp(_y4m->chroma_type,"mono")==0){ + _y4m->src_c_dec_h=_y4m->src_c_dec_v=0; + _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2; + _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h; + /*No extra space required, but we need to clear the chroma planes.*/ + _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0; + _y4m->convert=y4m_convert_mono_420jpeg; + } + else{ + fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type); + return -1; + } + /*The size of the final frame buffers is always computed from the + destination chroma decimation type.*/ + _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h + +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)* + ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v); + _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz); + _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz); + return 0; +} + +void y4m_input_close(y4m_input *_y4m){ + free(_y4m->dst_buf); + free(_y4m->aux_buf); +} + +int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){ + char frame[6]; + int pic_sz; + int frame_c_w; + int frame_c_h; + int c_w; + int c_h; + int c_sz; + int ret; + /*Read and skip the frame header.*/ + ret=fread(frame,1,6,_fin); + if(ret<6)return 0; + 
if(memcmp(frame,"FRAME",5)){ + fprintf(stderr,"Loss of framing in Y4M input data\n"); + return -1; + } + if(frame[5]!='\n'){ + char c; + int j; + for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++); + if(j==79){ + fprintf(stderr,"Error parsing Y4M frame header\n"); + return -1; + } + } + /*Read the frame data that needs no conversion.*/ + if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){ + fprintf(stderr,"Error reading Y4M frame data.\n"); + return -1; + } + /*Read the frame data that does need conversion.*/ + if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){ + fprintf(stderr,"Error reading Y4M frame data.\n"); + return -1; + } + /*Now convert the just read frame.*/ + (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf); + /*Fill in the frame buffer pointers. + We don't use vpx_img_wrap() because it forces padding for odd picture + sizes, which would require a separate fread call for every row.*/ + memset(_img,0,sizeof(*_img)); + /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/ + _img->fmt=IMG_FMT_I420; + _img->w=_img->d_w=_y4m->pic_w; + _img->h=_img->d_h=_y4m->pic_h; + /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/ + _img->x_chroma_shift=1; + _img->y_chroma_shift=1; + _img->bps=12; + /*Set up the buffer pointers.*/ + pic_sz=_y4m->pic_w*_y4m->pic_h; + c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h; + c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v; + c_sz=c_w*c_h; + _img->stride[PLANE_Y]=_y4m->pic_w; + _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w; + _img->planes[PLANE_Y]=_y4m->dst_buf; + _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz; + _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz; + return 0; +} diff --git a/y4minput.h b/y4minput.h new file mode 100644 index 000000000..153487504 --- /dev/null +++ b/y4minput.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2010 The VP8 project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license and patent + * grant that can be found in the LICENSE file in the root of the source + * tree. All contributing project authors may be found in the AUTHORS + * file in the root of the source tree. + * + * Based on code from the OggTheora software codec source code, + * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. + */ +#if !defined(_y4minput_H) +# define _y4minput_H (1) +# include <stdio.h> +# include "vpx/vpx_image.h" + + + +typedef struct y4m_input y4m_input; + + + +/*The function used to perform chroma conversion.*/ +typedef void (*y4m_convert_func)(y4m_input *_y4m, + unsigned char *_dst,unsigned char *_src); + + + +struct y4m_input{ + int pic_w; + int pic_h; + int fps_n; + int fps_d; + int par_n; + int par_d; + char interlace; + int src_c_dec_h; + int src_c_dec_v; + int dst_c_dec_h; + int dst_c_dec_v; + char chroma_type[16]; + /*The size of each converted frame buffer.*/ + size_t dst_buf_sz; + /*The amount to read directly into the converted frame buffer.*/ + size_t dst_buf_read_sz; + /*The size of the auxiliary buffer.*/ + size_t aux_buf_sz; + /*The amount to read into the auxiliary buffer.*/ + size_t aux_buf_read_sz; + y4m_convert_func convert; + unsigned char *dst_buf; + unsigned char *aux_buf; +}; + +int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip); +void y4m_input_close(y4m_input *_y4m); +int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *img); + +#endif