id3v2: Add support for UTF-16 encoding.
Patch by Anton Khirnov (wyskas gmail com). Originally committed as revision 20006 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
9aa1bcce51
commit
20c6837880
@ -81,6 +81,7 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
|
|||||||
char *q, dst[512];
|
char *q, dst[512];
|
||||||
int len, dstlen = sizeof(dst) - 1;
|
int len, dstlen = sizeof(dst) - 1;
|
||||||
unsigned genre;
|
unsigned genre;
|
||||||
|
unsigned int (*get)(ByteIOContext*) = get_be16;
|
||||||
|
|
||||||
dst[0] = 0;
|
dst[0] = 0;
|
||||||
if (taglen < 1)
|
if (taglen < 1)
|
||||||
@ -99,11 +100,38 @@ static void read_ttag(AVFormatContext *s, int taglen, const char *key)
|
|||||||
*q = 0;
|
*q = 0;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case 1: /* UTF-16 with BOM */
|
||||||
|
taglen -= 2;
|
||||||
|
switch (get_be16(s->pb)) {
|
||||||
|
case 0xfffe:
|
||||||
|
get = get_le16;
|
||||||
|
case 0xfeff:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// fall-through
|
||||||
|
|
||||||
|
case 2: /* UTF-16BE without BOM */
|
||||||
|
q = dst;
|
||||||
|
while (taglen > 1 && q - dst < dstlen - 7) {
|
||||||
|
uint32_t ch;
|
||||||
|
uint8_t tmp;
|
||||||
|
|
||||||
|
GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(s->pb) : 0), break;)
|
||||||
|
PUT_UTF8(ch, tmp, *q++ = tmp;)
|
||||||
|
}
|
||||||
|
*q = 0;
|
||||||
|
break;
|
||||||
|
|
||||||
case 3: /* UTF-8 */
|
case 3: /* UTF-8 */
|
||||||
len = FFMIN(taglen, dstlen - 1);
|
len = FFMIN(taglen, dstlen - 1);
|
||||||
get_buffer(s->pb, dst, len);
|
get_buffer(s->pb, dst, len);
|
||||||
dst[len] = 0;
|
dst[len] = 0;
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s\n.", key);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!strcmp(key, "genre")
|
if (!strcmp(key, "genre")
|
||||||
|
@ -265,6 +265,30 @@ static inline av_const int av_ceil_log2(int x)
|
|||||||
}\
|
}\
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*!
 * \def GET_UTF16(val, GET_16BIT, ERROR)
 * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form
 * \param val is the output and should be of type uint32_t. It holds the converted
 * UCS-4 character and should be a left value.
 * \param GET_16BIT gets two bytes of UTF-16 encoded data converted to native endianness.
 * It can be a function or a statement whose return value or evaluated value is of type
 * uint16_t. It will be executed up to 2 times. The macro parenthesizes it, so an
 * unparenthesized conditional expression is safe to pass.
 * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
 * returned from GET_16BIT. It should be a statement that jumps out of the macro,
 * like exit(), goto, return, break, or continue. It may consist of several
 * statements; all of them are executed only in the error case.
 *
 * The expansion is deliberately not wrapped in do { } while (0): a break or
 * continue passed as ERROR has to act on the caller's enclosing loop.
 */
#define GET_UTF16(val, GET_16BIT, ERROR)\
    val = (GET_16BIT);\
    {\
        /* Offset from the start of the surrogate range; wraps to a large */\
        /* value for code units below 0xD800, which then skips the branch. */\
        unsigned int hi = val - 0xD800;\
        if (hi < 0x800) {\
            /* First unit is a surrogate: fetch the second unit of the pair. */\
            val = (GET_16BIT) - 0xDC00;\
            /* Second must be a low surrogate, first must be a high one. */\
            if (val > 0x3FFU || hi > 0x3FFU)\
                {ERROR}\
            val += (hi<<10) + 0x10000;\
        }\
    }
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* \def PUT_UTF8(val, tmp, PUT_BYTE)
|
* \def PUT_UTF8(val, tmp, PUT_BYTE)
|
||||||
* Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
|
* Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
|
||||||
|
Loading…
x
Reference in New Issue
Block a user