bss_file.c: refine UTF-8 logic on Windows.

This commit is contained in:
Andy Polyakov 2010-04-28 20:02:28 +00:00
parent 5e19ee96f6
commit bb92e2c89b
2 changed files with 39 additions and 18 deletions

View File

@ -118,28 +118,45 @@ static BIO_METHOD methods_filep=
BIO *BIO_new_file(const char *filename, const char *mode) BIO *BIO_new_file(const char *filename, const char *mode)
{ {
BIO *ret; BIO *ret;
FILE *file; FILE *file=NULL;
file=fopen(filename,mode);
#if defined(_WIN32) && defined(CP_UTF8) #if defined(_WIN32) && defined(CP_UTF8)
if (file==NULL && errno==ENOENT) /* see if filename is UTF-8 encoded */ int sz, len_0 = (int)strlen(filename)+1;
{
int sz,len_0 = (int)strlen(filename)+1;
if ((sz=MultiByteToWideChar(CP_UTF8,0,filename,len_0,
NULL,0))>0)
{
WCHAR wmode[8];
WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
if (MultiByteToWideChar(CP_UTF8,0,filename,len_0, /*
wfilename,sz) && * Basically there are three cases to cover: a) filename is
MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1, * pure ASCII string; b) actual UTF-8 encoded string and
wmode,sizeof(wmode)/sizeof(wmode[0])) * c) locale-ized string, i.e. one containing 8-bit
) * characters that are meaningful in current system locale.
file = _wfopen(wfilename,wmode); * If filename is pure ASCII or real UTF-8 encoded string,
} * MultiByteToWideChar succeeds and _wfopen works. If
* filename is locale-ized string, chances are that
* MultiByteToWideChar fails reporting
* ERROR_NO_UNICODE_TRANSLATION, in which case we fall
* back to fopen...
*/
if ((sz=MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
filename,len_0,NULL,0))>0)
{
WCHAR wmode[8];
WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
if (MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
filename,len_0,wfilename,sz) &&
MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
wmode,sizeof(wmode)/sizeof(wmode[0])) &&
(file=_wfopen(wfilename,wmode))==NULL && errno==ENOENT
) /* UTF-8 decode succeeded, but no file, filename
* could still have been locale-ized... */
file = fopen(filename,mode);
} }
else if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION)
{
file = fopen(filename,mode);
}
#else
file=fopen(filename,mode);
#endif #endif
if (file == NULL) if (file == NULL)
{ {

View File

@ -76,6 +76,10 @@ normally be closed so the BIO_NOCLOSE flag should be set.
Because the file BIO calls the underlying stdio functions any quirks Because the file BIO calls the underlying stdio functions any quirks
in stdio behaviour will be mirrored by the corresponding BIO. in stdio behaviour will be mirrored by the corresponding BIO.
On Windows, BIO_new_file() first tries to interpret the filename argument
as UTF-8 encoded. In other words, if you have to make it work in a
multi-lingual environment, encode file names in UTF-8.
=head1 EXAMPLES =head1 EXAMPLES
File BIO "hello world": File BIO "hello world":