bss_file.c: refine UTF-8 logic on Windows.

This commit is contained in:
Andy Polyakov 2010-04-28 20:02:28 +00:00
parent 5e19ee96f6
commit bb92e2c89b
2 changed files with 39 additions and 18 deletions

View File

@ -119,27 +119,44 @@ static BIO_METHOD methods_filep=
BIO *BIO_new_file(const char *filename, const char *mode)
{
BIO *ret;
FILE *file;
FILE *file=NULL;
file=fopen(filename,mode);
#if defined(_WIN32) && defined(CP_UTF8)
if (file==NULL && errno==ENOENT) /* see if filename is UTF-8 encoded */
{
int sz,len_0 = (int)strlen(filename)+1;
if ((sz=MultiByteToWideChar(CP_UTF8,0,filename,len_0,
NULL,0))>0)
int sz, len_0 = (int)strlen(filename)+1;
/*
* Basically there are three cases to cover: a) filename is
* pure ASCII string; b) actual UTF-8 encoded string and
* c) locale-ized string, i.e. one containing 8-bit
* characters that are meaningful in current system locale.
* If filename is pure ASCII or real UTF-8 encoded string,
* MultiByteToWideChar succeeds and _wfopen works. If
* filename is locale-ized string, chances are that
* MultiByteToWideChar fails reporting
* ERROR_NO_UNICODE_TRANSLATION, in which case we fall
* back to fopen...
*/
if ((sz=MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
filename,len_0,NULL,0))>0)
{
WCHAR wmode[8];
WCHAR *wfilename = _alloca(sz*sizeof(WCHAR));
if (MultiByteToWideChar(CP_UTF8,0,filename,len_0,
wfilename,sz) &&
if (MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS,
filename,len_0,wfilename,sz) &&
MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1,
wmode,sizeof(wmode)/sizeof(wmode[0]))
)
file = _wfopen(wfilename,wmode);
wmode,sizeof(wmode)/sizeof(wmode[0])) &&
(file=_wfopen(wfilename,wmode))==NULL && errno==ENOENT
) /* UTF-8 decode succeeded, but no file, filename
* could still have been locale-ized... */
file = fopen(filename,mode);
}
else if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION)
{
file = fopen(filename,mode);
}
#else
file=fopen(filename,mode);
#endif
if (file == NULL)
{

View File

@ -76,6 +76,10 @@ normally be closed so the BIO_NOCLOSE flag should be set.
Because the file BIO calls the underlying stdio functions any quirks
in stdio behaviour will be mirrored by the corresponding BIO.
On Windows BIO_new_files reserves for the filename argument to be
UTF-8 encoded. In other words if you have to make it work in multi-
lingual environment, encode file names in UTF-8.
=head1 EXAMPLES
File BIO "hello world":