From bb92e2c89b4aee9e1d1bb27a4a6da3817c66d005 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Wed, 28 Apr 2010 20:02:28 +0000 Subject: [PATCH] bss_file.c: refine UTF-8 logic on Windows. --- crypto/bio/bss_file.c | 53 ++++++++++++++++++++++++++------------- doc/crypto/BIO_s_file.pod | 4 +++ 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/crypto/bio/bss_file.c b/crypto/bio/bss_file.c index 3f458a0c7..8bfa0bcd9 100644 --- a/crypto/bio/bss_file.c +++ b/crypto/bio/bss_file.c @@ -118,28 +118,45 @@ static BIO_METHOD methods_filep= BIO *BIO_new_file(const char *filename, const char *mode) { - BIO *ret; - FILE *file; + BIO *ret; + FILE *file=NULL; - file=fopen(filename,mode); #if defined(_WIN32) && defined(CP_UTF8) - if (file==NULL && errno==ENOENT) /* see if filename is UTF-8 encoded */ - { - int sz,len_0 = (int)strlen(filename)+1; - if ((sz=MultiByteToWideChar(CP_UTF8,0,filename,len_0, - NULL,0))>0) - { - WCHAR wmode[8]; - WCHAR *wfilename = _alloca(sz*sizeof(WCHAR)); + int sz, len_0 = (int)strlen(filename)+1; - if (MultiByteToWideChar(CP_UTF8,0,filename,len_0, - wfilename,sz) && - MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1, - wmode,sizeof(wmode)/sizeof(wmode[0])) - ) - file = _wfopen(wfilename,wmode); - } + /* + * Basically there are three cases to cover: a) filename is + * pure ASCII string; b) actual UTF-8 encoded string and + * c) locale-ized string, i.e. one containing 8-bit + * characters that are meaningful in current system locale. + * If filename is pure ASCII or real UTF-8 encoded string, + * MultiByteToWideChar succeeds and _wfopen works. If + * filename is locale-ized string, chances are that + * MultiByteToWideChar fails reporting + * ERROR_NO_UNICODE_TRANSLATION, in which case we fall + * back to fopen... 
+ */ + if ((sz=MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS, + filename,len_0,NULL,0))>0) + { + WCHAR wmode[8]; + WCHAR *wfilename = _alloca(sz*sizeof(WCHAR)); + + if (MultiByteToWideChar(CP_UTF8,MB_ERR_INVALID_CHARS, + filename,len_0,wfilename,sz) && + MultiByteToWideChar(CP_UTF8,0,mode,strlen(mode)+1, + wmode,sizeof(wmode)/sizeof(wmode[0])) && + (file=_wfopen(wfilename,wmode))==NULL && errno==ENOENT + ) /* UTF-8 decode succeeded, but no file, filename + * could still have been locale-ized... */ + file = fopen(filename,mode); } + else if (GetLastError()==ERROR_NO_UNICODE_TRANSLATION) + { + file = fopen(filename,mode); + } +#else + file=fopen(filename,mode); #endif if (file == NULL) { diff --git a/doc/crypto/BIO_s_file.pod b/doc/crypto/BIO_s_file.pod index b2a29263f..188aea347 100644 --- a/doc/crypto/BIO_s_file.pod +++ b/doc/crypto/BIO_s_file.pod @@ -76,6 +76,10 @@ normally be closed so the BIO_NOCLOSE flag should be set. Because the file BIO calls the underlying stdio functions any quirks in stdio behaviour will be mirrored by the corresponding BIO. +On Windows BIO_new_file() reserves the right to interpret the filename +argument as UTF-8 encoded. In other words, if you have to make it work +in a multilingual environment, encode file names in UTF-8. + =head1 EXAMPLES File BIO "hello world":