Click here to Skip to main content
65,938 articles
CodeProject is changing. Read more.
Articles
(untagged)

BUG in _com_util::ConvertStringToBSTR and _com_util::ConvertBSTRToString

0.00/5 (No votes)
16 Jul 2002 1  
How MS implements them and why they do not want you to see the source code of those functions.

Introduction

Did you ever wonder why you get stack overflow exception when using _bstr_t and _variant_t COM utility classes? If you never had this problem, try this code:

#include "stdafx.h"

#include <stdio.h>

#include <eh.h>

#include <comdef.h>


#define BUG_ARR_MAX 2000000


class Exception
{
private:
    unsigned int m_n;
public:
    Exception( unsigned int n ) : m_n( n ) {}
    ~Exception() {}
    unsigned int GetNumber() { return m_n; }
};

void trans_func( unsigned int, EXCEPTION_POINTERS* pe)
{
    throw Exception(pe->ExceptionRecord->ExceptionCode);
};



int main(int argc, char* argv[])
{
    char *szBuf = new char;

    memset(szBuf, 'X', BUG_ARR_MAX);
    szBuf1> = '\0';

    //set translation function to 

    //translate structured exceptions 

    _set_se_translator( trans_func );

    try
    {

        _bstr_t bstrString(szBuf);

    }catch(Exception e)
    {
        if(EXCEPTION_STACK_OVERFLOW == e.GetNumber())
            printf("Stack overflow exception!\n");
        else
            printf("Exception: %d\n",e.GetNumber());
    }

    _bstr_t bstrString("Some string");
    char *szResult = (char*)bstrString;

    return 0;
}

What I do here is, setting exception handler to handle structured exception. Why there is stack overflow exception raised when constructor is called for bstrString? Let's go to _bstr_t implementation and we will see that MS uses _com_util::ConvertStringToBSTR(s) function in the constructor and that function raises the exception.

inline _bstr_t::Data_t::Data_t(const char* s) throw(_com_error)
: m_str(NULL), m_RefCount(1)
{
    m_wstr = _com_util::ConvertStringToBSTR(s);

    if (m_wstr == NULL && s != NULL) {
        _com_issue_error(E_OUTOFMEMORY);
    }
}

In order to investigate the issue here, let's go to assembler code of that function:

_com_util::ConvertStringToBSTR:
0040F9D8   push        ebp
0040F9D9   mov         ebp,esp
0040F9DB   cmp         dword ptr [ebp+8],0
0040F9DF   push        esi
0040F9E0   push        edi
0040F9E1   jne         _com_util::ConvertStringToBSTR+0Fh (0040f9e7)
0040F9E3   xor         eax,eax
0040F9E5   jmp         _com_util::ConvertStringToBSTR+6Ch (0040fa44)
0040F9E7   push        dword ptr [pSrc]
0040F9EA   call        dword ptr [__imp__lstrlenA@4 (0042e28c)]
0040F9F0   mov         esi,eax
0040F9F2   inc         esi
0040F9F3   lea         eax,[esi+esi]
0040F9F6   add         eax,3
0040F9F9   and         al,0FCh
// this is the call to _alloca_probe function.
// What is the reason to use stack??
// try to get the memory from
// the stack! not SysAllocString()!
// but at the end they call SysAllocString()
// to reallocate the memory!!!

//exception is here, which is never caught
0040F9FB   call        $$$00001 (0040b980)
0040FA00   mov         edi,esp
0040FA02   push        esi
0040FA03   push        edi
0040FA04   push        0FFh
0040FA06   push        dword ptr [pSrc]
0040FA09   and         word ptr [edi],0
0040FA0D   push        0
0040FA0F   push        0
0040FA11   call        dword ptr [__imp__MultiByteToWideChar@24 (0042e24c)]
0040FA17   test        eax,eax
0040FA19   jne         _com_util::ConvertStringToBSTR+65h (0040fa3d)
0040FA1B   mov         esi,dword ptr [__imp__GetLastError@0 (0042e270)]
0040FA21   call        esi
0040FA23   test        eax,eax
0040FA25   je          _com_util::ConvertStringToBSTR+5Dh (0040fa35)
0040FA27   call        esi
0040FA29   and         eax,0FFFFh
0040FA2E   or          eax,80070000h
0040FA33   jmp         _com_util::ConvertStringToBSTR+5Fh (0040fa37)
0040FA35   xor         eax,eax
0040FA37   push        eax
0040FA38   call        _com_issue_error (0040f2e2)
0040FA3D   push        edi
0040FA3E   call        dword ptr [__imp__SysAllocString@4 (0042e320)]
0040FA44   lea         esp,[ebp-8]
0040FA47   pop         edi
0040FA48   pop         esi
0040FA49   pop         ebp
0040FA4A   ret         4

Now we can see what is going on inside that function.

  1. Get length of char string.
  2. Allocate memory on the stack using alloca() function. This is the place where it fails if char string is too long for the stack. And MS coders do not care if there is not enough memory on the stack. And finally why use stack here at all?
  3. String is converted using MultiByteToWideChar()
  4. If conversion failed, raise COM exception.
  5. And now SysAllocString() is called.

I hope MS coders do not use that function for their product development, otherwise we can crash almost every product they have produced according to this _com_util::ConvertStringToBSTR() implementation.

Now let's talk about _com_util::ConvertBSTRToString(). Try to type cast _bstr_t object to char pointer:

    _bstr_t bstrString("Some string");
    char *szResult = (char*)bstrString;

and this function will be called:

inline const char* _bstr_t::Data_t::GetString() 
                      const throw(_com_error)
{
    if (m_str == NULL) {
        m_str = _com_util::ConvertBSTRToString(m_wstr);

        if (m_str == NULL && m_wstr != NULL) {
            _com_issue_error(E_OUTOFMEMORY);
        }
    }

    return m_str;
}

As you can see, in this code, MS coders use _com_util::ConvertBSTRToString() to convert char string to BSTR string. Another "devil". At least that function does not use stack. But internally it allocates two times more memory than we need. Here is the assembler code of that function:

_com_util::ConvertBSTRToString:
0040FA4D   push        ebx
0040FA4E   push        ebp
0040FA4F   mov         ebp,dword ptr [esp+0Ch]
0040FA53   xor         ebx,ebx
0040FA55   cmp         ebp,ebx
0040FA57   jne         _com_util::ConvertBSTRToString+10h (0040fa5d)
0040FA59   xor         eax,eax
0040FA5B   jmp         _com_util::ConvertBSTRToString+6Fh (0040fabc)
0040FA5D   push        esi
0040FA5E   push        edi
0040FA5F   push        ebp
0040FA60   call        wcslen (0040fe80)
// allocate size of the memory as much
// as twice of what we need !!
// What is the reason ??

// wcslen returned number of characters in eax register
0040FA65   lea         edi,[eax+eax+2]
0040FA69   push        edi
0040FA6A   call        operator new (00401fb0)
0040FA6F   mov         esi,eax
0040FA71   pop         ecx
0040FA72   cmp         esi,ebx
0040FA74   pop         ecx
0040FA75   jne         _com_util::ConvertBSTRToString+34h (0040fa81)
0040FA77   push        8007000Eh
0040FA7C   call        _com_issue_error (0040f2e2)
0040FA81   push        ebx
0040FA82   push        ebx
0040FA83   push        edi
0040FA84   push        esi
0040FA85   push        0FFh
0040FA87   push        ebp
0040FA88   push        ebx
0040FA89   push        ebx
0040FA8A   mov         byte ptr [esi],bl
0040FA8C   call        dword ptr [__imp__WideCharToMultiByte@32 (0042e234)]
0040FA92   test        eax,eax
0040FA94   jne         _com_util::ConvertBSTRToString+6Bh (0040fab8)
0040FA96   mov         edi,dword ptr [__imp__GetLastError@0 (0042e270)]
0040FA9C   call        edi
0040FA9E   test        eax,eax
0040FAA0   je          _com_util::ConvertBSTRToString+63h (0040fab0)
0040FAA2   call        edi
0040FAA4   and         eax,0FFFFh
0040FAA9   or          eax,80070000h
0040FAAE   jmp         _com_util::ConvertBSTRToString+65h (0040fab2)
0040FAB0   xor         eax,eax
0040FAB2   push        eax
0040FAB3   call        _com_issue_error (0040f2e2)
0040FAB8   mov         eax,esi
0040FABA   pop         edi
0040FABB   pop         esi
0040FABC   pop         ebp
0040FABD   pop         ebx
0040FABE   ret         4

From this assembler listing, we can see that MS coders do not care about memory. They follow strange rules: "Allocate memory as much as twice of what we need".

Here is my FIX of that problem. I have re-implemented those two functions. Replace all _com_util::ConvertBSTRToString with ConvertBSTRToString and _com_util::ConvertStringToBSTR with ConvertStringToBSTR. And finally you must rebuild your project by "Rebuild All" from build menu.

Source code of new conversion functions:

//implement our own conversion functions


//------------------------//

// Convert char * to BSTR //

//------------------------//

inline BSTR ConvertStringToBSTR(const char* pSrc)
{
    if(!pSrc) return NULL;

    DWORD cwch;

    BSTR wsOut(NULL);

    if(cwch = ::MultiByteToWideChar(CP_ACP, 0, pSrc, 
         -1, NULL, 0))//get size minus NULL terminator

    {
                cwch--;
            wsOut = ::SysAllocStringLen(NULL, cwch);

        if(wsOut)
        {
            if(!::MultiByteToWideChar(CP_ACP, 
                     0, pSrc, -1, wsOut, cwch))
            {
                if(ERROR_INSUFFICIENT_BUFFER == ::GetLastError())
                    return wsOut;
                ::SysFreeString(wsOut);//must clean up

                wsOut = NULL;
            }
        }

    };

    return wsOut;
};

//------------------------//

// Convert BSTR to char * //

//------------------------//

inline char* ConvertBSTRToString(BSTR pSrc)
{
    if(!pSrc) return NULL;

    //convert even embeded NULL

    DWORD cb,cwch = ::SysStringLen(pSrc);

    char *szOut = NULL;

    if(cb = ::WideCharToMultiByte(CP_ACP, 0, 
               pSrc, cwch + 1, NULL, 0, 0, 0))
    {
        szOut = new char[cb];
        if(szOut)
        {
            szOut[cb - 1]  = '\0';

            if(!::WideCharToMultiByte(CP_ACP, 0, 
                pSrc, cwch + 1, szOut, cb, 0, 0))
            {
                delete []szOut;//clean up if failed;

                szOut = NULL;
            }
        }
    }

    return szOut;
};

For your convenience, I reflect all those changes in comutil.h header file which is posted here.

Execution speed test

Here is the result of my test program. This program runs those functions in the loop 100000 times and prints out the result. In order to get the correct results, you have to run each test independently.

#include "stdio.h"

#include "comdef.h"

int main(int argc, char* argv[])
{

    char *szIn = "This is the test string to convert to BSTR";
    BSTR wsOut = ::_com_util_fix::ConvertStringToBSTR(szIn);
    int i,imax = 100000;

    DWORD dwTime;

    switch((int)*argv[1])
    {
        case '0':
            dwTime = ::GetTickCount();
            for(i = 0 ; i < imax; ++i)
            {
                wsOut = _com_util::ConvertStringToBSTR(szIn);
            }
            dwTime = ::GetTickCount() - dwTime;

            printf("%d times of _com_util::ConvertStringToBSTR 
                                  - %d msec\n", imax, dwTime);
            break;
        case '1':
            dwTime = ::GetTickCount();
            for(i = 0 ; i < imax; ++i)
            {
                wsOut = _com_util_fix::ConvertStringToBSTR(szIn);
            }
            dwTime = ::GetTickCount() - dwTime;

            printf("%d times of /*fixed*/  
               ConvertStringToBSTR - %d msec\n", imax, dwTime);
            break;
        case '2':
            dwTime = ::GetTickCount();
            for(i = 0 ; i < imax; ++i)
            {
                szIn = ::_com_util::ConvertBSTRToString(wsOut);
            }
            dwTime = ::GetTickCount() - dwTime;
            printf("%d times of _com_util::ConvertBSTRToString 
                                   - %d msec\n", imax, dwTime);
            break;
        case '3':
            dwTime = ::GetTickCount();
            for(i = 0 ; i < imax; ++i)
            {
                szIn = _com_util_fix::ConvertBSTRToString(wsOut);
            }
            dwTime = ::GetTickCount() - dwTime;
            printf("%d times of /*fixed*/  
               ConvertBSTRToString - %d msec\n", imax, dwTime);
            break;
        default:
        printf("command: temp.exe num\n\n\tnum == 
            0 - tests _com_util::ConvertStringToBSTR\n\
            \r\tnum == 1 - tests /*fixed*/  ConvertStringToBSTR\n\
            \r\tnum == 2 - tests _com_util::ConvertBSTRToString\n\
            \r\tnum == 3 - tests /*fixed*/  
            ConvertBSTRToString\n\n Example: temp.exe 2\n\n");
    }

    return 0;
}

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here