/** \file
 * Implementation of the ANTLR3 string and string factory classes
 */

// [The "BSD licence"]
// Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
// http://www.temporal-wave.com
// http://www.linkedin.com/in/jimidle
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include    <antlr3string.h>

/* Factory API
 */
static    pANTLR3_STRING    newRaw8     (pANTLR3_STRING_FACTORY factory);
static    pANTLR3_STRING    newRawUTF16 (pANTLR3_STRING_FACTORY factory);
static    pANTLR3_STRING    newSize8    (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newSizeUTF16        (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtr8     (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtrUTF16_8       (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newPtrUTF16_UTF16   (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string, ANTLR3_UINT32 size);
static    pANTLR3_STRING    newStr8     (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    pANTLR3_STRING    newStrUTF16_8       (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    pANTLR3_STRING    newStrUTF16_UTF16   (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 string);
static    void              destroy     (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    pANTLR3_STRING    printable8  (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    pANTLR3_STRING    printableUTF16      (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string);
static    void              closeFactory(pANTLR3_STRING_FACTORY factory);

/* String API
 */
static    pANTLR3_UINT8     set8        (pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8     setUTF16_8  (pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8     setUTF16_UTF16      (pANTLR3_STRING string, const char * chars);
static    pANTLR3_UINT8     append8     (pANTLR3_STRING string, const char * newbit);
static    pANTLR3_UINT8     appendUTF16_8       (pANTLR3_STRING string, const char * newbit);
static    pANTLR3_UINT8     appendUTF16_UTF16   (pANTLR3_STRING string, const char * newbit);
static    pANTLR3_UINT8     insert8     (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static    pANTLR3_UINT8     insertUTF16_8       (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);
static    pANTLR3_UINT8     insertUTF16_UTF16   (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit);

static    pANTLR3_UINT8     setS        (pANTLR3_STRING string, pANTLR3_STRING chars);
static    pANTLR3_UINT8     appendS     (pANTLR3_STRING string, pANTLR3_STRING newbit);
static    pANTLR3_UINT8     insertS     (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit);

static    pANTLR3_UINT8     addc8       (pANTLR3_STRING string, ANTLR3_UINT32 c);
static    pANTLR3_UINT8     addcUTF16   (pANTLR3_STRING string, ANTLR3_UINT32 c);
static    pANTLR3_UINT8     addi8       (pANTLR3_STRING string, ANTLR3_INT32 i);
static    pANTLR3_UINT8     addiUTF16   (pANTLR3_STRING string, ANTLR3_INT32 i);
static    pANTLR3_UINT8     inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);
static    pANTLR3_UINT8     insertiUTF16        (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i);

static    ANTLR3_UINT32     compare8    (pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareUTF16_8      (pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareUTF16_UTF16(pANTLR3_STRING string, const char * compStr);
static    ANTLR3_UINT32     compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr);
static    ANTLR3_UCHAR      charAt8     (pANTLR3_STRING string, ANTLR3_UINT32 offset);
static    ANTLR3_UCHAR      charAtUTF16 (pANTLR3_STRING string, ANTLR3_UINT32 offset);
static    pANTLR3_STRING    subString8  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static    pANTLR3_STRING    subStringUTF16      (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex);
static    ANTLR3_INT32      toInt32_8   (pANTLR3_STRING string);
static    ANTLR3_INT32      toInt32_UTF16  (pANTLR3_STRING string);
static    pANTLR3_STRING    to8_8               (pANTLR3_STRING string);
static    pANTLR3_STRING    to8_UTF16           (pANTLR3_STRING string);
static  pANTLR3_STRING          toUTF8_8        (pANTLR3_STRING string);
static  pANTLR3_STRING          toUTF8_UTF16    (pANTLR3_STRING string);

/* Local helpers
 */
static  void                    stringInit8     (pANTLR3_STRING string);
static  void                    stringInitUTF16 (pANTLR3_STRING string);
static  void    ANTLR3_CDECL    stringFree      (pANTLR3_STRING string);

/**
 * Creates a string factory whose API is selected by the supplied encoding.
 *
 * The factory tracks every string it creates in a vector so that closing the
 * factory releases them all.
 *
 * - UTF16, UTF16BE and UTF16LE install the UTF16 string creation functions.
 * - UTF8, EBCDIC, 8BIT and any unrecognised encoding install the 8 bit functions.
 * - UTF32, UTF32BE and UTF32LE install NO string creation functions: newRaw,
 *   newSize, newPtr, newPtr8, newStr, newStr8 and printable remain NULL (the
 *   factory is zero filled by ANTLR3_CALLOC), so callers must not create
 *   strings from such a factory.
 *
 * destroy and close are installed for every encoding so that a factory can
 * always be released, even when it cannot create strings.
 *
 * \param[in] encoding - One of the ANTLR3_ENC_xxx encoding constants
 * \return Pointer to the new factory, or NULL if memory could not be allocated.
 */
ANTLR3_API pANTLR3_STRING_FACTORY
antlr3StringFactoryNew(ANTLR3_UINT32 encoding)
{
    pANTLR3_STRING_FACTORY  factory;

    /* Allocate zero filled memory, so any API entry we do not install is NULL
     */
    factory = (pANTLR3_STRING_FACTORY) ANTLR3_CALLOC(1, sizeof(ANTLR3_STRING_FACTORY));

    if  (factory == NULL)
    {
        return  NULL;
    }

    /* Now we make a new list to track the strings.
     */
    factory->strings    = antlr3VectorNew(0);
    factory->index      = 0;

    if  (factory->strings == NULL)
    {
        ANTLR3_FREE(factory);
        return  NULL;
    }

    // Releasing strings and the factory itself does not depend on the
    // encoding, so these entries are installed unconditionally. Previously
    // the UTF32 factories were returned with a NULL close function and
    // could not be shut down through the normal API.
    //
    factory->destroy    =  destroy;
    factory->close      =  closeFactory;

    // Install the encoding specific API
    //
    // TODO: The UTF32 encodings need equivalent functions to
    // UTF16 and 8Bit if they are ever to be supported by the STRING stuff.
    // The STRING stuff was intended as a quick and dirty hack for people that did not
    // want to worry about memory and performance very much. It is much more efficient
    // to use the pointers within the tokens directly, so the string functions are not
    // implemented for the newer encodings.
    // We install the standard 8 and 16 bit functions for UTF8 and UTF16, but they
    // will not be useful beyond returning the text.
    //
    switch(encoding)
    {
        case    ANTLR3_ENC_UTF32:
        case    ANTLR3_ENC_UTF32BE:
        case    ANTLR3_ENC_UTF32LE:

            // No string creation functions exist for 32 bit encodings
            //
            break;

        case    ANTLR3_ENC_UTF16BE:
        case    ANTLR3_ENC_UTF16LE:
        case    ANTLR3_ENC_UTF16:

            factory->newRaw     =  newRawUTF16;
            factory->newSize    =  newSizeUTF16;
            factory->newPtr     =  newPtrUTF16_UTF16;
            factory->newPtr8    =  newPtrUTF16_8;
            factory->newStr     =  newStrUTF16_UTF16;
            factory->newStr8    =  newStrUTF16_8;
            factory->printable  =  printableUTF16;
            break;

        case    ANTLR3_ENC_UTF8:
        case    ANTLR3_ENC_EBCDIC:
        case    ANTLR3_ENC_8BIT:
        default:

            factory->newRaw     =  newRaw8;
            factory->newSize    =  newSize8;
            factory->newPtr     =  newPtr8;
            factory->newPtr8    =  newPtr8;
            factory->newStr     =  newStr8;
            factory->newStr8    =  newStr8;
            factory->printable  =  printable8;
            break;
    }
    return  factory;
}


/**
 * Allocates an empty 8 bit string and registers it with the factory.
 *
 * The string receives the 8 bit API from stringInit8(), has no character
 * buffer yet, and is stored in the factory's tracking vector at the
 * factory's current index with stringFree() as its release function.
 *
 * \param[in] factory - Pointer to the string factory that will own the string
 * \return Pointer to the new string, or NULL if the structure could not be allocated.
 */
static    pANTLR3_STRING
newRaw8 (pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING  str = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if  (str != NULL)
    {
        /* Install the API and remember who owns us
         */
        stringInit8(str);
        str->factory = factory;

        /* Track the string in the factory, then claim the slot number we used
         */
        factory->strings->set(factory->strings, factory->index, (void *) str, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        str->index = factory->index;
        factory->index++;
    }

    return str;
}
/**
 * Allocates an empty UTF16 string and registers it with the factory.
 *
 * The string receives the UTF16 API from stringInitUTF16(), has no character
 * buffer yet, and is stored in the factory's tracking vector at the
 * factory's current index with stringFree() as its release function.
 *
 * \param[in] factory - Pointer to the string factory that will own the string
 * \return Pointer to the new string, or NULL if the structure could not be allocated.
 */
static    pANTLR3_STRING
newRawUTF16     (pANTLR3_STRING_FACTORY factory)
{
    pANTLR3_STRING  str = (pANTLR3_STRING) ANTLR3_MALLOC(sizeof(ANTLR3_STRING));

    if  (str != NULL)
    {
        /* Install the API and remember who owns us
         */
        stringInitUTF16(str);
        str->factory = factory;

        /* Track the string in the factory, then claim the slot number we used
         */
        factory->strings->set(factory->strings, factory->index, (void *) str, (void (ANTLR3_CDECL *)(void *))(stringFree), ANTLR3_TRUE);
        str->index = factory->index;
        factory->index++;
    }

    return str;
}
/**
 * Releases a string: its character buffer (when one was allocated) and then
 * the string structure itself. Installed as the element release function
 * when strings are added to the factory's tracking vector.
 *
 * \param[in] string - The string to release; not usable after this call
 */
static
void    ANTLR3_CDECL stringFree  (pANTLR3_STRING string)
{
    /* The buffer only exists if something was ever stored in the string
     */
    if  (string->chars != NULL)
    {
        ANTLR3_FREE(string->chars);
    }

    /* The structure itself goes last
     */
    ANTLR3_FREE(string);
}
/**
 * Resets a string to empty (no buffer, zero length and size), marks it as
 * 8 bit encoded and installs the 8 bit implementations of the string API.
 *
 * \param[in,out] string - The string structure to initialize
 */
static  void
stringInit8  (pANTLR3_STRING string)
{
    /* Contents: nothing allocated yet
     */
    string->chars       = NULL;
    string->len         = 0;
    string->size        = 0;
    string->encoding    = ANTLR3_ENC_8BIT ;

    /* Operations taking native (8 bit) or explicitly 8 bit character pointers;
     * for this encoding they are the same functions.
     */
    string->set         = set8;
    string->set8        = set8;
    string->append      = append8;
    string->append8     = append8;
    string->insert      = insert8;
    string->insert8     = insert8;
    string->compare     = compare8;
    string->compare8    = compare8;

    /* Operations taking another ANTLR3 string
     */
    string->setS        = setS;
    string->appendS     = appendS;
    string->insertS     = insertS;
    string->compareS    = compareS;

    /* Character and integer helpers
     */
    string->addc        = addc8;
    string->addi        = addi8;
    string->inserti     = inserti8;
    string->charAt      = charAt8;

    /* Extraction and conversion
     */
    string->subString   = subString8;
    string->toInt32     = toInt32_8;
    string->to8         = to8_8;
    string->toUTF8      = toUTF8_8;
}
/**
 * Resets a string to empty (no buffer, zero length and size), marks it as
 * UTF16 encoded and installs the UTF16 implementations of the string API.
 *
 * \param[in,out] string - The string structure to initialize
 */
static  void
stringInitUTF16  (pANTLR3_STRING string)
{
    string->len         = 0;
    string->size        = 0;
    string->chars       = NULL;

    /* This is a UTF16 string; it was previously (incorrectly) tagged as
     * ANTLR3_ENC_8BIT, which made it indistinguishable from an 8 bit string.
     */
    string->encoding    = ANTLR3_ENC_UTF16;

    /* API for UTF16 strings: the plain entry points take UTF16 input, the
     * "8" entry points take 8 bit input and widen it.
     */
    string->set         = setUTF16_UTF16;
    string->set8        = setUTF16_8;
    string->append      = appendUTF16_UTF16;
    string->append8     = appendUTF16_8;
    string->insert      = insertUTF16_UTF16;
    string->insert8     = insertUTF16_8;
    string->addi        = addiUTF16;
    string->inserti     = insertiUTF16;
    string->addc        = addcUTF16;
    string->charAt      = charAtUTF16;
    string->compare     = compareUTF16_UTF16;
    string->compare8    = compareUTF16_8;
    string->subString   = subStringUTF16;
    string->toInt32     = toInt32_UTF16;
    string->to8         = to8_UTF16;
    string->toUTF8      = toUTF8_UTF16;

    /* Operations taking another ANTLR3 string are shared with the 8 bit API
     */
    string->compareS    = compareS;
    string->setS        = setS;
    string->appendS     = appendS;
    string->insertS     = insertS;
}
/**
 * Resets a string to empty (no buffer, zero length and size).
 *
 * No API functions are installed and the encoding field is left untouched,
 * because a UTF-8 specific string API does not exist yet.
 *
 * \param[in,out] string - The string structure to reset
 *
 * TODO: Implement UTF-8
 */
static  void
stringInitUTF8  (pANTLR3_STRING string)
{
    /* Contents only; there is no UTF-8 API to install
     */
    string->chars   = NULL;
    string->size    = 0;
    string->len     = 0;
}

// Produce the "UTF8" form of an 8 bit string. No assumptions are made about
// the 8 bit encoding, so this is simply a new string, created through the
// owning factory, that holds a copy of the same len characters.
//
static  pANTLR3_STRING
toUTF8_8        (pANTLR3_STRING string)
{
    pANTLR3_STRING_FACTORY  owner = string->factory;

    return owner->newPtr(owner, (pANTLR3_UINT8)(string->chars), string->len);
}

// Convert a UTF16 string into a UTF8 representation using the Unicode.org
// supplied C algorithms, which are contained within the ANTLR3 C runtime
// as permitted by the Unicode license (see antlr3convertutf.c/.h).
// UCS2 has the same encoding as UTF16 so we can use the UTF16 converter.
//
// The result is created through the owning factory's newStr8 and its buffer
// is then replaced by one large enough for the worst case of 3 UTF8 bytes per
// UTF16 code unit, plus a terminator. len is set to the number of UTF8 bytes
// produced and the buffer is always '\0' terminated at that position.
//
// Returns NULL if the factory could not create the string. If the conversion
// buffer cannot be allocated the string is returned empty, with a NULL buffer
// and zero len and size.
//
static  pANTLR3_STRING
toUTF8_UTF16    (pANTLR3_STRING string)
{
    const UTF16       * inputPos;
    UTF8              * outputPos;
    pANTLR3_STRING      utf8String;

    utf8String  = string->factory->newStr8(string->factory, (pANTLR3_UINT8)"");

    if  (utf8String == NULL)
    {
        return  NULL;
    }

    // Discard the buffer that came with the empty string and replace it
    // with one sized for the maximum expected expansion.
    //
    ANTLR3_FREE(utf8String->chars);

    utf8String->len     = 0;
    utf8String->size    = string->len * 3;
    utf8String->chars   = (pANTLR3_UINT8)ANTLR3_MALLOC(utf8String->size + 1);

    if  (utf8String->chars == NULL)
    {
        // Do not leave a size that claims capacity we do not have
        //
        utf8String->size = 0;
        return  utf8String;
    }

    inputPos    = (const UTF16 *)(string->chars);
    outputPos   = (UTF8 *)(utf8String->chars);

    // Call the Unicode converter. The reference converter treats the target
    // end as one past the last writable byte, so the whole size bytes are
    // offered; the extra byte we allocated is reserved for the terminator.
    // (Offering size - 1 lost the final character of a string in which every
    // code unit needs 3 bytes, and pointed before the buffer for an empty
    // input.)
    //
    // We don't really care if things failed or not here, we just convert
    // everything that was vaguely possible and stop when it wasn't. It is
    // up to the grammar programmer to verify that the input is sensible.
    //
    (void) ConvertUTF16toUTF8
                (
                    &inputPos,
                    ((const UTF16 *)(string->chars)) + string->len,
                    &outputPos,
                    outputPos + utf8String->size,
                    lenientConversion
                );

    // outputPos now addresses the byte after the last one written
    //
    utf8String->len = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)outputPos) - utf8String->chars);

    *outputPos = '\0';          // Always null terminate, directly after the text

    return utf8String;
}

/**
 * Creates a new, empty string with enough capacity for size 8 bit characters
 * plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - Capacity required, in characters (terminator not included)
 * \return Pointer to the new string, or NULL if the string structure could
 *         not be created. If only the character buffer could not be
 *         allocated, the string is still returned, with a NULL buffer and a
 *         size of zero.
 */
static    pANTLR3_STRING
newSize8        (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  str = factory->newRaw(factory);

    if  (str != NULL)
    {
        /* One extra byte is always reserved for the terminator
         */
        str->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT8) * (size+1)));

        if  (str->chars != NULL)
        {
            str->chars[0]   = '\0';
            str->size       = size + 1;
        }
    }

    return str;
}
/**
 * Creates a new, empty string with enough capacity for size UTF16 characters
 * plus a terminator.
 *
 * \param[in] factory - Pointer to the string factory that owns strings
 * \param[in] size - In characters (count double for surrogate pairs!!!)
 * \return Pointer to the new string, or NULL if the string structure could
 *         not be created. If only the character buffer could not be
 *         allocated, the string is still returned, with a NULL buffer and a
 *         size of zero.
 */
static    pANTLR3_STRING
newSizeUTF16    (pANTLR3_STRING_FACTORY factory, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;

    string  = factory->newRaw(factory);

    if  (string == NULL)
    {
        return  string;
    }

    /* Always add one more character (two bytes) for a terminator ;-)
     */
    string->chars   = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(sizeof(ANTLR3_UINT16) * (size+1)));

    if  (string->chars != NULL)
    {
        /* The terminator is a whole 16 bit code unit. Writing through the
         * byte pointer would clear only half of it and leave the other half
         * uninitialized.
         */
        *((pANTLR3_UINT16)(string->chars))  = 0;
        string->size    = size+1;       /* Size is always in characters, as is len */
    }

    return string;
}

/** Creates a new 8 bit string initialized with the 8 bit characters at the
 *  supplied ptr, of pre-determined size.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters; may be NULL, in which
 *                  case the new string is left empty
 * \param[in] size - Number of characters to copy from ptr
 * \return Pointer to the new string, or NULL if the string could not be
 *         created. If the string was created but its character buffer could
 *         not be allocated, the empty string is returned as the factory's
 *         newSize produced it, and nothing is copied.
 */
static    pANTLR3_STRING
newPtr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;

    string  = factory->newSize(factory, size);

    if  (string == NULL)
    {
        return  NULL;
    }

    /* Nothing to copy, or nowhere to copy it to: newSize can hand back a
     * string whose buffer allocation failed, which must not be written through.
     */
    if  (size == 0 || ptr == NULL || string->chars == NULL)
    {
        return  string;
    }

    ANTLR3_MEMMOVE(string->chars, (const void *)ptr, size);
    *(string->chars + size) = '\0';     /* Terminate, these strings are usually used for Token streams and printing etc.    */
    string->len = size;

    return  string;
}

/** Creates a new UTF16 string initialized with the 8 bit characters at the
 *  supplied 8 bit character ptr, of pre-determined size. Each 8 bit character
 *  is widened (zero extended) to one 16 bit code unit.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to 8 bit encoded characters; may be NULL, in which
 *                  case the new string is left empty
 * \param[in] size - Number of characters to convert from ptr
 * \return Pointer to the new string, or NULL if the string could not be
 *         created. If the string was created but its character buffer could
 *         not be allocated, the empty string is returned as the factory's
 *         newSize produced it, and nothing is copied.
 */
static    pANTLR3_STRING
newPtrUTF16_8   (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;
    pANTLR3_UINT16  out;
    ANTLR3_UINT32   count;

    /* newSize accepts size in characters, not bytes
     */
    string  = factory->newSize(factory, size);

    if  (string == NULL)
    {
        return  NULL;
    }

    /* Nothing to copy, or nowhere to copy it to: newSize can hand back a
     * string whose buffer allocation failed, which must not be written through.
     */
    if  (size == 0 || ptr == NULL || string->chars == NULL)
    {
        return  string;
    }

    out = (pANTLR3_UINT16)(string->chars);

    /* The counter is unsigned like size, so every requested character is
     * converted; a signed counter would skip the copy for very large sizes
     * while len was still set to size.
     */
    for (count = 0; count < size; count++)
    {
        *out++ = (ANTLR3_UINT16)(*ptr++);
    }

    /* Terminate, these strings are usually used for Token streams and printing etc.
     */
    *out = '\0';

    string->len = size;

    return  string;
}

/** Creates a new UTF16 string initialized with the UTF16 characters at the
 *  supplied ptr, of pre-determined size.
 * \param[in] factory - Pointer to the string factory that owns the strings
 * \param[in] ptr - Pointer to UTF16 encoded characters; may be NULL, in which
 *                  case the new string is left empty
 * \param[in] size - Number of 16 bit characters (not bytes) to copy from ptr
 * \return Pointer to the new string, or NULL if the string could not be
 *         created. If the string was created but its character buffer could
 *         not be allocated, the empty string is returned as the factory's
 *         newSize produced it, and nothing is copied.
 */
static    pANTLR3_STRING
newPtrUTF16_UTF16       (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr, ANTLR3_UINT32 size)
{
    pANTLR3_STRING  string;

    string  = factory->newSize(factory, size);

    if  (string == NULL)
    {
        return  NULL;
    }

    /* Nothing to copy, or nowhere to copy it to: newSize can hand back a
     * string whose buffer allocation failed, which must not be written through.
     */
    if  (size == 0 || ptr == NULL || string->chars == NULL)
    {
        return  string;
    }

    ANTLR3_MEMMOVE(string->chars, (const void *)ptr, (size * sizeof(ANTLR3_UINT16)));

    /* Terminate, these strings are usually used for Token streams and printing etc.
     */
    *(((pANTLR3_UINT16)(string->chars)) + size) = '\0';
    string->len = size;

    return  string;
}

/** Create a new 8 bit string from the supplied, null terminated, 8 bit string pointer.
 *  The length is measured with strlen and the work is delegated to the
 *  factory's newPtr8.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the null terminated, 8 bit encoded string
 * \return Pointer to the newly initialized string, as returned by newPtr8
 */
static    pANTLR3_STRING
newStr8 (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32   length = (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, length);
}

/** Create a new UTF16 string from the supplied, null terminated, 8 bit string pointer.
 *  The length is measured with strlen and the work is delegated to the
 *  factory's newPtr8.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the null terminated, 8 bit encoded string
 * \return Pointer to the newly initialized string, as returned by newPtr8
 */
static    pANTLR3_STRING
newStrUTF16_8   (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    ANTLR3_UINT32   length = (ANTLR3_UINT32)strlen((const char *)ptr);

    return factory->newPtr8(factory, ptr, length);
}

/** Create a new UTF16 string from the supplied, null terminated, UTF16 string pointer.
 *  The input is scanned for its 16 bit zero terminator to find the length,
 *  then the work is delegated to the factory's newPtr.
 * \param[in] factory - Pointer to the string factory that owns strings.
 * \param[in] ptr - Pointer to the null terminated, UTF16 encoded string
 * \return Pointer to the newly initialized string, as returned by newPtr
 */
static    pANTLR3_STRING
newStrUTF16_UTF16       (pANTLR3_STRING_FACTORY factory, pANTLR3_UINT8 ptr)
{
    pANTLR3_UINT16  units   = (pANTLR3_UINT16)ptr;
    ANTLR3_UINT32   length  = 0;

    /* Count 16 bit code units up to, but not including, the terminator
     */
    while   (units[length] != '\0')
    {
        length++;
    }

    return factory->newPtr(factory, ptr, length);
}

/**
 * Removes a single string from the factory that tracks it.
 *
 * The string's entry is deleted from the factory's tracking vector, the
 * factory's next-free index is decremented, and every string that followed
 * the deleted one is renumbered so that its index matches its vector slot.
 *
 * NOTE(review): the renumbering assumes the vector's del both releases the
 * element and closes the gap by moving later entries down - confirm against
 * the vector implementation.
 *
 * \param[in] factory - The factory tracking the string
 * \param[in] string - The string to remove; must have been made by this factory
 */
static    void
destroy (pANTLR3_STRING_FACTORY factory, pANTLR3_STRING string)
{
    ANTLR3_UINT32   slot;
    ANTLR3_UINT32   removed = string->index;    // Captured before the entry is deleted

    // Take the entry out of the tracking vector
    //
    factory->strings->del(factory->strings, removed);

    // One less string is tracked, so the next string allocated
    // must reuse the slot at the new end of the vector.
    //
    factory->index--;

    // Renumber whatever followed the deleted entry, beginning with the
    // entry now occupying its slot. When the deleted string was the last
    // one there is nothing after it and the loop does not execute.
    //
    for (slot = removed; slot < factory->index; slot++)
    {
        pANTLR3_STRING moved = (pANTLR3_STRING)(factory->strings->elements[slot].element);

        moved->index = slot;
    }
}

/**
 * Builds a printable copy of an 8 bit string, intended for error messages
 * and similar output.
 *
 * Newline becomes the two characters \n, carriage return becomes \r, any
 * other character for which isprint() is false becomes '?', and everything
 * else is copied unchanged.
 *
 * \param[in] factory - The factory used to create the result
 * \param[in] instr - The 8 bit string to convert
 * \return Pointer to the new string; NULL if the string could not be created.
 *         If its character buffer could not be allocated the empty string is
 *         returned as the factory's newSize produced it.
 */
static    pANTLR3_STRING
printable8(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING  string;

    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT8   scannedText;
    ANTLR3_UINT32   i;

    /* A character expands to at most two characters, so twice the input
     * length (plus one) is always enough room.
     */
    string  = factory->newSize(factory, instr->len *2 + 1);

    /* newSize reports failure either as NULL or as a string without a
     * buffer; in both cases there is nothing we can safely write to.
     */
    if  (string == NULL || string->chars == NULL)
    {
        return  string;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = string->chars;

    for (i = 0; i < instr->len; i++)
    {
        ANTLR3_UINT8    c = *(instr->chars + i);

        if (c == '\n')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'n';
        }
        else if (c == '\r')
        {
            *scannedText++ = '\\';
            *scannedText++ = 'r';
        }
        else if (!isprint(c))
        {
            *scannedText++ = '?';
        }
        else
        {
            *scannedText++ = c;
        }
    }
    *scannedText  = '\0';

    string->len = (ANTLR3_UINT32)(scannedText - string->chars);

    return  string;
}

/**
 * Builds a printable copy of a UTF16 string, intended for error messages
 * and similar output.
 *
 * Newline becomes the two characters \n and carriage return becomes \r.
 * Code units up to 0xFF for which isprint() is false become '?'. Code units
 * above 0xFF are copied unchanged: isprint() is only defined for values
 * representable as unsigned char, so it cannot be asked about them.
 *
 * \param[in] factory - The factory used to create the result
 * \param[in] instr - The UTF16 string to convert
 * \return Pointer to the new string; NULL if the string could not be created.
 *         If its character buffer could not be allocated the empty string is
 *         returned as the factory's newSize produced it.
 */
static    pANTLR3_STRING
printableUTF16(pANTLR3_STRING_FACTORY factory, pANTLR3_STRING instr)
{
    pANTLR3_STRING  string;

    /* We don't need to be too efficient here, this is mostly for error messages and so on.
     */
    pANTLR3_UINT16  scannedText;
    pANTLR3_UINT16  inText;
    ANTLR3_UINT32   i;
    ANTLR3_UINT32   outLen;

    /* A character expands to at most two characters, so twice the input
     * length (plus one) is always enough room.
     */
    string  = factory->newSize(factory, instr->len *2 + 1);

    /* newSize reports failure either as NULL or as a string without a
     * buffer; in both cases there is nothing we can safely write to.
     */
    if  (string == NULL || string->chars == NULL)
    {
        return  string;
    }

    /* Scan through and replace unprintable (in terms of this routine)
     * characters
     */
    scannedText = (pANTLR3_UINT16)(string->chars);
    inText      = (pANTLR3_UINT16)(instr->chars);
    outLen      = 0;

    for (i = 0; i < instr->len; i++)
    {
        ANTLR3_UINT16   c = *(inText + i);

        if (c == '\n')
        {
            *scannedText++  = '\\';
            *scannedText++  = 'n';
            outLen         += 2;
        }
        else if (c == '\r')
        {
            *scannedText++  = '\\';
            *scannedText++  = 'r';
            outLen         += 2;
        }
        else if (c <= 0xFF && !isprint((int)c))
        {
            /* Only values in the unsigned char range may be handed to isprint
             */
            *scannedText++  = '?';
            outLen++;
        }
        else
        {
            *scannedText++  = c;
            outLen++;
        }
    }
    *scannedText  = '\0';

    string->len = outLen;

    return  string;
}

/**
 * Shuts down a string factory.
 *
 * Frees the vector that tracks the factory's strings and then the factory
 * structure itself. Strings are registered in that vector with stringFree()
 * as their release function, so freeing the vector is expected to release
 * every string the factory still tracks.
 *
 * \param[in] factory - The factory to close; not usable after this call
 */
static    void
closeFactory    (pANTLR3_STRING_FACTORY factory)
{
    /* The tracking vector goes first, taking the tracked strings with it
     */
    factory->strings->free(factory->strings);

    /* Then the factory structure
     */
    ANTLR3_FREE(factory);
}

/**
 * Appends a null terminated 8 bit string to an 8 bit string, growing the
 * buffer when it is too small.
 *
 * \param[in,out] string - The 8 bit string to extend
 * \param[in] newbit - Null terminated 8 bit characters to append
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
append8 (pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32   extra   = (ANTLR3_UINT32)strlen(newbit);
    ANTLR3_UINT32   needed  = string->len + extra + 1;      /* Existing text + new text + terminator */

    if  (string->size < needed)
    {
        pANTLR3_UINT8   grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, needed);

        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    /* Copy one byte more than the text so that the trailing '\0' comes along
     */
    ANTLR3_MEMMOVE((void *)(string->chars + string->len), newbit, (ANTLR3_UINT32)(extra + 1));
    string->len += extra;

    return string->chars;
}

/**
 * Appends a null terminated 8 bit string to a UTF16 string, widening each
 * 8 bit character to one 16 bit code unit and growing the buffer when it is
 * too small.
 *
 * \param[in,out] string - The UTF16 string to extend
 * \param[in] newbit - Null terminated 8 bit characters to append
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
appendUTF16_8   (pANTLR3_STRING string, const char * newbit)
{
    ANTLR3_UINT32   len;
    pANTLR3_UINT16  apPoint;
    ANTLR3_UINT32   count;

    len = (ANTLR3_UINT32)strlen(newbit);

    /* size and len are in characters; the allocation is in bytes
     */
    if  (string->size < (string->len + len + 1))
    {
        pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)((sizeof(ANTLR3_UINT16)*(string->len + len + 1))));
        if (newAlloc == NULL)
        {
            return NULL;
        }
        string->chars   = newAlloc;
        string->size    = string->len + len + 1;
    }

    apPoint = ((pANTLR3_UINT16)string->chars) + string->len;
    string->len += len;

    for (count = 0; count < len; count++)
    {
        /* Go through an unsigned 8 bit value first: where plain char is
         * signed, a byte of 0x80 or above would otherwise be sign extended
         * to 0xFFxx instead of 0x00xx.
         */
        *apPoint++   = (ANTLR3_UINT16)((ANTLR3_UINT8)(*(newbit + count)));
    }
    *apPoint = '\0';

    return string->chars;
}

/**
 * Appends a null terminated UTF16 string to a UTF16 string, growing the
 * buffer when it is too small.
 *
 * \param[in,out] string - The UTF16 string to extend
 * \param[in] newbit - Null terminated UTF16 characters, passed through a
 *                     char pointer
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
appendUTF16_UTF16       (pANTLR3_STRING string, const char * newbit)
{
    pANTLR3_UINT16  units   = (pANTLR3_UINT16)newbit;
    ANTLR3_UINT32   extra   = 0;
    ANTLR3_UINT32   needed;

    /* Measure the input in 16 bit code units, terminator excluded
     */
    while   (units[extra] != '\0')
    {
        extra++;
    }

    needed = string->len + extra + 1;       /* Existing text + new text + terminator */

    if  (string->size < needed)
    {
        pANTLR3_UINT8   grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)( sizeof(ANTLR3_UINT16) * needed ));

        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    /* Copy one code unit more than the text so that the trailing terminator comes along
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + string->len), newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(extra+1)));
    string->len += extra;

    return string->chars;
}

/**
 * Replaces the contents of an 8 bit string with a null terminated 8 bit
 * string, growing the buffer when it is too small.
 *
 * \param[in,out] string - The 8 bit string to overwrite
 * \param[in] chars - Null terminated 8 bit characters to store
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
set8    (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32   newLen  = (ANTLR3_UINT32)strlen(chars);
    ANTLR3_UINT32   needed  = newLen + 1;       /* Text + terminator */

    if  (string->size < needed)
    {
        pANTLR3_UINT8   grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, needed);

        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    /* Copying needed bytes brings the trailing '\0' along with the text
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, needed);
    string->len = newLen;

    return  string->chars;
}

/**
 * Replaces the contents of a UTF16 string with a null terminated 8 bit
 * string, widening each 8 bit character to one 16 bit code unit and growing
 * the buffer when it is too small.
 *
 * \param[in,out] string - The UTF16 string to overwrite
 * \param[in] chars - Null terminated 8 bit characters to store
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
setUTF16_8      (pANTLR3_STRING string, const char * chars)
{
    ANTLR3_UINT32       len;
    ANTLR3_UINT32       count;
    pANTLR3_UINT16      apPoint;

    len = (ANTLR3_UINT32)strlen(chars);

    /* size and len are in characters; the allocation is in bytes
     */
    if  (string->size < len + 1)
    {
        pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(len + 1)));
        if (newAlloc == NULL)
        {
            return NULL;
        }
        string->chars   = newAlloc;
        string->size    = len + 1;
    }
    apPoint = ((pANTLR3_UINT16)string->chars);
    string->len = len;

    for (count = 0; count < string->len; count++)
    {
        /* Go through an unsigned 8 bit value first: where plain char is
         * signed, a byte of 0x80 or above would otherwise be sign extended
         * to 0xFFxx instead of 0x00xx.
         */
        *apPoint++   = (ANTLR3_UINT16)((ANTLR3_UINT8)(*(chars + count)));
    }
    *apPoint = '\0';

    return  string->chars;
}

/**
 * Replaces the contents of a UTF16 string with a null terminated UTF16
 * string, growing the buffer when it is too small.
 *
 * \param[in,out] string - The UTF16 string to overwrite
 * \param[in] chars - Null terminated UTF16 characters, passed through a
 *                    char pointer
 * \return Pointer to the string's character buffer, or NULL if the buffer
 *         could not be grown (the string is then left unchanged).
 */
static    pANTLR3_UINT8
setUTF16_UTF16    (pANTLR3_STRING string, const char * chars)
{
    pANTLR3_UINT16  units   = (pANTLR3_UINT16)chars;
    ANTLR3_UINT32   newLen  = 0;
    ANTLR3_UINT32   needed;

    /* Measure the input in 16 bit code units, terminator excluded
     */
    while   (units[newLen] != '\0')
    {
        newLen++;
    }

    needed = newLen + 1;        /* Text + terminator */

    if  (string->size < needed)
    {
        pANTLR3_UINT8   grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * needed));

        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    /* Copying needed code units brings the trailing terminator along with the text
     */
    ANTLR3_MEMMOVE((void *)(string->chars), chars, (ANTLR3_UINT32)(needed * sizeof(ANTLR3_UINT16)));
    string->len = newLen;

    return  string->chars;
}

/* Appends a single character, narrowed to 8 bits, to an 8 bit string
 * and re-terminates it.
 *
 * Returns the string's character buffer, or NULL if the buffer needed
 * to grow and could not be reallocated; in that case none of the
 * string's fields have been modified.
 */
static    pANTLR3_UINT8
addc8   (pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    ANTLR3_UINT32   needed;
    ANTLR3_UINT32   at;

    /* Existing characters + the new one + the terminator
     */
    needed = string->len + 2;

    if  (string->size < needed)
    {
        pANTLR3_UINT8 grown;

        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)needed);
        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    at = string->len;
    string->chars[at]       = (ANTLR3_UINT8)c;
    string->chars[at + 1]   = '\0';
    string->len             = at + 1;

    return  string->chars;
}

/* Appends a single character, narrowed to 16 bits, to a UTF16 string
 * and re-terminates it.
 *
 * Returns the string's character buffer, or NULL if the buffer needed
 * to grow and could not be reallocated; in that case none of the
 * string's fields have been modified.
 */
static    pANTLR3_UINT8
addcUTF16       (pANTLR3_STRING string, ANTLR3_UINT32 c)
{
    ANTLR3_UINT32   needed;
    ANTLR3_UINT32   at;
    pANTLR3_UINT16  wide;

    /* Existing characters + the new one + the terminator, counted in
     * 16 bit characters; the allocation is made in bytes.
     */
    needed = string->len + 2;

    if  (string->size < needed)
    {
        pANTLR3_UINT8 grown;

        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * needed));
        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    wide = (pANTLR3_UINT16)(string->chars);
    at   = string->len;

    wide[at]        = (ANTLR3_UINT16)c;
    wide[at + 1]    = '\0';
    string->len     = at + 1;

    return  string->chars;
}

/* Appends the decimal text of a signed 32 bit integer to an 8 bit
 * string, by formatting it into a local buffer and passing that to the
 * string's append8 function, whose result is returned.
 */
static    pANTLR3_UINT8
addi8   (pANTLR3_STRING string, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->append8(string, digits);
}
/* Appends the decimal text of a signed 32 bit integer to a UTF16
 * string. The number is formatted as 8 bit characters into a local
 * buffer, which is then passed to the string's append8 function, whose
 * result is returned.
 */
static    pANTLR3_UINT8
addiUTF16       (pANTLR3_STRING string, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->append8(string, digits);
}

/* Inserts the decimal text of a signed 32 bit integer into an 8 bit
 * string at the given insertion point, by formatting it into a local
 * buffer and passing that to the string's insert8 function, whose
 * result is returned.
 */
static    pANTLR3_UINT8
inserti8    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->insert8(string, point, digits);
}
/* Inserts the decimal text of a signed 32 bit integer into a UTF16
 * string at the given insertion point. The number is formatted as
 * 8 bit characters into a local buffer, which is then passed to the
 * string's insert8 function, whose result is returned.
 */
static    pANTLR3_UINT8
insertiUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 point, ANTLR3_INT32 i)
{
    char    digits[32];

    sprintf(digits, "%d", i);

    return  string->insert8(string, point, digits);
}

/* Inserts a NUL terminated 8 bit character string into an 8 bit string
 * in front of the character at index 'point'.
 *
 * An insertion point at or beyond the current length is handed to the
 * string's append function instead. Inserting an empty string changes
 * nothing.
 *
 * Returns the string's character buffer (or whatever append returned),
 * or NULL if the buffer needed to grow and could not be reallocated; in
 * that case none of the string's fields have been modified.
 */
static  pANTLR3_UINT8
insert8 (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32   extra;
    ANTLR3_UINT32   needed;
    pANTLR3_UINT8   gap;

    if  (point >= string->len)
    {
        return  string->append(string, newbit);
    }

    extra = (ANTLR3_UINT32)strlen(newbit);
    if  (extra == 0)
    {
        return  string->chars;
    }

    /* Existing characters + inserted characters + terminator
     */
    needed = string->len + extra + 1;
    if  (string->size < needed)
    {
        pANTLR3_UINT8 grown;

        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)needed);
        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    gap = string->chars + point;

    /* Shift the tail, terminator included, up to open the gap
     */
    ANTLR3_MEMMOVE((void *)(gap + extra), (void *)gap, (ANTLR3_UINT32)(string->len - point + 1));

    /* Fill the gap with exactly the new characters, no terminator
     */
    ANTLR3_MEMMOVE((void *)gap, newbit, (ANTLR3_UINT32)extra);

    string->len += extra;

    return  string->chars;
}

/* Inserts a NUL terminated 8 bit character string into a UTF16 string
 * in front of the 16 bit character at index 'point', widening every
 * inserted character to 16 bits.
 *
 * An insertion point at or beyond the current length is handed to the
 * string's append8 function instead. Inserting an empty string changes
 * nothing.
 *
 * Returns the string's character buffer (or whatever append8 returned),
 * or NULL if the buffer needed to grow and could not be reallocated; in
 * that case none of the string's fields have been modified.
 */
static  pANTLR3_UINT8
insertUTF16_8   (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    ANTLR3_UINT32       len;
    ANTLR3_UINT32       count;
    pANTLR3_UINT16      inPoint;

    if  (point >= string->len)
    {
        return  string->append8(string, newbit);
    }

    len = (ANTLR3_UINT32)strlen(newbit);

    if  (len == 0)
    {
        return  string->chars;
    }

    /* size is compared against a count of characters (plus terminator),
     * while the allocation itself is made in bytes, two per character.
     */
    if  (string->size < (string->len + len + 1))
    {
        pANTLR3_UINT8 newAlloc = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len + len + 1)));
        if (newAlloc == NULL)
        {
            return NULL;
        }
        string->chars   = newAlloc;
        string->size    = string->len + len + 1;
    }

    /* Move the characters we are inserting before, including the delimiter
     */
    ANTLR3_MEMMOVE((void *)(((pANTLR3_UINT16)string->chars) + point + len), (void *)(((pANTLR3_UINT16)string->chars) + point), (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16)*(string->len - point + 1)));

    string->len += len;

    inPoint = ((pANTLR3_UINT16)(string->chars))+point;
    for (count = 0; count<len; count++)
    {
        /* Widen via unsigned char: where plain char is signed, a direct
         * conversion would sign extend characters above 0x7F into 0xFFxx.
         */
        *(inPoint + count) = (ANTLR3_UINT16)(unsigned char)(*(newbit+count));
    }

    return  string->chars;
}

/* Inserts a character string into a UTF16 string in front of the 16 bit
 * character at index 'point'. The new text is read as NUL terminated
 * 16 bit characters even though it arrives through a const char pointer.
 *
 * An insertion point at or beyond the current length is handed to the
 * string's append function instead. Inserting an empty string changes
 * nothing.
 *
 * Returns the string's character buffer (or whatever append returned),
 * or NULL if the buffer needed to grow and could not be reallocated; in
 * that case none of the string's fields have been modified.
 */
static  pANTLR3_UINT8
insertUTF16_UTF16       (pANTLR3_STRING string, ANTLR3_UINT32 point, const char * newbit)
{
    pANTLR3_UINT16      src;
    pANTLR3_UINT16      gap;
    ANTLR3_UINT32       extra;
    ANTLR3_UINT32       needed;

    if  (point >= string->len)
    {
        return  string->append(string, newbit);
    }

    /* Count the 16 bit characters that precede the terminator
     */
    src = (pANTLR3_UINT16)newbit;
    for (extra = 0; src[extra] != '\0'; extra++)
    {
        /* counting only */
    }

    if  (extra == 0)
    {
        return  string->chars;
    }

    /* Existing characters + inserted characters + terminator, counted
     * in 16 bit characters; the allocation is made in bytes.
     */
    needed = string->len + extra + 1;
    if  (string->size < needed)
    {
        pANTLR3_UINT8 grown;

        grown = (pANTLR3_UINT8) ANTLR3_REALLOC((void *)string->chars, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * needed));
        if  (grown == NULL)
        {
            return NULL;
        }
        string->chars   = grown;
        string->size    = needed;
    }

    gap = ((pANTLR3_UINT16)string->chars) + point;

    /* Shift the tail, terminator included, up to open the gap
     */
    ANTLR3_MEMMOVE((void *)(gap + extra), (void *)gap, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * (string->len - point + 1)));

    /* Fill the gap with exactly the new characters, no terminator
     */
    ANTLR3_MEMMOVE((void *)gap, newbit, (ANTLR3_UINT32)(sizeof(ANTLR3_UINT16) * extra));

    string->len += extra;

    return  string->chars;
}

/* Sets the contents of one string from another string, by passing the
 * source string's character buffer to the target's set function and
 * returning its result.
 */
static    pANTLR3_UINT8
setS        (pANTLR3_STRING string, pANTLR3_STRING chars)
{
    const char * text;

    text = (const char *)(chars->chars);

    return  string->set(string, text);
}

/* Appends one string to another, by passing the character buffer of
 * 'newbit' to the target's append function and returning its result.
 *
 * If 'newbit' is NULL, or has a zero length, a zero size or no
 * character buffer, nothing is appended and the target's current
 * character buffer is returned.
 */
static    pANTLR3_UINT8
appendS     (pANTLR3_STRING string, pANTLR3_STRING newbit)
{
    if  (newbit != NULL && newbit->len != 0 && newbit->size != 0 && newbit->chars != NULL)
    {
        return  string->append(string, (const char *)(newbit->chars));
    }

    /* Nothing to add
     */
    return  string->chars;
}

/* Inserts one string into another at the given insertion point, by
 * passing the character buffer of 'newbit' to the target's insert
 * function and returning its result.
 */
static    pANTLR3_UINT8
insertS     (pANTLR3_STRING string, ANTLR3_UINT32 point, pANTLR3_STRING newbit)
{
    const char * text;

    text = (const char *)(newbit->chars);

    return  string->insert(string, point, text);
}

/* Compares the text of an 8 bit string with the supplied NUL terminated
 * 8 bit character string. The result is that of strcmp(), converted to
 * the unsigned return type.
 */
static ANTLR3_UINT32
compare8        (pANTLR3_STRING string, const char * compStr)
{
    const char * ours;

    ours = (const char *)(string->chars);

    return  strcmp(ours, compStr);
}

/* Compares the text of a UTF16 string with the supplied NUL terminated
 * 8 bit character string and returns a result a la strcmp(): zero when
 * the texts are equal, otherwise the difference between the first pair
 * of characters that differ, converted to the unsigned return type.
 *
 * As with strcmp(), each 8 bit character takes part in the comparison
 * as an unsigned char.
 */
static ANTLR3_UINT32
compareUTF16_8  (pANTLR3_STRING string, const char * compStr)
{
    pANTLR3_UINT16  ourString;
    ANTLR3_UINT32   charDiff;

    ourString   = (pANTLR3_UINT16)(string->chars);

    while   ((*ourString != '\0') && (*compStr != '\0'))
    {
        /* Go via unsigned char: where plain char is signed, characters
         * above 0x7F would otherwise be promoted to negative values and
         * never match their 16 bit counterparts.
         */
        charDiff = (ANTLR3_UINT32)(*ourString) - (ANTLR3_UINT32)(unsigned char)(*compStr);
        if  (charDiff != 0)
        {
            return charDiff;
        }
        ourString++;
        compStr++;
    }

    /* At this point, one of the strings was terminated
     */
    return (ANTLR3_UINT32)(*ourString) - (ANTLR3_UINT32)(unsigned char)(*compStr);

}

/* Compares the text of a UTF16 string with the supplied character
 * string, which is read as NUL terminated 16 bit characters even though
 * it arrives through a const char pointer. The result is a la strcmp():
 * zero when the texts are equal, otherwise the difference between the
 * first pair of characters that differ, converted to the unsigned
 * return type.
 */
static ANTLR3_UINT32
compareUTF16_UTF16      (pANTLR3_STRING string, const char * compStr8)
{
    pANTLR3_UINT16  ours;
    pANTLR3_UINT16  theirs;

    ours    = (pANTLR3_UINT16)(string->chars);
    theirs  = (pANTLR3_UINT16)(compStr8);

    /* Walk both texts in step until either one ends or they diverge
     */
    while   (*ours != '\0' && *theirs != '\0' && *ours == *theirs)
    {
        ours++;
        theirs++;
    }

    /* Either a terminator or the first differing pair is under the
     * pointers now; their difference is the result in both cases.
     */
    return (ANTLR3_UINT32)(*ours) - (ANTLR3_UINT32)(*theirs);
}

/* Compares the text of a string with the text of another string, by
 * passing the other string's character buffer to this string's compare
 * function and returning its result.
 */
static ANTLR3_UINT32
compareS    (pANTLR3_STRING string, pANTLR3_STRING compStr)
{
    const char * other;

    other = (const char *)compStr->chars;

    return  string->compare(string, other);
}


/* Returns the character at the supplied offset in an 8 bit string,
 * widened to ANTLR3_UCHAR. An offset greater than the string's length
 * yields '\0'.
 */
static ANTLR3_UCHAR
charAt8     (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    if  (offset <= string->len)
    {
        return  (ANTLR3_UCHAR)(string->chars[offset]);
    }

    return (ANTLR3_UCHAR)'\0';
}

/* Returns the 16 bit character at the supplied offset in a UTF16
 * string, widened to ANTLR3_UCHAR. An offset greater than the string's
 * length yields '\0'.
 */
static ANTLR3_UCHAR
charAtUTF16    (pANTLR3_STRING string, ANTLR3_UINT32 offset)
{
    pANTLR3_UINT16  wide;

    if  (offset <= string->len)
    {
        wide = (pANTLR3_UINT16)(string->chars);

        return  (ANTLR3_UCHAR)(wide[offset]);
    }

    return (ANTLR3_UCHAR)'\0';
}

/* Returns a substring of the supplied 8 bit string a la .subString(s,e)
 * in java runtimes: the characters from startIndex up to, but not
 * including, endIndex.
 *
 * An endIndex beyond the end of the string is clamped to the string's
 * length, and a startIndex beyond endIndex is clamped to endIndex, so
 * the range handed on never reaches past the last character and never
 * has a negative length.
 *
 * The new string is whatever the factory's newPtr function returns for
 * that range.
 */
static pANTLR3_STRING
subString8   (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    pANTLR3_STRING newStr;

    /* endIndex is exclusive, so the largest meaningful value is len;
     * anything larger would count the terminator as a character.
     */
    if  (endIndex > string->len)
    {
        endIndex = string->len;
    }

    /* The indexes are unsigned: without this the subtraction below
     * would wrap round to an enormous length.
     */
    if  (startIndex > endIndex)
    {
        startIndex = endIndex;
    }

    newStr  = string->factory->newPtr(string->factory, string->chars + startIndex, endIndex - startIndex);

    return newStr;
}

/* Returns a substring of the supplied UTF16 string a la .subString(s,e)
 * in java runtimes: the 16 bit characters from startIndex up to, but
 * not including, endIndex.
 *
 * An endIndex beyond the end of the string is clamped to the string's
 * length, and a startIndex beyond endIndex is clamped to endIndex, so
 * the range handed on never reaches past the last character and never
 * has a negative length.
 *
 * The new string is whatever the factory's newPtr function returns for
 * that range.
 */
static pANTLR3_STRING
subStringUTF16  (pANTLR3_STRING string, ANTLR3_UINT32 startIndex, ANTLR3_UINT32 endIndex)
{
    pANTLR3_STRING newStr;

    /* endIndex is exclusive, so the largest meaningful value is len;
     * anything larger would count the terminator as a character.
     */
    if  (endIndex > string->len)
    {
        endIndex = string->len;
    }

    /* The indexes are unsigned: without this the subtraction below
     * would wrap round to an enormous length.
     */
    if  (startIndex > endIndex)
    {
        startIndex = endIndex;
    }

    newStr  = string->factory->newPtr(string->factory, (pANTLR3_UINT8)((pANTLR3_UINT16)(string->chars) + startIndex), endIndex - startIndex);

    return newStr;
}

/* Converts the characters of an 8 bit string to an integer, using
 * atoi() on the string's character buffer.
 */
static ANTLR3_INT32
toInt32_8           (struct ANTLR3_STRING_struct * string)
{
    const char * text;

    text = (const char *)(string->chars);

    return  atoi(text);
}

/* Converts the characters of a UTF16 string to an integer.
 *
 * An optional leading '-' or '+' is accepted, followed by a run of the
 * decimal digits '0' to '9'; conversion stops at the first character
 * that is not such a digit. No leading white space is skipped and
 * overflow is not checked. A string with no digits yields 0.
 */
static ANTLR3_INT32
toInt32_UTF16       (struct ANTLR3_STRING_struct * string)
{
    pANTLR3_UINT16  input;
    ANTLR3_INT32    value;
    ANTLR3_BOOLEAN  negate;

    value   = 0;
    input   = (pANTLR3_UINT16)(string->chars);
    negate  = ANTLR3_FALSE;

    if  (*input == (ANTLR3_UCHAR)'-')
    {
        negate = ANTLR3_TRUE;
        input++;
    }
    else if (*input == (ANTLR3_UCHAR)'+')
    {
        input++;
    }

    /* The digit test is a plain range check rather than isdigit():
     * a 16 bit character can lie outside the unsigned char range that
     * the <ctype.h> functions are defined for. The terminator fails the
     * range check too, so it also ends the loop.
     */
    while   (*input >= '0' && *input <= '9')
    {
        value    = value * 10;
        value   += ((ANTLR3_UINT32)(*input) - (ANTLR3_UINT32)'0');
        input++;
    }

    return negate ? -value : value;
}

/* Returns an 8 bit version of the string. The string is in 8 bit
 * encoding already, so it is handed back as is: the result is the
 * very same string, not a copy.
 */
static    pANTLR3_STRING
to8_8       (pANTLR3_STRING string)
{
    return  string;
}

/* Returns a new 8 bit string holding the characters of the supplied
 * UTF16 string narrowed to 8 bits. Any character with a value above
 * 255 is replaced with '_'.
 *
 * Returns NULL if the new string itself could not be created. If only
 * its character buffer could not be allocated, the new string is still
 * returned, with a NULL character buffer and without this function
 * having set its size or length.
 */
static    pANTLR3_STRING
to8_UTF16   (pANTLR3_STRING string)
{
    pANTLR3_STRING  narrow;
    pANTLR3_UINT16  wide;
    ANTLR3_UINT32   idx;

    /* Start from an empty 8 bit string made by the same factory
     */
    narrow = newRaw8(string->factory);
    if  (narrow == NULL)
    {
        return  NULL;
    }

    /* One byte per character, plus one for the terminator
     */
    narrow->chars = (pANTLR3_UINT8) ANTLR3_MALLOC((size_t)(string->len + 1));
    if  (narrow->chars == NULL)
    {
        return  narrow;
    }

    narrow->size    = string->len + 1;
    narrow->len     = string->len;

    /* Narrow each 16 bit character in turn
     */
    wide = (pANTLR3_UINT16)(string->chars);
    for (idx = 0; idx < string->len; idx++)
    {
        ANTLR3_UCHAR    c;

        c = wide[idx];

        if  (c > 255)
        {
            narrow->chars[idx] = (ANTLR3_UINT8)'_';
        }
        else
        {
            narrow->chars[idx] = (ANTLR3_UINT8)c;
        }
    }

    /* Terminate
     */
    narrow->chars[narrow->len] = '\0';

    return  narrow;
}
