HOME

Stream

UTF-8 stream object. It keeps the whole text in memory.

It can load its text from a file or take it from a string.

It tracks the current position in the text (line, column, and byte offset).

Sample

#include  "Stream.h"
#include <assert.h>
#include <fcntl.h>
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
#include <io.h>
#include <windows.h>
#endif

/*
Description:
    PrintLine prints the line of `source` that contains byte offset `pos`
    and, on the following line, a '^' marker under column `col`.

Parameters:
    source  NUL-terminated text. Nothing is printed when it is NULL.
    pos     Byte offset into `source`. It must lie inside the string (the
            terminating NUL is allowed). A negative value prints nothing.
            When `pos` is on a '\n', the line ended by that newline is shown.
    col     1-based column to mark. Values below 1 mark the first column.
            The marker is placed after (col - 1) spaces, so it lines up only
            when every character before it is one console cell wide.

    On Windows the console output code page is switched to UTF-8 while
    printing and restored afterwards.
*/
void PrintLine(const char* source, int pos, int col)
{
    if (source == NULL || pos < 0)
    {
        return;
    }

    // Walk back until the previous byte is a line break (or we hit the
    // start of the text). Looking at pHead[-1] instead of *pHead keeps a
    // newline located exactly at `pos` from being taken as the line start,
    // which would make the line length negative.
    const char* pHead = source + pos;
    while (pHead > source && pHead[-1] != '\n')
    {
        pHead--;
    }

    // Walk forward to the end of the line (line break or end of text).
    const char* pTail = source + pos;
    while (*pTail != '\0' && *pTail != '\n')
    {
        pTail++;
    }

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
    const UINT oldcp = GetConsoleOutputCP();
    SetConsoleOutputCP(CP_UTF8);
#endif

    printf("%.*s\n", (int)(pTail - pHead), pHead);

    // Always emit the marker, even when it falls on the last column, and
    // finish the marker line so later output starts on a fresh line.
    for (int i = 1; i < col; i++)
    {
        printf(" ");
    }
    printf("^\n");

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
    SetConsoleOutputCP(oldcp);
#endif
}


/*
  Test3: walks the two-line in-memory text "ábç\n dêf\n" one character at a
  time and checks the line, column and byte position after every
  Stream_Match call. 'á', 'ç' and 'ê' each take two bytes in UTF-8, so
  NextBytePos advances by 2 for them and by 1 for everything else.
*/
void Test3()
{
    struct Stream stream = STREAM_INIT;

    // A freshly initialised stream is all zeros and owns no text.
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == NULL);

    if (Stream_Set(&stream, u8"ábç\n dêf\n"))
    {
        // Setting the text does not read anything yet.
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);


        // First match: line numbering starts at 1; 'á' is 2 bytes long.
        Stream_Match(&stream);

        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'á');
        assert(stream.NextBytePos == 2);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'b');
        assert(stream.NextBytePos == 3);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');
        assert(stream.NextBytePos == 5);

        // Matching '\n' bumps the line number and resets the column to 0.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'\n');
        assert(stream.NextBytePos == 6);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L' ');
        assert(stream.NextBytePos == 7);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'd');
        assert(stream.NextBytePos == 8);

        // Show the current line with a marker at the current column ('d').
        PrintLine(stream.data, stream.CurrentBytePos, stream.CurrentCol);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'ê');
        assert(stream.NextBytePos == 10);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'f');
        assert(stream.NextBytePos == 11);

        // Final '\n': 12 bytes consumed in total.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 3);
        assert(stream.CurrentChar == L'\n');
        assert(stream.NextBytePos == 12);

    }

    Stream_Close(&stream);
}



/*
  Test1: reads the file "utf8maca.txt" and checks every step of the walk.
  The assertions require the file to contain exactly the text "maçã"
  (6 bytes of UTF-8, optionally preceded by a BOM, which Stream_Open skips).

  The expectations at end of input follow Stream_Match and mirror Test1L:
  the first match that hits the end moves CurrentBytePos up to NextBytePos,
  advances the column once and stores EOF in CurrentChar; any further match
  changes nothing.

  Nothing is checked when the file cannot be opened.
*/
void Test1()
{
    struct Stream stream = STREAM_INIT;

    if (Stream_Open(&stream, "utf8maca.txt"))
    {
        // Opening the file does not read any character yet.
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);


        Stream_Match(&stream);

        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'm');
        assert(stream.CurrentBytePos == 0);
        assert(stream.NextBytePos == 1);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'a');
        assert(stream.CurrentBytePos == 1);
        assert(stream.NextBytePos == 2);

        // Peeking does not move the stream.
        assert(Stream_LookAhead(&stream) == L'ç');

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');
        assert(stream.CurrentBytePos == 2);
        assert(stream.NextBytePos == 4);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ã');
        assert(stream.CurrentBytePos == 4);
        assert(stream.NextBytePos == 6);

        // First match at end of input: the column advances one last time.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);

        // Matching again at end of input leaves the state untouched.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);
    }

    Stream_Close(&stream);
}


/*
  Test1L: same walk as Test1, but over the in-memory string "maçã", and
  additionally exercises Stream_LookAhead and Stream_LookAhead2.
  "maçã" is 6 bytes long in UTF-8: 'm' and 'a' take 1 byte each, 'ç' and
  'ã' take 2 bytes each.
*/
void Test1L()
{
    struct Stream stream = STREAM_INIT;


    if (Stream_Set(&stream, u8"maçã"))
    {
        // Setting the text does not read any character yet.
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);


        Stream_Match(&stream);

        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'm');
        assert(stream.CurrentBytePos == 0);
        assert(stream.NextBytePos == 1);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'a');
        assert(stream.CurrentBytePos == 1);
        assert(stream.NextBytePos == 2);

        // One-character peek: does not move the stream.
        assert(Stream_LookAhead(&stream) == L'ç');


        // Two-character peek: returns the next character, stores the one
        // after it in ch2.
        wchar_t ch2;
        wchar_t ch1=  Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == L'ç' && ch2 == L'ã');

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');

    
        // Only one character is left, so the second peeked value is EOF.
        ch1 = Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == L'ã' && ch2 == WEOF);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ã');
        assert(stream.CurrentBytePos== 4);
        assert(stream.NextBytePos == 6);


        // Nothing left: both peeked values are EOF.
        ch1 = Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == WEOF && ch2 == WEOF);

        // First match at end of input: CurrentBytePos catches up with
        // NextBytePos, the column advances once, CurrentChar becomes EOF.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);


        // Matching again at end of input leaves the state untouched.
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);
    }

    Stream_Close(&stream);
}

/*
  Test2: checks the state after the first Stream_Match on a stream opened
  from a file, expecting end of input immediately.

  EOF is cast to wchar_t before the comparison, as in the other tests:
  CurrentChar is a wchar_t, and where wchar_t is an unsigned 16-bit type a
  bare comparison with EOF (-1) can never be true.

  NOTE(review): this opens the same "utf8maca.txt" that Test1 reads as
  "maçã", yet expects end of input on the very first match - both tests
  cannot pass against one fixture. Presumably an empty file was intended;
  confirm which file this test should open.

  NOTE(review): for empty input, Stream_Match as implemented in this file
  leaves CurrentLine and CurrentChar at 0, so the CurrentLine == 1 and
  CurrentChar == EOF expectations below look stale - confirm the intended
  behaviour before relying on this test.

  Nothing is checked when the file cannot be opened.
*/
void Test2()
{
    struct Stream stream = STREAM_INIT;

    if (Stream_Open(&stream, "utf8maca.txt"))
    {
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);

        Stream_Match(&stream);

        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.NextBytePos == 0);
    }

    Stream_Close(&stream);
}

/*
  EmptyTest: exercises a stream that never received any text.
  Stream_Match, Stream_LookAhead and Stream_Close must all be safe to call
  on it and must leave every field at zero.
*/
void EmptyTest()
{
    struct Stream stream = STREAM_INIT;

    // STREAM_INIT zero-initialises every field.
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos== 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);

    // Matching with no text attached changes nothing.
    Stream_Match(&stream);

    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);


    
    // Peeking with no text attached reports end of input.
    assert(Stream_LookAhead(&stream) == (wchar_t)EOF);

    Stream_Close(&stream);

    // Closing a stream that owns nothing leaves it zeroed.
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);
}



/*
  Entry point of the sample: runs every test in sequence. The tests report
  failures through assert(), so reaching the end means all checks that were
  executed have passed.
*/
int main()
{
    // Disabled experiment: switch stdout to UTF-16 text mode (Windows CRT).
    //_setmode(_fileno(stdout), _O_U16TEXT);
    
    EmptyTest();
    Test1();
    Test1L();
    Test2();
    Test3();


   //wprintf(L"maçã");
}
#include <stdbool.h>
#include <stddef.h>

/*
Description:
    Cursor over a UTF-8 text held in memory. All fields start at zero
    (see STREAM_INIT); Stream_Match advances the cursor one character at a
    time and keeps the position fields up to date.
*/
struct Stream
{
    // UTF-8 encoded, NUL-terminated text. NULL until text is attached.
    // Stream_Close passes this pointer to free().
    const char* data;

    // Character produced by the last Stream_Match call: 0 before the first
    // match, (wchar_t)EOF once the end of the text has been matched.
    wchar_t CurrentChar;

    // Line number: 0 before the first match, 1 on the first line.
    // Incremented when a '\n' is matched.
    int CurrentLine;

    // Column counted in characters (not bytes), starting at 1 for the first
    // character of a line. Reset to 0 when a '\n' is matched.
    int CurrentCol;

    // Byte offset in `data` where CurrentChar starts.
    int CurrentBytePos;

    // Byte offset in `data` of the next character to be read.
    int NextBytePos;
};


// Initializer that zeroes every field: struct Stream s = STREAM_INIT;
#define STREAM_INIT {0}

/*
Description:
    The Stream_Match function reads the next character and updates
    CurrentChar, CurrentLine, CurrentCol, CurrentBytePos and NextBytePos
    to describe it.
    For convenience the character that was read is returned; at the end of
    the text the return value is (wchar_t)EOF.
*/
wchar_t Stream_Match(struct Stream* stream);

/*
Description:
    The Stream_Close function frees the text buffer held by the stream and
    resets the position fields (line, column, byte positions and current
    character) to zero.
*/
void Stream_Close(struct Stream* stream);

/*
Description:
    The Stream_Attach function closes the stream and then makes it read
    from `text`. The pointer is stored without copying, and Stream_Close
    later passes it to free().
*/
void Stream_Attach(struct Stream* stream, const char* text);

/*
Description:
    The Stream_Set function makes the stream read from a private copy of
    `text`. Returns false when the copy cannot be allocated.
*/
bool Stream_Set(struct Stream* stream, const char* text);

/*
Description:
    The Stream_Open function loads the file at `path` into memory and makes
    the stream read from it. A leading UTF-8 byte order mark is skipped.
    Returns false when the file cannot be loaded.
*/
bool Stream_Open(struct Stream* stream, const char* path);

/*
Description:
    The Stream_LookAhead function returns the character the next call to
    Stream_Match would read, without moving the stream.
*/
wchar_t Stream_LookAhead(const struct Stream* stream);

/*
Description:
    The Stream_LookAhead2 function returns the next character, like
    Stream_LookAhead, and stores the character following it in *ch2.
    The stream is not moved.
*/
wchar_t Stream_LookAhead2(const struct Stream* stream, wchar_t* ch2);

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <assert.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>

#include "Stream.h"


#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
#define stat _stat
#define strdup _strdup
#endif


/*
Description:
    Stream_Attach closes the stream (Stream_Close frees the buffer it held
    and zeroes the position fields) and then points it at `text`.
    The pointer is stored as-is, not copied. Stream_Close passes it to
    free(), so `text` has to be memory that free() accepts.
*/
void Stream_Attach(struct Stream* stream, const char* text)
{
    /* Let go of the previous buffer and rewind before adopting the new one. */
    Stream_Close(stream);

    stream->data = text;
}

/*
Description:
    Stream_Set duplicates `text` and attaches the duplicate to the stream,
    so the caller keeps ownership of the original string.

Returns:
    true when the copy was made and attached; false when the duplication
    failed, in which case the stream is left unchanged.
*/
bool Stream_Set(struct Stream* stream, const char* text)
{
    char* copy = strdup(text);

    if (copy == NULL)
    {
        return false;
    }

    /* The stream now owns the copy; Stream_Close releases it. */
    Stream_Attach(stream, copy);
    return true;
}

/*
Description:
    Stream_Open loads the whole file at `path` into a freshly allocated,
    NUL-terminated buffer and attaches it to the stream. A UTF-8 byte order
    mark (EF BB BF) at the start of the file is dropped.

    The file is opened in text mode, so on platforms that translate line
    endings fewer bytes than the size reported by stat may be read; the
    buffer is terminated after the bytes that were actually read.

Returns:
    true when the file was loaded and attached. false when the file cannot
    be examined or opened, or the buffer cannot be allocated; in that case
    the stream is left unchanged and nothing is leaked.
*/
bool Stream_Open(struct Stream* stream, const char* path)
{
    struct stat info;
    if (stat(path, &info) != 0 || info.st_size < 0)
    {
        return false;
    }

    FILE* file = fopen(path, "r");
    if (file == NULL)
    {
        return false;
    }

    const size_t size = (size_t)info.st_size;
    char* data = (char*)malloc(size + 1);
    if (data == NULL)
    {
        fclose(file);
        return false;
    }

    // Read everything in one go and terminate after what was really read,
    // so no uninitialised byte is ever inspected or kept in the text.
    const size_t n = fread(data, 1, size, file);
    fclose(file);
    data[n] = '\0';

    // Skip the BOM by shifting the text (and its terminator) to the front.
    if (n >= 3 &&
        (unsigned char)data[0] == 0xEF &&
        (unsigned char)data[1] == 0xBB &&
        (unsigned char)data[2] == 0xBF)
    {
        memmove(data, data + 3, n - 3 + 1);
    }

    // The stream takes ownership of the buffer from here on.
    Stream_Attach(stream, data);
    return true;
}

/*
Description:
    ReadNextChar decodes the UTF-8 character that starts at byte offset
    `currentPos` of the NUL-terminated text `data`.

    The lead byte fixes the length of the sequence (RFC 3629) and exactly
    that many continuation bytes are consumed. Bytes are handled as
    unsigned values, so the result does not depend on whether plain char
    is signed.

Parameters:
    data        Text to read from; may be NULL.
    currentPos  Byte offset of the character to decode.
    bytes       Receives the number of bytes the character occupies, or 0
                when nothing was decoded.

Returns:
    The decoded code point. (wchar_t)EOF with *bytes == 0 when `data` is
    NULL, when the terminating NUL is reached, or when the bytes at
    `currentPos` are not a well-formed UTF-8 sequence (stray continuation
    byte, invalid lead byte, truncated sequence, overlong encoding,
    surrogate, or value above U+10FFFF).

    NOTE: where wchar_t is narrower than 32 bits, code points above U+FFFF
    do not fit in the return value.
*/
static wchar_t ReadNextChar(const char* data, int currentPos, int* bytes)
{
    //https://www.ietf.org/rfc/rfc3629.txt
    //https://www.fileformat.info/info/unicode/utf8.htm

    *bytes = 0;

    if (data == NULL)
    {
        return (wchar_t)EOF;
    }

    const unsigned char* p = (const unsigned char*)data + currentPos;
    const unsigned lead = p[0];

    if (lead == 0)
    {
        // End of the text.
        return (wchar_t)EOF;
    }

    if (lead < 0x80)
    {
        // Plain ASCII: one byte, value is the byte itself.
        *bytes = 1;
        return (wchar_t)lead;
    }

    int length = 0;       // total bytes in the sequence
    unsigned u = 0;       // code point being assembled
    unsigned minimum = 0; // smallest value that needs `length` bytes

    if (lead >= 0xC2 && lead <= 0xDF)
    {
        // 0xC0 and 0xC1 could only start overlong forms and are excluded.
        length = 2;
        u = lead & 0x1F;
        minimum = 0x80;
    }
    else if ((lead & 0xF0) == 0xE0)
    {
        length = 3;
        u = lead & 0x0F;
        minimum = 0x800;
    }
    else if (lead >= 0xF0 && lead <= 0xF4)
    {
        length = 4;
        u = lead & 0x07;
        minimum = 0x10000;
    }
    else
    {
        // Stray continuation byte (0x80-0xBF) or a byte that can never
        // start a sequence.
        return (wchar_t)EOF;
    }

    for (int i = 1; i < length; i++)
    {
        const unsigned c = p[i];

        // The terminating NUL is not a continuation byte, so a truncated
        // sequence stops here and the read never runs past the text.
        if ((c & 0xC0) != 0x80)
        {
            return (wchar_t)EOF;
        }

        u = (u << 6) | (c & 0x3F);
    }

    if (u < minimum || u > 0x10FFFF || (u >= 0xD800 && u <= 0xDFFF))
    {
        return (wchar_t)EOF;
    }

    *bytes = length;
    return (wchar_t)u;
}

/*
Description:
    Stream_LookAhead decodes the character located at NextBytePos - the
    one the next Stream_Match call would read - without modifying the
    stream.

Returns:
    Whatever ReadNextChar yields for that position; (wchar_t)EOF when the
    stream has no text or the end of the text has been reached.
*/
wchar_t Stream_LookAhead(const struct Stream* stream)
{
    int unusedLength = 0; /* the peek has no use for the byte count */

    return ReadNextChar(stream->data, stream->NextBytePos, &unusedLength);
}

/*
Description:
    Stream_LookAhead2 peeks at the next two characters without modifying
    the stream.

Parameters:
    stream  Stream to peek into.
    ch2     Receives the second character. It is set to WEOF when the
            first read consumed no bytes; otherwise it gets whatever
            ReadNextChar yields for the position right after the first
            character.

Returns:
    The first peeked character, i.e. the one located at NextBytePos.
*/
wchar_t Stream_LookAhead2(const struct Stream* stream, wchar_t* ch2)
{
    int firstLength = 0;
    const wchar_t first =
        ReadNextChar(stream->data, stream->NextBytePos, &firstLength);

    if (firstLength > 0)
    {
        /* The second character starts right after the first one. */
        int secondLength = 0;
        *ch2 = ReadNextChar(stream->data,
                            stream->NextBytePos + firstLength,
                            &secondLength);
    }
    else
    {
        /* Nothing was consumed, so there is no second character either. */
        *ch2 = WEOF;
    }

    return first;
}

/*
Description:
    Stream_Match reads the character at NextBytePos and moves the stream
    onto it.

    When a character was consumed:
      - CurrentLine becomes 1 if it was still 0 (first match);
      - CurrentBytePos takes the old NextBytePos and NextBytePos moves
        past the character;
      - a '\n' increments CurrentLine and resets CurrentCol to 0, any
        other character increments CurrentCol;
      - CurrentChar is set to the character.

    When nothing was consumed and the read reported EOF, the stream is
    updated only if CurrentBytePos still differs from NextBytePos:
    CurrentBytePos catches up, CurrentCol is incremented once and
    CurrentChar is set to EOF. Further calls then change nothing.

Returns:
    The character that was read ((wchar_t)EOF when nothing is left).
*/
wchar_t Stream_Match(struct Stream* stream)
{
    int consumed = 0;
    const wchar_t next =
        ReadNextChar(stream->data, stream->NextBytePos, &consumed);

    if (consumed <= 0)
    {
        /* Nothing consumed: record the end of input once, then stay put. */
        if (next == (wchar_t)EOF &&
            stream->CurrentBytePos != stream->NextBytePos)
        {
            stream->CurrentBytePos = stream->NextBytePos;
            stream->CurrentCol++;
            stream->CurrentChar = next;
        }
        return next;
    }

    /* Line numbering starts at 1 with the first character ever matched. */
    if (stream->CurrentLine == 0)
    {
        stream->CurrentLine = 1;
    }

    stream->CurrentBytePos = stream->NextBytePos;
    stream->NextBytePos += consumed;

    if (next == '\n') /* fopen on windows automatically removes \r */
    {
        stream->CurrentLine++;
        stream->CurrentCol = 0;
    }
    else
    {
        stream->CurrentCol++;
    }

    stream->CurrentChar = next;
    return next;
}

/*
Description:
    Stream_Close frees the text buffer held by the stream and puts every
    field back to its initial (STREAM_INIT) value.

    The data pointer is cleared after the buffer is freed, so the stream
    never keeps a dangling pointer and closing it again - or matching on
    it afterwards - is harmless.
*/
void Stream_Close(struct Stream* stream)
{
    // The cast only drops const; free(NULL) is a no-op, so a stream that
    // never received text needs no special case.
    free((void*)stream->data);
    stream->data = NULL;

    stream->CurrentCol = 0;
    stream->CurrentLine = 0;
    stream->NextBytePos = 0;
    stream->CurrentBytePos = 0;
    stream->CurrentChar = 0;
}