Stream
A UTF-8 stream object. It keeps the whole text in memory.
The text can be loaded from a file or copied from a string.
The stream tracks the current position in the text (line, column, and byte offset).
Sample
#include "Stream.h"
#include <assert.h>
#include <fcntl.h>
#include <ctype.h>
#include <stddef.h>
#include <stdio.h>
#include <wchar.h> /* WEOF */

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
#include <io.h>
#include <windows.h>
#endif

/*
  Description:
    Prints the line of 'source' that contains byte offset 'pos', followed by
    a second line with a '^' marker placed at column 'col'.

  Parameters:
    source  NUL-terminated UTF-8 text.
    pos     byte offset inside 'source'; it may point at a '\n', in which
            case the line terminated by that '\n' is printed.
    col     1-based column for the marker (printed after col - 1 spaces).

  Notes:
    On Windows the console output code page is switched to UTF-8 while
    printing and restored afterwards.
*/
void PrintLine(const char* source, int pos, int col)
{
    /* Walk back to the first byte after the previous '\n' (or to the start).
       Looking at pHead[-1] instead of *pHead keeps the head on the current
       line even when 'pos' itself is a '\n'; otherwise the head would end up
       after the tail and the length below would be negative. */
    const char* pHead = source + pos;
    while (pHead > source && pHead[-1] != '\n')
    {
        pHead--;
    }

    /* Walk forward to the end of the line (the '\n' or the terminator). */
    const char* pTail = source + pos;
    while (*pTail != '\0' && *pTail != '\n')
    {
        pTail++;
    }

    const int length = (int)(pTail - pHead);

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
    UINT oldcp = GetConsoleOutputCP();
    SetConsoleOutputCP(CP_UTF8);
#endif

    printf("%.*s\n", length, pHead);

    /* The bound is inclusive so the marker can reach the last character of
       the line. 'length' is in bytes, so it is always >= the number of
       columns on the line. */
    for (int i = 1; i <= length; i++)
    {
        if (i < col)
        {
            printf(" ");
        }
        else
        {
            printf("^");
            break;
        }
    }

#if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__)
    SetConsoleOutputCP(oldcp);
#endif
}

/* Two lines of text with multi-byte characters: checks line/column/byte
   tracking across '\n' and exercises PrintLine. */
void Test3(void)
{
    struct Stream stream = STREAM_INIT;
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == NULL);

    if (Stream_Set(&stream, u8"ábç\n dêf\n"))
    {
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'á');
        assert(stream.NextBytePos == 2);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'b');
        assert(stream.NextBytePos == 3);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');
        assert(stream.NextBytePos == 5);

        /* Matching '\n' moves to the next line and resets the column. */
        Stream_Match(&stream);
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'\n');
        assert(stream.NextBytePos == 6);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L' ');
        assert(stream.NextBytePos == 7);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'd');
        assert(stream.NextBytePos == 8);

        PrintLine(stream.data, stream.CurrentBytePos, stream.CurrentCol);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'ê');
        assert(stream.NextBytePos == 10);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 2);
        assert(stream.CurrentChar == L'f');
        assert(stream.NextBytePos == 11);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 3);
        assert(stream.CurrentChar == L'\n');
        assert(stream.NextBytePos == 12);
    }

    Stream_Close(&stream);
}

/* Same walk as Test1L, but the text comes from a file.
   Assumes "utf8maca.txt" contains exactly the UTF-8 text "maçã" (optionally
   preceded by a BOM) - TODO confirm the fixture. The test is skipped when
   the file cannot be opened. */
void Test1(void)
{
    struct Stream stream = STREAM_INIT;

    if (Stream_Open(&stream, "utf8maca.txt"))
    {
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'm');
        assert(stream.CurrentBytePos == 0);
        assert(stream.NextBytePos == 1);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'a');
        assert(stream.CurrentBytePos == 1);
        assert(stream.NextBytePos == 2);

        assert(Stream_LookAhead(&stream) == L'ç');

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');
        assert(stream.CurrentBytePos == 2);
        assert(stream.NextBytePos == 4);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ã');

        /* Reaching the end of the text advances the column once (the EOF
           position counts as one past the last character), exactly as
           Test1L checks for the same text. */
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);

        /* Further matches at EOF change nothing. */
        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
    }

    Stream_Close(&stream);
}

/* Walks the literal "maçã" character by character, checking positions and
   both look-ahead functions up to and past the end of the text. */
void Test1L(void)
{
    struct Stream stream = STREAM_INIT;

    if (Stream_Set(&stream, u8"maçã"))
    {
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);
        assert(stream.data != NULL);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 1);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'm');
        assert(stream.CurrentBytePos == 0);
        assert(stream.NextBytePos == 1);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 2);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == 'a');
        assert(stream.CurrentBytePos == 1);
        assert(stream.NextBytePos == 2);

        assert(Stream_LookAhead(&stream) == L'ç');

        wchar_t ch2;
        wchar_t ch1 = Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == L'ç' && ch2 == L'ã');

        Stream_Match(&stream);
        assert(stream.CurrentCol == 3);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ç');

        ch1 = Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == L'ã' && ch2 == WEOF);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 4);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == L'ã');
        assert(stream.CurrentBytePos == 4);
        assert(stream.NextBytePos == 6);

        ch1 = Stream_LookAhead2(&stream, &ch2);
        assert(ch1 == WEOF && ch2 == WEOF);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 5);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.CurrentBytePos == 6);
        assert(stream.NextBytePos == 6);
    }

    Stream_Close(&stream);
}

/* NOTE(review): this test expects the very first Stream_Match on
   "utf8maca.txt" to report EOF, which cannot hold together with Test1 on the
   same file. It presumably targeted an empty file - confirm the intended
   fixture. The expectations are kept as they were; only the EOF comparison
   now uses the (wchar_t) cast like every other test, so it does not depend
   on the width or signedness of wchar_t. */
void Test2(void)
{
    struct Stream stream = STREAM_INIT;

    if (Stream_Open(&stream, "utf8maca.txt"))
    {
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 0);
        assert(stream.CurrentChar == 0);
        assert(stream.NextBytePos == 0);

        Stream_Match(&stream);
        assert(stream.CurrentCol == 0);
        assert(stream.CurrentLine == 1);
        assert(stream.CurrentChar == (wchar_t)EOF);
        assert(stream.NextBytePos == 0);
    }

    Stream_Close(&stream);
}

/* A stream with no text attached: matching, looking ahead and closing must
   all leave it in its initial state. */
void EmptyTest(void)
{
    struct Stream stream = STREAM_INIT;
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);

    Stream_Match(&stream);
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);

    assert(Stream_LookAhead(&stream) == (wchar_t)EOF);

    Stream_Close(&stream);
    assert(stream.CurrentCol == 0);
    assert(stream.CurrentLine == 0);
    assert(stream.CurrentChar == 0);
    assert(stream.CurrentBytePos == 0);
    assert(stream.NextBytePos == 0);
    assert(stream.data == 0);
}

int main(void)
{
    EmptyTest();
    Test1();
    Test1L();
    Test2();
    Test3();
    return 0;
}
#ifndef STREAM_H
#define STREAM_H

#include <stdbool.h>
#include <stddef.h>
#include <wchar.h> /* WEOF, which Stream_LookAhead2 stores in *ch2 */

/*
  Description:
    A cursor over NUL-terminated UTF-8 text held in memory.
    Initialize with STREAM_INIT; release with Stream_Close.
*/
struct Stream
{
    const char* data;    /* utf8 encoded text; released with free() by Stream_Close */
    wchar_t CurrentChar; /* last character returned by Stream_Match (0 before the first match) */
    int CurrentLine;     /* 0 before the first match, then 1-based */
    int CurrentCol;      /* column of CurrentChar; reset to 0 when '\n' is matched */
    int CurrentBytePos;  /* byte offset of CurrentChar in data */
    int NextBytePos;     /* byte offset of the next character to be matched */
};

#define STREAM_INIT {0}

/*
  Description:
    The Stream_Match function updates the values of CurrentChar, CurrentLine,
    CurrentCol, CurrentBytePos and NextBytePos to represent the next
    character. For convenience the updated CurrentChar is returned.
    At the end of the text (wchar_t)EOF is returned.
*/
wchar_t Stream_Match(struct Stream* stream);

/*
  Description:
    The Stream_Close function releases the text with free() and resets the
    position fields of the Stream object to their initial values.
*/
void Stream_Close(struct Stream* stream);

/*
  Description:
    The Stream_Attach function closes the stream and makes it read from
    'text'. Stream_Close later passes 'text' to free(), so the stream takes
    ownership of it.
*/
void Stream_Attach(struct Stream* stream, const char* text);

/*
  Description:
    The Stream_Set function makes the stream read from a private copy of
    'text'. Returns false when the copy cannot be allocated.
*/
bool Stream_Set(struct Stream* stream, const char* text);

/*
  Description:
    The Stream_Open function loads the file 'path' into memory, skipping a
    leading UTF-8 byte order mark, and makes the stream read from it.
    Returns false when the file cannot be read or memory cannot be allocated.
*/
bool Stream_Open(struct Stream* stream, const char* path);

/*
  Description:
    The Stream_LookAhead function returns the character the next
    Stream_Match would return, without changing the stream.
*/
wchar_t Stream_LookAhead(const struct Stream* stream);

/*
  Description:
    The Stream_LookAhead2 function returns the next character like
    Stream_LookAhead and stores the character after it in *ch2.
    *ch2 is WEOF when there is no such character.
*/
wchar_t Stream_LookAhead2(const struct Stream* stream, wchar_t* ch2);

#endif /* STREAM_H */
#include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> #include <assert.h> #include <string.h> #include <assert.h> #include <ctype.h> #include "Stream.h" #if defined(_WIN32) || defined(_WIN64) || defined(__WIN32__) || defined(__WINDOWS__) #define stat _stat #define strdup _strdup #endif void Stream_Attach(struct Stream* stream, const char* text) { Stream_Close(stream); stream->data = text; } bool Stream_Set(struct Stream* stream, const char* text) { char* data = strdup(text); if (data) { Stream_Attach(stream, data); } return data != NULL; } bool Stream_Open(struct Stream* stream, const char* path) { bool result = false; struct stat info; int r = stat( path, &info); if (r == 0) { char* data = (char*)malloc(sizeof(char) * info.st_size + 1); if (data != NULL) { FILE* file = fopen(path, "r"); if (file != NULL) { //SKIP BOM if (info.st_size >= 3) { fread(data, 1, 3, file); if (data[0] == (char)0xEF && data[1] == (char)0xBB && data[2] == (char)0xBF) { size_t n = fread(data, 1, info.st_size - 3, file); data[n] = 0; } else { size_t n = fread(data + 3, 1, info.st_size - 3, file); data[3 + n] = 0; } } else { size_t n = fread(data, 1, info.st_size, file); data[n] = 0; } fclose(file); result = true; Stream_Attach(stream, data); } } } return result; } static wchar_t ReadNextChar(const char* data, int currentPos, int* bytes) { //https://www.ietf.org/rfc/rfc3629.txt //https://www.fileformat.info/info/unicode/utf8.htm int pos = currentPos; unsigned u = EOF; if (data != NULL) { int c = data[pos]; if (c == '\0' /*EOF*/) { u = EOF; } else if ((c & 0x80) == 0) { pos++; u = c; } else if ((c & 0xC0) == 0x80) { u = EOF; } else { pos++; u = (c & 0xE0) == 0xC0 ? (c & 0x1F) : (c & 0xF0) == 0xE0 ? (c & 0x0F) : (c & 0xF8) == 0xF0 ? 
(c & 0x07) : 0; if (u == 0) { u = EOF; } else { for (;;) { c = data[pos]; pos++; if (c == EOF) { break; } else if ((c & 0xC0) == 0x80) { u = (u << 6) | (c & 0x3F); } else { pos--; break; } } } } } *bytes = pos - currentPos; return u; } wchar_t Stream_LookAhead(const struct Stream* stream) { int bytes = 0; wchar_t ch = ReadNextChar(stream->data, stream->NextBytePos, &bytes); return ch; } wchar_t Stream_LookAhead2(const struct Stream* stream, wchar_t* ch2) { *ch2 = WEOF; int bytes = 0; wchar_t ch = ReadNextChar(stream->data, stream->NextBytePos, &bytes); if (bytes > 0) { *ch2 = ReadNextChar(stream->data, stream->NextBytePos + bytes, &bytes); } return ch; } wchar_t Stream_Match(struct Stream* stream) { //if (stream->CurrentChar) int bytes = 0; wchar_t ch = ReadNextChar(stream->data, stream->NextBytePos, &bytes); if (bytes > 0) { if (stream->CurrentLine == 0) { stream->CurrentLine = 1; } stream->CurrentBytePos = stream->NextBytePos; stream->NextBytePos += bytes; stream->CurrentCol++; if (ch == '\n') //fopen on windows automatically removes \r { stream->CurrentLine++; stream->CurrentCol = 0; } stream->CurrentChar = ch; } else if (ch == (wchar_t)EOF) { if (stream->CurrentBytePos != stream->NextBytePos) { stream->CurrentBytePos = stream->NextBytePos; stream->CurrentCol++; stream->CurrentChar = ch; } } return ch; } void Stream_Close(struct Stream* stream) { free((void*)stream->data); stream->CurrentCol = 0; stream->CurrentLine = 0; stream->NextBytePos = 0; stream->CurrentBytePos = 0; stream->CurrentChar = 0; }