/** * Jingga * * @copyright Jingga * @license OMS License 2.0 * @version 1.0.0 * @link https://jingga.app */ #ifndef COMS_UTILS_REGEX_SIMPLIFIED_H #define COMS_UTILS_REGEX_SIMPLIFIED_H #include "../stdlib/Types.h" #include "StringUtils.h" struct SimplifiedRegexParser { const char *pattern; int32 pos; }; struct MatchResult { bool matched; int32 length; }; static void regex_skip_whitespace(SimplifiedRegexParser *parser) { while (parser->pattern[parser->pos] == ' ') { parser->pos++; } } MatchResult regex_match_char(SimplifiedRegexParser *parser, const char *text) { regex_skip_whitespace(parser); MatchResult result = {false, 0}; if (parser->pattern[parser->pos] == '\0' || *text == '\0') { return result; } if (parser->pattern[parser->pos] == '\\') { // Handle escape sequences parser->pos++; if (parser->pattern[parser->pos] == 'd') { if (str_is_num(*text)) { result.matched = true; result.length = 1; parser->pos++; } } else if (parser->pattern[parser->pos] == *text) { result.matched = true; result.length = 1; parser->pos++; } } else if (parser->pattern[parser->pos] == '.') { // Match any character result.matched = true; result.length = 1; parser->pos++; } else if (parser->pattern[parser->pos] == *text) { // Match literal character result.matched = true; result.length = 1; parser->pos++; } else if (parser->pattern[parser->pos] == 'a' && parser->pattern[parser->pos+1] == '-' && parser->pattern[parser->pos+2] == 'z') { // Match a-z range if (*text >= 'a' && *text <= 'z') { result.matched = true; result.length = 1; parser->pos += 3; } } else if (parser->pattern[parser->pos] == 'A' && parser->pattern[parser->pos+1] == '-' && parser->pattern[parser->pos+2] == 'Z') { // Match A-Z range if (*text >= 'A' && *text <= 'Z') { result.matched = true; result.length = 1; parser->pos += 3; } } else if (parser->pattern[parser->pos] == '0' && parser->pattern[parser->pos+1] == '-' && parser->pattern[parser->pos+2] == '9') { // Match 0-9 range if (str_is_num(*text)) { result.matched = true; result.length = 1; parser->pos += 3; } } return result; } int regex_parse_number(SimplifiedRegexParser *parser) { int32 num = 0; while (str_is_num(parser->pattern[parser->pos])) { num = num * 10 + (parser->pattern[parser->pos] - '0'); parser->pos++; } return num; } MatchResult regex_match_pattern(SimplifiedRegexParser *parser, const char *text); MatchResult regex_match_atom(SimplifiedRegexParser *parser, const char *text) { regex_skip_whitespace(parser); MatchResult result = {false, 0}; if (parser->pattern[parser->pos] == '(') { // Handle group int32 saved_pos = parser->pos; parser->pos++; result = regex_match_pattern(parser, text); if (parser->pattern[parser->pos] == ')') { parser->pos++; } else { // Group not properly closed, backtrack parser->pos = saved_pos; result = regex_match_char(parser, text); } } else { // Handle single character result = regex_match_char(parser, text); } return result; } MatchResult regex_match_repetition(SimplifiedRegexParser *parser, const char *text, MatchResult atom_result) { MatchResult result = {false, 0}; parser->pos++; // Skip '{' regex_skip_whitespace(parser); int32 min = regex_parse_number(parser); regex_skip_whitespace(parser); int32 max = min; if (parser->pattern[parser->pos] == ',') { parser->pos++; regex_skip_whitespace(parser); if (parser->pattern[parser->pos] == '}') { // {x,} means x or more (no max) max = -1; } else { max = regex_parse_number(parser); } } regex_skip_whitespace(parser); if (parser->pattern[parser->pos] != '}') { // Invalid repetition syntax return result; } parser->pos++; // Skip '}' if (min < 0 || (max != -1 && max < min)) { // Invalid range return result; } // Try to match exactly min times first int32 count = 0; int32 total_length = 0; const char *current_text = text; while (true) { if (max != -1 && count >= max) break; MatchResult next_result = regex_match_atom(parser, current_text); if (!next_result.matched) break; count++; total_length += next_result.length; current_text += next_result.length; } if (count >= min && (max == -1 || count <= max)) { result.matched = true; result.length = total_length; } return result; } MatchResult regex_match_element(SimplifiedRegexParser *parser, const char *text) { MatchResult atom_result = regex_match_atom(parser, text); if (!atom_result.matched) { return atom_result; } regex_skip_whitespace(parser); char quantifier = parser->pattern[parser->pos]; if (quantifier == '*') { // Zero or more parser->pos++; int32 consumed = atom_result.length; const char *remaining_text = text + consumed; MatchResult star_result = {true, consumed}; while (true) { MatchResult next_result = regex_match_atom(parser, remaining_text); if (!next_result.matched) break; consumed += next_result.length; remaining_text += next_result.length; star_result.length = consumed; } return star_result; } else if (quantifier == '+') { // One or more parser->pos++; int32 consumed = atom_result.length; const char *remaining_text = text + consumed; MatchResult plus_result = {true, consumed}; while (true) { MatchResult next_result = regex_match_atom(parser, remaining_text); if (!next_result.matched) break; consumed += next_result.length; remaining_text += next_result.length; plus_result.length = consumed; } return plus_result; } else if (quantifier == '?') { // Zero or one parser->pos++; return atom_result; } else if (quantifier == '{') { // Min/max repetition {x,y} return regex_match_repetition(parser, text, atom_result); } else { // No quantifier return atom_result; } } MatchResult regex_match_pattern(SimplifiedRegexParser *parser, const char *text) { MatchResult result = regex_match_element(parser, text); regex_skip_whitespace(parser); if (parser->pattern[parser->pos] == '|') { parser->pos++; MatchResult alternative = regex_match_pattern(parser, text); if (alternative.matched) { return alternative; } } return result; } bool regex_simplified_validate(const char* pattern, const char* text) { SimplifiedRegexParser parser = {pattern, 0}; bool starts_with = false; bool ends_with = false; // Check for ^ and $ anchors if (parser.pattern[parser.pos] == '^') { starts_with = true; parser.pos++; } MatchResult result = regex_match_pattern(&parser, text); if (parser.pattern[parser.pos] == '$') { ends_with = true; parser.pos++; } // Check if we consumed the entire pattern if (parser.pattern[parser.pos] != '\0') { return false; } // Check anchors if (starts_with && ends_with) { return result.matched && (result.length == str_length(text)); } else if (starts_with) { return result.matched && (result.length > 0); } else if (ends_with) { return result.matched && (text[result.length] == '\0'); } else { return result.matched; } } /* // Test function void test_regex(const char *pattern, const char *text, bool expected) { bool result = regex_simplified_validate(pattern, text); printf("Pattern: '%-10s'\tText: '%-6s'\tExpected: %-5s\tActual: %-5s\t%s\n", pattern, text, expected ? "true" : "false", result ? "true" : "false", (result == expected) ? "✓" : "✗"); } int main() { // Test cases printf("Enhanced Regex Validator Tests\n"); printf("=============================\n"); // Basic tests test_regex("abc", "abc", true); test_regex("^abc$", "abc", true); test_regex("^abc$", "abcd", false); // Character classes test_regex("a-z", "a", true); test_regex("a-z", "z", true); test_regex("a-z", "A", false); test_regex("A-Z", "Z", true); test_regex("A-Z", "a", false); test_regex("0-9", "5", true); test_regex("0-9", "a", false); test_regex("\\d", "5", true); test_regex("\\d", "a", false); // Quantifiers test_regex("a*", "", true); test_regex("a*", "a", true); test_regex("a*", "aaa", true); test_regex("a+", "", false); test_regex("a+", "a", true); test_regex("a+", "aaa", true); test_regex("a?b", "b", true); test_regex("a?b", "ab", true); test_regex("a?b", "aab", false); // Groups and alternation test_regex("(a|b)c", "ac", true); test_regex("(a|b)c", "bc", true); test_regex("(a|b)c", "cc", false); test_regex("(a-z)+", "abc", true); test_regex("(A-Z)+", "ABC", true); test_regex("(0-9)+", "123", true); // Escape sequences test_regex("\\.", ".", true); test_regex("\\.", "a", false); test_regex("a\\db", "a0b", true); test_regex("a\\db", "a9b", true); test_regex("a\\db", "aab", false); // Any character test_regex("a.b", "a b", true); test_regex("a.b", "a0b", true); test_regex("a.b", "a\nb", true); test_regex("a.b", "ab", false); // Repetition tests test_regex("a{2}", "aa", true); test_regex("a{2}", "a", false); test_regex("a{2}", "aaa", true); // More than min is allowed test_regex("a{2,4}", "aa", true); test_regex("a{2,4}", "aaa", true); test_regex("a{2,4}", "aaaa", true); test_regex("a{2,4}", "a", false); test_regex("a{2,4}", "aaaaa", false); test_regex("a{2,}", "aa", true); test_regex("a{2,}", "aaaaa", true); test_regex("a{2,}", "a", false); test_regex("(a-z){3}", "abc", true); test_regex("(a-z){3}", "ab", false); test_regex("(a-z){2,4}", "ab", true); test_regex("(a-z){2,4}", "abcd", true); test_regex("(a-z){2,4}", "abcde", false); test_regex("\\d{3}-\\d{2}", "123-45", true); test_regex("\\d{3}-\\d{2}", "12-345", false); // Combined tests test_regex("^a{2}b{1,3}c$", "aabbc", true); test_regex("^a{2}b{1,3}c$", "aabbbc", true); test_regex("^a{2}b{1,3}c$", "aabc", true); test_regex("^a{2}b{1,3}c$", "aabbbbc", false); test_regex("^a{2}b{1,3}c$", "abbc", false); return 0; } */ #endif