Fri Feb 24 10:33:02 PST 2006
- Previous message: [Slony1-commit] By cbbrowne: New Directory
- Next message: [Slony1-commit] By cbbrowne: Improve description of MERGE SET; it didn't make it clear
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
Log Message: ----------- Add in scanner code for splitting DDL into individual statements. This includes a test bed and a small test suite. Added Files: ----------- slony1-engine/src/parsestatements: Makefile (r1.1) README (r1.1) emptytestresult.expected (r1.1) scanner.c (r1.1) scanner.h (r1.1) test-scanner.c (r1.1) test_sql.expected (r1.1) test_sql.sql (r1.1) -------------- next part -------------- --- /dev/null +++ src/parsestatements/test_sql.expected @@ -0,0 +1,111 @@ +Input: select * from foo; select * from bar; select * from frobozz; +alter table foo add column c integer; +alter table foo alter column c set not null; + +-- Comment line that should hid a whole bunch of quoting... ;; $$ '"''"; "\"\"\"$$ ;"\""" + +-- Here is an old-style pl/pgsql function +create function foo (text) returns integer as ' + declare + rc record; + begin + select * into rc from foo where name = ''Some Favored name''; + return NULL; + end;' language plpgsql; + +select * from foo; select * from bar; select * from frobozz; + +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = 'Some Favored name'; + return NULL; + end; +$$ language plpgsql; + +select * from foo; select * from bar; select * from frobozz; + +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = $23$Some Favored name$23$; + -- Use a secondary bit of quoting to make sure that nesting works... 
+ select $24$ another thing $24$; + return NULL; + end; +$$ language plpgsql; + + +statement 0 +------------------------------------------- +select * from foo; +statement 1 +------------------------------------------- + select * from bar; +statement 2 +------------------------------------------- + select * from frobozz; +statement 3 +------------------------------------------- + +alter table foo add column c integer; +statement 4 +------------------------------------------- + +alter table foo alter column c set not null; +statement 5 +------------------------------------------- + + +-- Comment line that should hid a whole bunch of quoting... ;; $$ '"''"; "\"\"\"$$ ;"\""" + +-- Here is an old-style pl/pgsql function +create function foo (text) returns integer as ' + declare + rc record; + begin + select * into rc from foo where name = ''Some Favored name''; + return NULL; + end;' language plpgsql; +statement 6 +------------------------------------------- + + +select * from foo; +statement 7 +------------------------------------------- + select * from bar; +statement 8 +------------------------------------------- + select * from frobozz; +statement 9 +------------------------------------------- + + +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = 'Some Favored name'; + return NULL; + end; +$$ language plpgsql; +statement 10 +------------------------------------------- + + +select * from foo; +statement 11 +------------------------------------------- + select * from bar; +statement 12 +------------------------------------------- + select * from frobozz; +statement 13 +------------------------------------------- + + +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = $23$Some Favored name$23$; + -- Use a secondary bit of quoting to make sure that nesting works... 
+ select $24$ another thing $24$; + return NULL; + end; +$$ language plpgsql; \ No newline at end of file --- /dev/null +++ src/parsestatements/scanner.c @@ -0,0 +1,164 @@ +/* $Id: scanner.c,v 1.1 2006/02/24 18:33:02 cbbrowne Exp $ */ +#include <stdio.h> +#include "scanner.h" + +int STMTS[MAXSTATEMENTS]; +int scan_for_statements (const char *extended_statement) { + int cpos; + int bquote; + int bpos; + enum quote_states state; + char cchar; + int d1start, d1end, d2start, d2end, d1stemp, j; + int statements; + + /* Initialize */ + cpos = 0; + statements = 0; + bquote = 0; + bpos = 0; /* Location of last backquote */ + state = Q_NORMAL_STATE; + + while (state != Q_DONE) { + cchar = extended_statement[cpos]; + switch (cchar) { + case '\0': + state = Q_DONE; + break; + case '/': + if (state == Q_NORMAL_STATE) { + state = Q_HOPE_TO_CCOMMENT; + break; + } + if (state == Q_HOPE_CEND) { + state = Q_NORMAL_STATE; + break; + } + break; + case '*': + if (state == Q_HOPE_TO_CCOMMENT) { + state = Q_CCOMMENT; + break; + } + break; + case '\\': + if ((state == Q_DOUBLE_QUOTING) || (state == Q_SINGLE_QUOTING)) { + if (bquote == 0) { + bquote = 1; + bpos = cpos; + break; + } + } + + break; + case '$': + if (state == Q_NORMAL_STATE) { + d1start = cpos; + state = Q_DOLLAR_BUILDING; + break; + } + if (state == Q_DOLLAR_BUILDING) { + d1end = cpos; + state = Q_DOLLAR_QUOTING; + break; + } + if (state == Q_DOLLAR_QUOTING) { + d2start = cpos; + state = Q_DOLLAR_UNBUILDING; + break; + } + if (state == Q_DOLLAR_UNBUILDING) { + d2end = cpos; + /* Compare strings - is this the delimiter the imperials are looking for? */ + if ((d1end - d1start) != (d2end - d2start)) { + /* Lengths don't even match - these aren't the droids we're looking for */ + state = Q_DOLLAR_QUOTING; /* Return to dollar quoting mode */ + break; + } + int d1stemp = d1start; + while (d1stemp < d1end) { + if (extended_statement[d1stemp] != extended_statement[d2start]) { + /* mismatch - these aren't the droids... 
*/ + state = Q_DOLLAR_QUOTING; + break; + } + d1stemp++; /* Step forward to the next character */ + d2start++; + } + if ((d1stemp >= d1end) && (state == Q_DOLLAR_UNBUILDING)) { /* No mismatches */ + state = Q_NORMAL_STATE; + break; + } + } + break; + case '"': + if (state == Q_NORMAL_STATE) { + state = Q_DOUBLE_QUOTING; + break; + } + if (state == Q_DOUBLE_QUOTING) { + /* But a backquote hides this! */ + if ((bquote == 1) && (bpos == cpos -1)) { + break; /* Ignore the quote */ + } + state = Q_NORMAL_STATE; + break; + } + break; + case '\'': + if (state == Q_NORMAL_STATE) { + state = Q_SINGLE_QUOTING; + break; + } + if (state == Q_SINGLE_QUOTING) { + /* But a backquote hides this! */ + if ((bquote == 1) && (bpos == cpos -1)) { + break; /* Ignore the quote */ + } + state = Q_NORMAL_STATE; + break; + } + break; + case '-': + if (state == Q_NORMAL_STATE) { + state = Q_HOPE_TO_DASH; + break; + } + if (state == Q_HOPE_TO_DASH) { + state = Q_DASHING_STATE; + break; + } + break; + case '\n': + if (state == Q_DASHING_STATE) { + state = Q_NORMAL_STATE; + } + if (state == Q_DOLLAR_BUILDING) state = Q_NORMAL_STATE; + if (state == Q_DOLLAR_UNBUILDING) state = Q_DOLLAR_QUOTING; + break; + case '\r': + if (state == Q_DASHING_STATE) { + state = Q_NORMAL_STATE; + } + if (state == Q_DOLLAR_BUILDING) state = Q_NORMAL_STATE; + if (state == Q_DOLLAR_UNBUILDING) state = Q_DOLLAR_QUOTING; + break; + case ' ': + if (state == Q_DOLLAR_BUILDING) state = Q_NORMAL_STATE; + if (state == Q_DOLLAR_UNBUILDING) state = Q_DOLLAR_QUOTING; + break; + case ';': + if (state == Q_NORMAL_STATE) { + STMTS[statements++] = ++cpos; + if (statements >= MAXSTATEMENTS) { + return statements; + } + } + break; + default: + break; + } + cpos++; + } + return statements; +} --- /dev/null +++ src/parsestatements/test-scanner.c @@ -0,0 +1,25 @@ +#include <stdio.h> +#include <stdlib.h> +#include "scanner.h" + +char foo[65536]; +extern int STMTS[1024]; +extern int statements; + +int main (int argc, char *const argv[]) { 
+ int nstatements = 0; + fread(foo, sizeof(char), 65536, stdin); + printf("Input: %s\n", foo); + + nstatements = scan_for_statements (foo); + + int i, j, START; + START = 0; + for (i = 0; i < nstatements; i++) { + printf("\nstatement %d\n-------------------------------------------\n", i); + for (j = START; j < STMTS[i]; j++) { + printf("%c", foo[j]); + } + START = STMTS[i]; + } +} --- /dev/null +++ src/parsestatements/emptytestresult.expected @@ -0,0 +1 @@ +Input: --- /dev/null +++ src/parsestatements/test_sql.sql @@ -0,0 +1,37 @@ +select * from foo; select * from bar; select * from frobozz; +alter table foo add column c integer; +alter table foo alter column c set not null; + +-- Comment line that should hide a whole bunch of quoting... ;; $$ +-- '"''"; "\"\"\"$$ ;"\""" + +-- Here is an old-style pl/pgsql function using heavy quoting +create function foo (text) returns integer as ' + declare + rc record; + begin + select * into rc from foo where name = ''Some Favored name''; + return NULL; + end;' language plpgsql; + +select * from foo; select * from bar; select * from frobozz; + +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = 'Some Favored name'; + return NULL; + end; +$$ language plpgsql; + +select * from foo; select * from bar; select * from frobozz; + +-- This isn't actually a particularly well-framed stored function +-- but it abuses $$dollar quoting$$ quite nicely... +create or replace function foo (text) returns integer as $$ + begin + select * into rc from foo where name = $23$Some Favored name$23$; + -- Use a secondary bit of quoting to make sure that nesting works... 
+ select $24$ -- another " " thing ' ' \\\'\$ $24$; + return NULL; + end; +$$ language plpgsql; --- /dev/null +++ src/parsestatements/scanner.h @@ -0,0 +1,17 @@ +/* $Id: scanner.h,v 1.1 2006/02/24 18:33:02 cbbrowne Exp $ */ +#define MAXSTATEMENTS 1000 +enum quote_states { + Q_NORMAL_STATE, + Q_HOPE_TO_DASH, /* If next char is -, then start a -- comment 'til the end of the line */ + Q_DASHING_STATE, /* comment using -- to the end of the line */ + Q_HOPE_TO_CCOMMENT, /* If next char is *, then start a C-style comment */ + Q_CCOMMENT, /* Inside a C-style comment */ + Q_HOPE_CEND, /* expecting the end of a C-style comment */ + Q_DOUBLE_QUOTING, /* inside a "double-quoted" quoting */ + Q_SINGLE_QUOTING, /* inside a 'single-quoted' quoting */ + Q_DOLLAR_QUOTING, /* inside a $doll$ dollar quoted $doll$ section */ + Q_DOLLAR_BUILDING, /* inside the $doll$ of a dollar quoted section */ + Q_DOLLAR_UNBUILDING, /* inside a possible closing $doll$ of a dollar quoted section */ + Q_DONE /* NULL ends it all... */ +}; + --- /dev/null +++ src/parsestatements/Makefile @@ -0,0 +1,17 @@ +PROG=test-scanner +FLEX=flex + +all: $(PROG) test + +scanner.o: scanner.c scanner.h + +$(PROG): $(PROG).o scanner.o + $(CC) $(CFLAGS) -o $(PROG) test-scanner.c scanner.o + +test-scanner.o: test-scanner.c + +test: test-scanner + ./test-scanner < /dev/null > emptytestresult.log + cmp ./emptytestresult.log emptytestresult.expected + ./test-scanner < ./test_sql.sql > test_sql.log + cmp ./test_sql.log ./test_sql.expected \ No newline at end of file --- /dev/null +++ src/parsestatements/README @@ -0,0 +1,10 @@ +scanner.c is a scanner which uses a state machine to walk through a +set of SQL statements and identify where the ends of statements are +(e.g. - relevant ";"'s). + +It needs to handle various forms of quoting, notably with "double +quotes", 'single quotes', and $dol$dollar quoting$dol$. 
+ +This will need to be integrated into both slon and slonik in order +to split DDL requests submitted via EXECUTE SCRIPT into +individual statements.
- Previous message: [Slony1-commit] By cbbrowne: New Directory
- Next message: [Slony1-commit] By cbbrowne: Improve description of MERGE SET; it didn't make it clear
- Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the Slony1-commit mailing list