CVS User Account cvsuser
Fri Feb 24 10:33:02 PST 2006
Log Message:
-----------
Add in scanner code for splitting DDL into individual statements.

This includes a test bed and a small test suite.

Added Files:
-----------
    slony1-engine/src/parsestatements:
        Makefile (r1.1)
        README (r1.1)
        emptytestresult.expected (r1.1)
        scanner.c (r1.1)
        scanner.h (r1.1)
        test-scanner.c (r1.1)
        test_sql.expected (r1.1)
        test_sql.sql (r1.1)

-------------- next part --------------
--- /dev/null
+++ src/parsestatements/test_sql.expected
@@ -0,0 +1,111 @@
+Input: select * from foo;  select * from bar;  select * from frobozz;
+alter table foo add column c integer;
+alter table foo alter column c set not null; 
+
+-- Comment line that should hid a whole bunch of quoting... ;; $$ '"''"; "\"\"\"$$ ;"\"""
+
+-- Here is an old-style pl/pgsql function
+create function foo (text) returns integer as '
+  declare
+     rc record;
+  begin
+    select * into rc from foo where name = ''Some Favored name'';
+    return NULL;
+  end;' language plpgsql;
+
+select * from foo;  select * from bar;  select * from frobozz;
+
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = 'Some Favored name';
+    return NULL;
+  end;
+$$ language plpgsql;
+
+select * from foo;  select * from bar;  select * from frobozz;
+
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = $23$Some Favored name$23$;
+    -- Use a secondary bit of quoting to make sure that nesting works...
+    select $24$ another thing $24$;
+    return NULL;
+  end;
+$$ language plpgsql;
+
+
+statement 0
+-------------------------------------------
+select * from foo;
+statement 1
+-------------------------------------------
+  select * from bar;
+statement 2
+-------------------------------------------
+  select * from frobozz;
+statement 3
+-------------------------------------------
+
+alter table foo add column c integer;
+statement 4
+-------------------------------------------
+
+alter table foo alter column c set not null;
+statement 5
+-------------------------------------------
+ 
+
+-- Comment line that should hid a whole bunch of quoting... ;; $$ '"''"; "\"\"\"$$ ;"\"""
+
+-- Here is an old-style pl/pgsql function
+create function foo (text) returns integer as '
+  declare
+     rc record;
+  begin
+    select * into rc from foo where name = ''Some Favored name'';
+    return NULL;
+  end;' language plpgsql;
+statement 6
+-------------------------------------------
+
+
+select * from foo;
+statement 7
+-------------------------------------------
+  select * from bar;
+statement 8
+-------------------------------------------
+  select * from frobozz;
+statement 9
+-------------------------------------------
+
+
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = 'Some Favored name';
+    return NULL;
+  end;
+$$ language plpgsql;
+statement 10
+-------------------------------------------
+
+
+select * from foo;
+statement 11
+-------------------------------------------
+  select * from bar;
+statement 12
+-------------------------------------------
+  select * from frobozz;
+statement 13
+-------------------------------------------
+
+
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = $23$Some Favored name$23$;
+    -- Use a secondary bit of quoting to make sure that nesting works...
+    select $24$ another thing $24$;
+    return NULL;
+  end;
+$$ language plpgsql;
\ No newline at end of file
--- /dev/null
+++ src/parsestatements/scanner.c
@@ -0,0 +1,146 @@
+/* $Id: scanner.c,v 1.1 2006/02/24 18:33:02 cbbrowne Exp $ */
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include "scanner.h"
+
+/*
+ * STMTS[i] is the offset of the character just past the ';' ending
+ * statement i; it is filled in by scan_for_statements().
+ */
+int STMTS[MAXSTATEMENTS];
+
+/*
+ * Is c allowed between the two '$' of a dollar-quote tag?  Letters,
+ * digits (the test suite uses tags such as $23$), '_' and bytes with the
+ * high bit set are accepted.  Anything else means the '$' did not open a
+ * tag - for instance it was a positional parameter such as "$1;".
+ */
+static int is_dollar_tag_char (char c) {
+  unsigned char uc = (unsigned char) c;
+
+  return isalnum (uc) || (uc == '_') || (uc >= 0x80);
+}
+
+/*
+ * scan_for_statements
+ *
+ * Walk a NUL-terminated string of SQL and record where each statement
+ * ends.  A statement ends at a ';' that is not inside a -- comment, a
+ * C-style comment, a 'single' or "double" quoted string, or a
+ * $tag$ dollar quoted $tag$ section.
+ *
+ * For each such ';' the offset of the character that follows it is
+ * appended to STMTS[].  Text after the last ';' is not reported.
+ *
+ * Returns the number of entries stored in STMTS[].  Scanning stops as
+ * soon as MAXSTATEMENTS entries have been stored.  NULL input yields 0.
+ *
+ * Quoting rules:
+ *  - inside single or double quotes a backslash hides the next character;
+ *  - a doubled quote ('' or "") closes and at once reopens the string;
+ *  - C-style comments end at the first star-slash (no nesting);
+ *  - a dollar quoted section ends only at a copy of its opening tag.
+ */
+int scan_for_statements (const char *extended_statement) {
+  const char *sql = extended_statement;
+  enum quote_states state = Q_NORMAL_STATE;
+  int cpos = 0;        /* offset of the character being examined */
+  int statements = 0;  /* number of entries stored in STMTS[] */
+  int tagstart = 0;    /* offset of the '$' opening the current tag */
+  int taglen = 0;      /* length of the current tag, both '$' included */
+  int tagend;
+  char cchar;
+
+  if (sql == NULL) {
+    return 0;
+  }
+
+  /*
+   * Inside the loop sql[cpos] is never NUL, so peeking at sql[cpos + 1]
+   * cannot read past the terminator.
+   */
+  while ((cchar = sql[cpos]) != '\0') {
+    switch (state) {
+    case Q_NORMAL_STATE:
+      switch (cchar) {
+      case '-':
+        if (sql[cpos + 1] == '-') {
+          state = Q_DASHING_STATE;
+          cpos++;
+        }
+        break;
+      case '/':
+        if (sql[cpos + 1] == '*') {
+          state = Q_CCOMMENT;
+          cpos++;
+        }
+        break;
+      case '\'':
+        state = Q_SINGLE_QUOTING;
+        break;
+      case '"':
+        state = Q_DOUBLE_QUOTING;
+        break;
+      case '$':
+        /* Enter dollar quoting only if a complete $tag$ starts here */
+        tagend = cpos + 1;
+        while (is_dollar_tag_char (sql[tagend])) {
+          tagend++;
+        }
+        if (sql[tagend] == '$') {
+          tagstart = cpos;
+          taglen = tagend - cpos + 1;
+          state = Q_DOLLAR_QUOTING;
+          cpos = tagend;
+        }
+        break;
+      case ';':
+        /* Store the offset past the ';' without advancing cpos twice */
+        STMTS[statements++] = cpos + 1;
+        if (statements >= MAXSTATEMENTS) {
+          return statements;
+        }
+        break;
+      default:
+        break;
+      }
+      break;
+    case Q_DASHING_STATE:
+      if ((cchar == '\n') || (cchar == '\r')) {
+        state = Q_NORMAL_STATE;
+      }
+      break;
+    case Q_CCOMMENT:
+      if ((cchar == '*') && (sql[cpos + 1] == '/')) {
+        state = Q_NORMAL_STATE;
+        cpos++;
+      }
+      break;
+    case Q_SINGLE_QUOTING:
+    case Q_DOUBLE_QUOTING:
+      if (cchar == '\\') {
+        /* Skip the hidden character, unless the input ends here */
+        if (sql[cpos + 1] != '\0') {
+          cpos++;
+        }
+      } else if (cchar == ((state == Q_SINGLE_QUOTING) ? '\'' : '"')) {
+        state = Q_NORMAL_STATE;
+      }
+      break;
+    case Q_DOLLAR_QUOTING:
+      if ((cchar == '$') &&
+          (strncmp (sql + cpos, sql + tagstart, (size_t) taglen) == 0)) {
+        state = Q_NORMAL_STATE;
+        cpos += taglen - 1;   /* land on the closing '$' of the tag */
+      }
+      break;
+    default:
+      /* No other state is ever entered by this scanner */
+      state = Q_NORMAL_STATE;
+      break;
+    }
+    cpos++;
+  }
+  return statements;
+}
--- /dev/null
+++ src/parsestatements/test-scanner.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "scanner.h"
+
+/*
+ * Test bed for scan_for_statements(): reads SQL from stdin, echoes it, and
+ * then prints each statement found under a "statement N" banner.
+ */
+
+#define INPUT_BUFFER_SIZE 65536
+
+char foo[INPUT_BUFFER_SIZE];
+
+/* Provided by scanner.c */
+extern int STMTS[MAXSTATEMENTS];
+int scan_for_statements (const char *extended_statement);
+
+int main (int argc, char *const argv[]) {
+  size_t nread;
+  int nstatements;
+  int i, start;
+
+  (void) argc;
+  (void) argv;
+
+  /* Leave room for the terminator: the scanner and %s both stop at NUL */
+  nread = fread (foo, sizeof (char), sizeof (foo) - 1, stdin);
+  foo[nread] = '\0';
+  printf ("Input: %s\n", foo);
+
+  nstatements = scan_for_statements (foo);
+
+  /* Statement i is the text between the previous end offset and STMTS[i] */
+  start = 0;
+  for (i = 0; i < nstatements; i++) {
+    printf ("\nstatement %d\n-------------------------------------------\n", i);
+    fwrite (foo + start, sizeof (char), (size_t) (STMTS[i] - start), stdout);
+    start = STMTS[i];
+  }
+  return EXIT_SUCCESS;
+}
--- /dev/null
+++ src/parsestatements/emptytestresult.expected
@@ -0,0 +1 @@
+Input: 
--- /dev/null
+++ src/parsestatements/test_sql.sql
@@ -0,0 +1,37 @@
+select * from foo;  select * from bar;  select * from frobozz;
+alter table foo add column c integer;
+alter table foo alter column c set not null; 
+
+-- Comment line that should hide a whole bunch of quoting... ;; $$
+-- '"''"; "\"\"\"$$ ;"\"""
+
+-- Here is an old-style pl/pgsql function using heavy quoting
+create function foo (text) returns integer as '
+  declare
+     rc record;
+  begin
+    select * into rc from foo where name = ''Some Favored name'';
+    return NULL;
+  end;' language plpgsql;
+
+select * from foo;  select * from bar;  select * from frobozz;
+
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = 'Some Favored name';
+    return NULL;
+  end;
+$$ language plpgsql;
+
+select * from foo;  select * from bar;  select * from frobozz;
+
+-- This isn't actually a particularly well-framed stored function
+-- but it abuses $$dollar quoting$$ quite nicely...
+create or replace function foo (text) returns integer as $$
+  begin
+    select * into rc from foo where name = $23$Some Favored name$23$;
+    -- Use a secondary bit of quoting to make sure that nesting works...
+    select $24$ -- another " " thing ' ' \\\'\$ $24$;
+    return NULL;
+  end;
+$$ language plpgsql;
--- /dev/null
+++ src/parsestatements/scanner.h
@@ -0,0 +1,30 @@
+/* $Id: scanner.h,v 1.1 2006/02/24 18:33:02 cbbrowne Exp $ */
+#ifndef SLONY_PARSESTATEMENTS_SCANNER_H
+#define SLONY_PARSESTATEMENTS_SCANNER_H
+
+/* Capacity of the STMTS[] array that scan_for_statements() fills in */
+#define MAXSTATEMENTS 1000
+
+/* States of the statement-splitting scanner in scanner.c */
+enum quote_states {
+	Q_NORMAL_STATE,      /* plain SQL text; the only state in which ';' ends a statement */
+	Q_HOPE_TO_DASH,      /* If next char is -, then start a -- comment 'til the end of the line */
+	Q_DASHING_STATE,     /* comment using -- to the end of the line */
+	Q_HOPE_TO_CCOMMENT,  /* If next char is *, then start a C-style comment */
+	Q_CCOMMENT,          /* Inside a C-style comment */
+	Q_HOPE_CEND,         /* expecting the end of a C-style comment */
+	Q_DOUBLE_QUOTING,    /* inside a "double-quoted" quoting */
+	Q_SINGLE_QUOTING,    /* inside a 'single-quoted' quoting */
+	Q_DOLLAR_QUOTING,    /* inside a $doll$ dollar quoted $doll$ section */
+	Q_DOLLAR_BUILDING,   /* inside the $doll$ of a dollar quoted section */
+	Q_DOLLAR_UNBUILDING, /* inside a possible closing $doll$ of a dollar quoted section */
+	Q_DONE               /* the terminating NUL ends it all... */
+};
+
+/*
+ * Scan a NUL-terminated SQL string; returns the number of statement end
+ * offsets stored in the global STMTS[] array defined in scanner.c.
+ */
+int scan_for_statements (const char *extended_statement);
+
+#endif /* SLONY_PARSESTATEMENTS_SCANNER_H */
--- /dev/null
+++ src/parsestatements/Makefile
@@ -0,0 +1,25 @@
+PROG=test-scanner
+FLEX=flex
+
+.PHONY: all test clean
+
+all: $(PROG) test
+
+scanner.o: scanner.c scanner.h
+
+test-scanner.o: test-scanner.c scanner.h
+
+# Link the objects named as prerequisites rather than recompiling the source
+$(PROG): $(PROG).o scanner.o
+	$(CC) $(CFLAGS) $(LDFLAGS) -o $(PROG) $(PROG).o scanner.o
+
+# Run the scanner on empty input and on the sample SQL; cmp fails the
+# build if the output differs from the recorded expectation
+test: $(PROG)
+	./$(PROG) < /dev/null > emptytestresult.log
+	cmp ./emptytestresult.log emptytestresult.expected
+	./$(PROG) < ./test_sql.sql > test_sql.log
+	cmp ./test_sql.log ./test_sql.expected
+
+clean:
+	rm -f $(PROG) $(PROG).o scanner.o emptytestresult.log test_sql.log
--- /dev/null
+++ src/parsestatements/README
@@ -0,0 +1,10 @@
+scanner.c is a scanner which uses a state machine to walk through a
+set of SQL statements and identify where each statement ends
+(i.e. at the ";" characters that are not hidden inside quoting or comments).
+
+It needs to handle various forms of quoting, notably with "double
+quotes", 'single quotes', and $dol$dollar quoting$dol$.
+
+This will need to get integrated into both slon and slonik in order
+to split DDL requests being submitted via EXECUTE SCRIPT into
+individual statements.



More information about the Slony1-commit mailing list