From 7dbf688ae99f05c5f85200614f626c85498b76f5 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 3 Jan 2022 21:55:07 +0000 Subject: [PATCH 01/10] Escape string and blob results from dump more correctly dumpTables currently badly handles BLOB and TEXT data containing control characters: * MySQL will interpret and unescape string literals e.g.`\r` will become carriage return. * Postgres will not allow string literals to contain NUL nor will SQLite so BLOBs will not dump correctly. * Schemas should not be set on the destination dump * MSSQL needs the N prefix to correctly ensure that UTF-8 data is correctly transferred. Signed-off-by: Andrew Thornton --- engine.go | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/engine.go b/engine.go index 709cc384..51a11b35 100644 --- a/engine.go +++ b/engine.go @@ -11,7 +11,9 @@ import ( "io" "os" "reflect" + "regexp" "runtime" + "strconv" "strings" "time" @@ -450,6 +452,8 @@ func formatBool(s string, dstDialect dialects.Dialect) string { return s } +var controlCharactersRe = regexp.MustCompile(`[\x00-\x1f\x7f]+`) + // dumpTables dump database all table structs and data to w with specify db type func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w io.Writer, tp ...schemas.DBType) error { var dstDialect dialects.Dialect @@ -465,7 +469,7 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w destURI := dialects.URI{ DBType: tp[0], DBName: uri.DBName, - Schema: uri.Schema, + // DO NOT SET SCHEMA HERE } if err := dstDialect.Init(&destURI); err != nil { return err @@ -480,6 +484,13 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w return err } + if dstDialect.URI().DBType == schemas.MYSQL { + // For MySQL set NO_BACKLASH_ESCAPES so that strings work properly + if _, err := io.WriteString(w, "SET sql_mode='NO_BACKSLASH_ESCAPES';\n"); err != nil { + return err + } + } + 
for i, table := range tables { dstTable := table if table.Type != nil { @@ -594,7 +605,135 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w if _, err = io.WriteString(w, "'"+r+"'"); err != nil { return err } + } else if len(s.String) == 0 { + if _, err := io.WriteString(w, "''"); err != nil { + return err + } + } else if dstDialect.URI().DBType == schemas.POSTGRES { + if dstTable.Columns()[i].SQLType.IsBlob() { + // Postgres has the escape format and we should use that for bytea data + if _, err := fmt.Fprintf(w, "'\\x%x'", s.String); err != nil { + return err + } + } else { + // Postgres concatentates strings using || (NOTE: a NUL byte in a text segment will fail) + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil { + return err + } + } + if _, err := io.WriteString(w, "e'"); err != nil { + return err + } + for i := loc[0]; i < loc[1]; i++ { + if _, err := fmt.Fprintf(w, "\\x%0x", toCheck[i]); err != nil { + return err + } + } + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "' || "); err != nil { + return err + } + } else { + if _, err := io.WriteString(w, "'"); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.MYSQL { + loc := controlCharactersRe.FindStringIndex(s.String) + if loc == nil { + if _, err := io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { + return err + } + } else { + if _, err := io.WriteString(w, "CONCAT("); err != nil { + return err + } + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, 
"'"+toCheck+"')"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"', "); err != nil { + return err + } + } + for i := loc[0]; i < loc[1]-1; i++ { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(toCheck[i]))+"), "); err != nil { + return err + } + } + char := toCheck[loc[1]-1] + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"), "); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"))"); err != nil { + return err + } + } + } + } + } else if dstDialect.URI().DBType == schemas.SQLITE { + if dstTable.Columns()[i].SQLType.IsBlob() { + // SQLite has its escape format + if _, err := fmt.Fprintf(w, "X'%x'", s.String); err != nil { + return err + } + } else { + // SQLite concatentates strings using || (NOTE: a NUL byte in a text segment will fail) + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"'"); err != nil { + return err + } + break + } + if loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"' || "); err != nil { + return err + } + } + if _, err := fmt.Fprintf(w, "X'%x'", toCheck[loc[0]:loc[1]]); err != nil { + return err + } + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, " || "); err != nil { + return err + } + } + } + } } else { + // In MSSQL we have to use NChar format to get unicode strings. 
+ if dstDialect.URI().DBType == schemas.MSSQL && dstTable.Columns()[i].SQLType.IsText() { + if _, err = io.WriteString(w, "N"); err != nil { + return err + } + } + if _, err = io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { return err } -- 2.40.1 From b772658c0b0aca330713c68f15c832d956e84017 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 4 Jan 2022 18:34:57 +0000 Subject: [PATCH 02/10] On postgres put the schema in if it is set Signed-off-by: Andrew Thornton --- engine.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/engine.go b/engine.go index 51a11b35..77688a5c 100644 --- a/engine.go +++ b/engine.go @@ -489,6 +489,10 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w if _, err := io.WriteString(w, "SET sql_mode='NO_BACKSLASH_ESCAPES';\n"); err != nil { return err } + } else if dstDialect.URI().DBType == schemas.POSTGRES && engine.dialect.URI().Schema != "" { + if _, err := fmt.Fprintf(w, `SELECT set_config('search_path', '%s,' || current_setting('search_path'), false)`, strings.ReplaceAll(engine.dialect.URI().Schema, "'", "''")); err != nil { + return err + } } for i, table := range tables { -- 2.40.1 From 129043e50dc6a085ac5775505d6cdc46456eb0c7 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 4 Jan 2022 19:42:20 +0000 Subject: [PATCH 03/10] oops Signed-off-by: Andrew Thornton --- engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine.go b/engine.go index 77688a5c..3eefdfa6 100644 --- a/engine.go +++ b/engine.go @@ -490,7 +490,7 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w return err } } else if dstDialect.URI().DBType == schemas.POSTGRES && engine.dialect.URI().Schema != "" { - if _, err := fmt.Fprintf(w, `SELECT set_config('search_path', '%s,' || current_setting('search_path'), false)`, strings.ReplaceAll(engine.dialect.URI().Schema, "'", "''")); err != nil { + if _, err := fmt.Fprintf(w, `SELECT 
set_config('search_path', '%s,' || current_setting('search_path'), false);`+"\n", strings.ReplaceAll(engine.dialect.URI().Schema, "'", "''")); err != nil { return err } } -- 2.40.1 From 7f471d653de8b77f8128bb1319399d721dd046c6 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 4 Jan 2022 21:37:43 +0000 Subject: [PATCH 04/10] attempt to place test Signed-off-by: Andrew Thornton --- engine.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/engine.go b/engine.go index 3eefdfa6..430e82b8 100644 --- a/engine.go +++ b/engine.go @@ -471,6 +471,9 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w DBName: uri.DBName, // DO NOT SET SCHEMA HERE } + if dstDialect.URI().DBType == schemas.POSTGRES { + destURI.Schema = engine.dialect.URI().Schema + } if err := dstDialect.Init(&destURI); err != nil { return err } @@ -489,10 +492,6 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w if _, err := io.WriteString(w, "SET sql_mode='NO_BACKSLASH_ESCAPES';\n"); err != nil { return err } - } else if dstDialect.URI().DBType == schemas.POSTGRES && engine.dialect.URI().Schema != "" { - if _, err := fmt.Fprintf(w, `SELECT set_config('search_path', '%s,' || current_setting('search_path'), false);`+"\n", strings.ReplaceAll(engine.dialect.URI().Schema, "'", "''")); err != nil { - return err - } } for i, table := range tables { -- 2.40.1 From ec132613674c9d5a6fbff9bcef0e881c32f174ae Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 5 Jan 2022 10:23:52 +0000 Subject: [PATCH 05/10] fix npe Signed-off-by: Andrew Thornton --- engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine.go b/engine.go index 430e82b8..e71dd3a6 100644 --- a/engine.go +++ b/engine.go @@ -471,7 +471,7 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w DBName: uri.DBName, // DO NOT SET SCHEMA HERE } - if dstDialect.URI().DBType == schemas.POSTGRES { + if tp[0] == 
schemas.POSTGRES { destURI.Schema = engine.dialect.URI().Schema } if err := dstDialect.Init(&destURI); err != nil { -- 2.40.1 From 47f1bd022f7e4d15701e2c16d19bf512759e0d5a Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 5 Jan 2022 11:03:57 +0000 Subject: [PATCH 06/10] attempt to fix cockroach Signed-off-by: Andrew Thornton --- engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engine.go b/engine.go index e71dd3a6..4c65c9d4 100644 --- a/engine.go +++ b/engine.go @@ -638,7 +638,7 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w return err } for i := loc[0]; i < loc[1]; i++ { - if _, err := fmt.Fprintf(w, "\\x%0x", toCheck[i]); err != nil { + if _, err := fmt.Fprintf(w, "\\x%02x", toCheck[i]); err != nil { return err } } -- 2.40.1 From d03d953a3f805e144c55e02251fa9407c71a0914 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 5 Jan 2022 23:02:34 +0000 Subject: [PATCH 07/10] handle dameng Signed-off-by: Andrew Thornton --- engine.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/engine.go b/engine.go index c52533cb..b9261c08 100644 --- a/engine.go +++ b/engine.go @@ -732,6 +732,50 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w } } } + } else if dstDialect.URI().DBType == schemas.DAMENG { + if dstTable.Columns()[i].SQLType.IsBlob() { + // DAMENG uses HEXTORAW + if _, err := fmt.Fprintf(w, "HEXTORAW('%x')", s.String); err != nil { + return err + } + } else { + // DAMENG concatentates strings in multiple ways but uses CHAR and has CONCAT + // (NOTE: a NUL byte in a text segment will fail) + if _, err := io.WriteString(w, "CONCAT("); err != nil { + return err + } + toCheck := strings.ReplaceAll(s.String, "'", "''") + for len(toCheck) > 0 { + loc := controlCharactersRe.FindStringIndex(toCheck) + if loc == nil { + if _, err := io.WriteString(w, "'"+toCheck+"')"); err != nil { + return err + } + break + } + if 
loc[0] > 0 { + if _, err := io.WriteString(w, "'"+toCheck[:loc[0]]+"', "); err != nil { + return err + } + } + for i := loc[0]; i < loc[1]-1; i++ { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(toCheck[i]))+"), "); err != nil { + return err + } + } + char := toCheck[loc[1]-1] + toCheck = toCheck[loc[1]:] + if len(toCheck) > 0 { + if _, err := io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"), "); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "CHAR("+strconv.Itoa(int(char))+"))"); err != nil { + return err + } + } + } + } } else { // In MSSQL we have to use NChar format to get unicode strings. if dstDialect.URI().DBType == schemas.MSSQL && dstTable.Columns()[i].SQLType.IsText() { -- 2.40.1 From 83817e383e973a8ca5284c2c9bd8b0d245c4260f Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 5 Jan 2022 23:25:16 +0000 Subject: [PATCH 08/10] handle mssql properly Signed-off-by: Andrew Thornton --- engine.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/engine.go b/engine.go index b9261c08..4fc83cba 100644 --- a/engine.go +++ b/engine.go @@ -776,14 +776,18 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w } } } - } else { - // In MSSQL we have to use NChar format to get unicode strings. 
- if dstDialect.URI().DBType == schemas.MSSQL && dstTable.Columns()[i].SQLType.IsText() { - if _, err = io.WriteString(w, "N"); err != nil { + } else if dstDialect.URI().DBType == schemas.MSSQL { + if dstTable.Columns()[i].SQLType.IsBlob() { + // MSSQL uses CONVERT(VARBINARY(MAX), '0xDEADBEEF', 1) + if _, err := fmt.Fprintf(w, "CONVERT(VARBINARY(MAX), '0x%x', 1)", s.String); err != nil { + return err + } + } else { + if _, err = io.WriteString(w, "N'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { return err } } - + } else { if _, err = io.WriteString(w, "'"+strings.ReplaceAll(s.String, "'", "''")+"'"); err != nil { return err } -- 2.40.1 From 9433f34337d876888e2b7c3b9717ebad48b83ad7 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Thu, 6 Jan 2022 04:26:52 +0000 Subject: [PATCH 09/10] Add some more testcases for dump Signed-off-by: Andrew Thornton --- integrations/engine_test.go | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/integrations/engine_test.go b/integrations/engine_test.go index dbe17571..cdcdd6be 100644 --- a/integrations/engine_test.go +++ b/integrations/engine_test.go @@ -143,6 +143,7 @@ func TestDumpTables(t *testing.T) { type TestDumpTableStruct struct { Id int64 + Data []byte `xorm:"BLOB"` Name string IsMan bool Created time.Time `xorm:"created"` @@ -152,10 +153,14 @@ func TestDumpTables(t *testing.T) { _, err := testEngine.Insert([]TestDumpTableStruct{ {Name: "1", IsMan: true}, - {Name: "2\n"}, - {Name: "3;"}, - {Name: "4\n;\n''"}, - {Name: "5'\n"}, + {Name: "2\n", Data: []byte{'\000', '\001', '\002'}}, + {Name: "3;", Data: []byte("0x000102")}, + {Name: "4\n;\n''", Data: []byte("Help")}, + {Name: "5'\n", Data: []byte("0x48656c70")}, + {Name: "6\\n'\n", Data: []byte("48656c70")}, + {Name: "7\\n'\r\n", Data: []byte("7\\n'\r\n")}, + {Name: "x0809ee"}, + {Name: "090a10"}, }) assert.NoError(t, err) -- 2.40.1 From 1189a63c9812fa99146143ad24d02e9c68900b2c Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: 
Thu, 6 Jan 2022 05:02:14 +0000 Subject: [PATCH 10/10] handle oracle Signed-off-by: Andrew Thornton --- engine.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/engine.go b/engine.go index 4fc83cba..b7dcf5a2 100644 --- a/engine.go +++ b/engine.go @@ -732,14 +732,14 @@ func (engine *Engine) dumpTables(ctx context.Context, tables []*schemas.Table, w } } } - } else if dstDialect.URI().DBType == schemas.DAMENG { + } else if dstDialect.URI().DBType == schemas.DAMENG || dstDialect.URI().DBType == schemas.ORACLE { if dstTable.Columns()[i].SQLType.IsBlob() { - // DAMENG uses HEXTORAW + // ORACLE/DAMENG uses HEXTORAW if _, err := fmt.Fprintf(w, "HEXTORAW('%x')", s.String); err != nil { return err } } else { - // DAMENG concatentates strings in multiple ways but uses CHAR and has CONCAT + // ORACLE/DAMENG concatentates strings in multiple ways but uses CHAR and has CONCAT // (NOTE: a NUL byte in a text segment will fail) if _, err := io.WriteString(w, "CONCAT("); err != nil { return err -- 2.40.1