Skip to content

Commit

Permalink
cli: fix double-quote escaping for JSON values with format=sql
Browse files Browse the repository at this point in the history
A previous commit (98f9c8a) made an
attempt to fix how JSON values are escaped when they contain invalid
UTF8 codes and are displayed in the CLI using the --format=sql flag
(see #107518).

That commit ended up breaking how JSON values are escaped when they
contain double quotes.

Luckily it turns out that both problems were actually caused by a
long-lived mistake in the `clisqlexec.FormatVal` function. It shouldn't
use `fmt.Sprintf("%+q", s)` to escape a string that has invalid
characters, as that conflicts with how SQL strings are normally escaped.
The proper way is to use `lexbase.EscapeSQLString(s)`.

Release note (bug fix): Fixed a bug where the CLI would not correctly
escape JSON values that had double-quotes inside of a string when using
the --format=sql flag.
  • Loading branch information
rafiss committed Oct 4, 2024
1 parent 1922797 commit fcc3621
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 33 deletions.
1 change: 1 addition & 0 deletions pkg/cli/clisqlexec/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ go_test(
name = "clisqlexec_test",
srcs = [
"format_html_test.go",
"format_sql_test.go",
"format_table_test.go",
"format_value_test.go",
"main_test.go",
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/clisqlexec/format_sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (p *sqlReporter) iter(w, _ io.Writer, _ int, row []string) error {
fmt.Fprint(w, "INSERT INTO results VALUES (")
for i, r := range row {
var buf bytes.Buffer
lexbase.EncodeSQLStringWithFlags(&buf, r, lexbase.EncNoDoubleEscapeQuotes)
lexbase.EncodeSQLStringWithFlags(&buf, r, lexbase.EncNoFlags)
fmt.Fprint(w, buf.String())
if i < len(row)-1 {
fmt.Fprint(w, ", ")
Expand Down
71 changes: 71 additions & 0 deletions pkg/cli/clisqlexec/format_sql_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package clisqlexec_test

import "github.com/cockroachdb/cockroach/pkg/cli"

func Example_json_sql_format() {
c := cli.NewCLITest(cli.TestCLIParams{})
defer c.Cleanup()

testData := []string{
`e'{"a": "bc"}'`,
`e'{"a": "b\u0099c"}'`,
`e'{"a": "b\\"c"}'`,
`'"there are \"quotes\" in this json string"'`,
`'""'`,
`'{}'`,
}

for _, s := range testData {
query := `SELECT ` + s + `::json`
c.RunWithArgs([]string{"sql", "--format=sql", "-e", query})
}

// Output:
// sql --format=sql -e SELECT e'{"a": "bc"}'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES ('{"a": "bc"}');
// -- 1 row
// sql --format=sql -e SELECT e'{"a": "b\u0099c"}'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES (e'{"a": "b\\u0099c"}');
// -- 1 row
// sql --format=sql -e SELECT e'{"a": "b\\"c"}'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES (e'{"a": "b\\"c"}');
// -- 1 row
// sql --format=sql -e SELECT '"there are \"quotes\" in this json string"'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES (e'"there are \\"quotes\\" in this json string"');
// -- 1 row
// sql --format=sql -e SELECT '""'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES ('""');
// -- 1 row
// sql --format=sql -e SELECT '{}'::json
// CREATE TABLE results (
// jsonb STRING
// );
//
// INSERT INTO results VALUES ('{}');
// -- 1 row
}
7 changes: 5 additions & 2 deletions pkg/cli/clisqlexec/format_value.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"strings"
"unicode"
"unicode/utf8"

"github.com/cockroachdb/cockroach/pkg/sql/lexbase"
)

func isNotPrintableASCII(r rune) bool { return r < 0x20 || r > 0x7e || r == '"' || r == '\\' }
Expand Down Expand Up @@ -40,10 +42,11 @@ func FormatVal(val driver.Value, showPrintableUnicode bool, showNewLinesAndTabs
return t
}
}
s := fmt.Sprintf("%+q", t)
s := lexbase.EscapeSQLString(t)
// The result from EscapeSQLString is an escape-quoted string, like e'...'.
// Strip the start and final quotes. The surrounding display
// format (e.g. CSV/TSV) will add its own quotes.
return s[1 : len(s)-1]
return s[2 : len(s)-1]
}

// Fallback to printing the value as-is.
Expand Down
8 changes: 0 additions & 8 deletions pkg/sql/lexbase/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ const (
// without wrapping quotes.
EncBareIdentifiers

// EncNoDoubleEscapeQuotes indicates that backslashes will not be
// escaped when they are used as escape quotes.
EncNoDoubleEscapeQuotes

// EncFirstFreeFlagBit needs to remain unused; it is used as base
// bit offset for tree.FmtFlags.
EncFirstFreeFlagBit
Expand Down Expand Up @@ -144,7 +140,6 @@ func EncodeSQLStringWithFlags(buf *bytes.Buffer, in string, flags EncodeFlags) {
start := 0
escapedString := false
bareStrings := flags.HasFlags(EncBareStrings)
noDoubleEscapeQuotes := flags.HasFlags(EncNoDoubleEscapeQuotes)
// Loop through each unicode code point.
for i, r := range in {
if i < start {
Expand All @@ -165,9 +160,6 @@ func EncodeSQLStringWithFlags(buf *bytes.Buffer, in string, flags EncodeFlags) {
buf.WriteString("e'") // begin e'xxx' string
escapedString = true
}
if noDoubleEscapeQuotes && i+1 < len(in) && in[i:i+2] == "\\\"" {
continue
}
buf.WriteString(in[start:i])

ln := utf8.RuneLen(r)
Expand Down
22 changes: 0 additions & 22 deletions pkg/sql/lexbase/encode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,28 +112,6 @@ func testEncodeString(t *testing.T, input []byte, encode func(*bytes.Buffer, str
return stmt
}

func TestEncodeSQLStringWithNoDoubleEscapeQuotes(t *testing.T) {
testCases := []struct {
input string
output string
}{
// (GH issue #107518)
{`\"`, `e'\"'`},
{`{"a": "b\u0099c"}`, `e'{"a": "b\\u0099c"}'`},
{`{\"a\": \"b\u0099c\"}`, `e'{\"a\": \"b\\u0099c\"}'`},
}

for _, tc := range testCases {
var buf bytes.Buffer
lexbase.EncodeSQLStringWithFlags(&buf, tc.input, lexbase.EncNoDoubleEscapeQuotes)
out := buf.String()

if out != tc.output {
t.Errorf("`%s`: expected `%s`, got `%s`", tc.input, tc.output, out)
}
}
}

func BenchmarkEncodeSQLString(b *testing.B) {
str := strings.Repeat("foo", 10000)
for i := 0; i < b.N; i++ {
Expand Down

0 comments on commit fcc3621

Please sign in to comment.