diff --git a/materialize-postgres/.snapshots/TestSQLGeneration b/materialize-postgres/.snapshots/TestSQLGeneration index 6506a44470..6f63e0821a 100644 --- a/materialize-postgres/.snapshots/TestSQLGeneration +++ b/materialize-postgres/.snapshots/TestSQLGeneration @@ -344,4 +344,20 @@ BEGIN END $$; --- End Fence Update --- +--- Begin createTargetTable [jsonb contentMediaType] --- + +CREATE TABLE IF NOT EXISTS "public".jsonb_round_trip ( + id BIGINT NOT NULL, + plain_json JSON, + jsonb_col JSONB, + + PRIMARY KEY (id) +); + +COMMENT ON TABLE "public".jsonb_round_trip IS 'Generated for materialize-postgres jsonb round-trip test'; +COMMENT ON COLUMN "public".jsonb_round_trip.id IS ''; +COMMENT ON COLUMN "public".jsonb_round_trip.plain_json IS ''; +COMMENT ON COLUMN "public".jsonb_round_trip.jsonb_col IS ''; +--- End createTargetTable [jsonb contentMediaType] --- + diff --git a/materialize-postgres/sqlgen.go b/materialize-postgres/sqlgen.go index 00159bc3de..bee29236d4 100644 --- a/materialize-postgres/sqlgen.go +++ b/materialize-postgres/sqlgen.go @@ -34,18 +34,36 @@ func createPgDialect(featureFlags map[string]bool) sql.Dialect { binaryMapping = sql.MapStatic("BYTEA", sql.UsingConverter(sql.Base64Decoder)) } + // Fields originating from a PostgreSQL JSONB column carry this + // (non-standard, vendor-tree) contentMediaType so they can be recreated + // as JSONB at the destination instead of collapsing onto JSON. Anything + // without the annotation defaults to JSON, preserving the historical + // behavior for existing materializations and non-Postgres sources. 
+ jsonbContentMediaType := "application/vnd.estuary.postgresql.jsonb+json" + jsonOrJsonb := func(jsonMapper, jsonbMapper sql.MapProjectionFn) sql.MapProjectionFn { + return func(p *sql.Projection) (sql.DDLer, sql.CompatibleColumnTypes, sql.ElementConverter) { + if p.Inference.String_ != nil && p.Inference.String_.ContentType == jsonbContentMediaType { + return jsonbMapper(p) + } + return jsonMapper(p) + } + } + mapper := sql.NewDDLMapper( sql.FlatTypeMappings{ sql.INTEGER: sql.MapSignedInt64( sql.MapStatic("BIGINT", sql.AlsoCompatibleWith("integer")), sql.MapStatic("NUMERIC"), ), - sql.NUMBER: sql.MapStatic("DOUBLE PRECISION"), - sql.BOOLEAN: sql.MapStatic("BOOLEAN"), - sql.OBJECT: sql.MapStatic("JSON"), - sql.ARRAY: sql.MapStatic("JSON"), - sql.BINARY: binaryMapping, - sql.MULTIPLE: sql.MapStatic("JSON", sql.UsingConverter(sql.ToJsonBytes)), + sql.NUMBER: sql.MapStatic("DOUBLE PRECISION"), + sql.BOOLEAN: sql.MapStatic("BOOLEAN"), + sql.OBJECT: jsonOrJsonb(sql.MapStatic("JSON"), sql.MapStatic("JSONB")), + sql.ARRAY: jsonOrJsonb(sql.MapStatic("JSON"), sql.MapStatic("JSONB")), + sql.BINARY: binaryMapping, + sql.MULTIPLE: jsonOrJsonb( + sql.MapStatic("JSON", sql.UsingConverter(sql.ToJsonBytes)), + sql.MapStatic("JSONB", sql.UsingConverter(sql.ToJsonBytes)), + ), sql.STRING_INTEGER: sql.MapStatic("NUMERIC"), sql.STRING_NUMBER: sql.MapStatic("DECIMAL", sql.AlsoCompatibleWith("numeric")), sql.STRING: sql.MapString(sql.StringMappings{ @@ -80,7 +98,12 @@ func createPgDialect(featureFlags map[string]bool) sql.Dialect { "text": {sql.NewMigrationSpec([]string{"bytea"}, sql.WithCastSQL(stringToByteaCast))}, "character varying": {sql.NewMigrationSpec([]string{"bytea"}, sql.WithCastSQL(stringToByteaCast))}, "bytea": {sql.NewMigrationSpec([]string{"text"}, sql.WithCastSQL(byteaToStringCast))}, - "*": {sql.NewMigrationSpec([]string{"json"}, sql.WithCastSQL(toJsonCast))}, + // PostgreSQL provides assignment casts in both directions between + // json and jsonb, so columns can move 
freely as the upstream + // source type annotation flips. + "json": {sql.NewMigrationSpec([]string{"jsonb"})}, + "jsonb": {sql.NewMigrationSpec([]string{"json"})}, + "*": {sql.NewMigrationSpec([]string{"json", "jsonb"}, sql.WithCastSQL(toJsonCast))}, }, TableLocatorer: sql.TableLocatorFn(func(path []string) sql.InfoTableLocation { if len(path) == 1 { diff --git a/materialize-postgres/sqlgen_test.go b/materialize-postgres/sqlgen_test.go index 7b15374df6..04ab560f29 100644 --- a/materialize-postgres/sqlgen_test.go +++ b/materialize-postgres/sqlgen_test.go @@ -41,9 +41,84 @@ func TestSQLGeneration(t *testing.T) { }, ) + // Exercise the source-postgres -> materialize-postgres JSONB round-trip: + // a value field carrying the application/vnd.estuary.postgresql.jsonb+json + // contentMediaType must render as JSONB, while a sibling field without + // the annotation stays on JSON. + jsonbTable := buildJSONBTestTable(t) + jsonbName := "createTargetTable [jsonb contentMediaType]" + snap.WriteString("--- Begin " + jsonbName + " ---\n") + require.NoError(t, testTemplates.createTargetTable.Execute(snap, &jsonbTable)) + snap.WriteString("--- End " + jsonbName + " ---\n\n") + cupaloy.SnapshotT(t, snap.String()) } +// buildJSONBTestTable assembles a synthetic Table with two value projections: +// one carrying the jsonb contentMediaType so it should map to JSONB, and one +// without so it should default to JSON. 
+func buildJSONBTestTable(t *testing.T) sql.Table { + t.Helper() + + const jsonbMediaType = "application/vnd.estuary.postgresql.jsonb+json" + multipleTypes := []string{"object", "string", "array", "number", "boolean", "null"} + + mkValue := func(field, contentType string) sql.Column { + var stringInf *pf.Inference_String + if contentType != "" { + stringInf = &pf.Inference_String{ContentType: contentType} + } + p := sql.Projection{ + Projection: pf.Projection{ + Field: field, + Ptr: "/" + field, + Inference: pf.Inference{ + Types: multipleTypes, + String_: stringInf, + Exists: pf.Inference_MAY, + }, + }, + } + return sql.Column{ + Projection: p, + MappedType: testDialect.MapType(&p, sql.FieldConfig{}), + Identifier: testDialect.Identifier(field), + } + } + + keyProj := sql.Projection{ + Projection: pf.Projection{ + Field: "id", + Ptr: "/id", + IsPrimaryKey: true, + Inference: pf.Inference{ + Types: []string{"integer"}, + Exists: pf.Inference_MUST, + }, + }, + } + keyCol := sql.Column{ + Projection: keyProj, + MappedType: testDialect.MapType(&keyProj, sql.FieldConfig{}), + Identifier: testDialect.Identifier("id"), + MustExist: true, + } + + tableName := "jsonb_round_trip" + return sql.Table{ + TableShape: sql.TableShape{ + Path: []string{"public", tableName}, + Comment: "Generated for materialize-postgres jsonb round-trip test", + }, + Identifier: testDialect.Identifier("public", tableName), + Keys: []sql.Column{keyCol}, + Values: []sql.Column{ + mkValue("plain_json", ""), + mkValue("jsonb_col", jsonbMediaType), + }, + } +} + func TestDateTimeColumn(t *testing.T) { var mapped = testDialect.MapType(&sql.Projection{ @@ -62,6 +137,42 @@ func TestDateTimeColumn(t *testing.T) { require.NoError(t, err) } +func TestJSONBContentMediaType(t *testing.T) { + jsonbMediaType := "application/vnd.estuary.postgresql.jsonb+json" // must match jsonbContentMediaType in createPgDialect (sqlgen.go) + + mapWithMediaType := func(types []string, contentType string) string { + var stringInf *pf.Inference_String + for _, ty := range types { + if ty == "string" { + 
stringInf = &pf.Inference_String{ContentType: contentType} + break + } + } + return testDialect.MapType(&sql.Projection{ + Projection: pf.Projection{ + Inference: pf.Inference{ + Types: types, + String_: stringInf, + Exists: pf.Inference_MUST, + }, + }, + }, sql.FieldConfig{}).DDL + } + + require.Equal(t, + "JSONB NOT NULL", + mapWithMediaType([]string{"object", "string", "array", "number", "boolean"}, jsonbMediaType), + "MULTIPLE-typed field with jsonb contentMediaType should map to JSONB") + require.Equal(t, + "JSON NOT NULL", + mapWithMediaType([]string{"object", "string", "array", "number", "boolean"}, "application/json"), + "MULTIPLE-typed field with application/json contentMediaType should map to JSON") + require.Equal(t, + "JSON NOT NULL", + mapWithMediaType([]string{"object", "string", "array", "number", "boolean"}, ""), + "MULTIPLE-typed field without contentMediaType should default to JSON") +} + func TestTruncatedIdentifier(t *testing.T) { tests := []struct { name string diff --git a/source-postgres/.snapshots/TestDiscoveryComplex b/source-postgres/.snapshots/TestDiscoveryComplex index 3f83263f0c..b6c6695a8d 100644 --- a/source-postgres/.snapshots/TestDiscoveryComplex +++ b/source-postgres/.snapshots/TestDiscoveryComplex @@ -36,10 +36,12 @@ sql> COMMENT ON COLUMN test.discoverycomplex_934635.k1 IS 'I think this is a key ] }, "doc": { - "description": "(source type: json)" + "description": "(source type: json)", + "contentMediaType": "application/json" }, "doc/bin": { - "description": "(source type: non-nullable jsonb)" + "description": "(source type: non-nullable jsonb)", + "contentMediaType": "application/vnd.estuary.postgresql.jsonb+json" }, "foo": { "description": "This is a text field! 
(source type: text)", diff --git a/source-postgres/discovery.go b/source-postgres/discovery.go index c2dcf7c82a..f9b0bcbfc5 100644 --- a/source-postgres/discovery.go +++ b/source-postgres/discovery.go @@ -267,10 +267,11 @@ func (db *postgresDatabase) TranslateDBToJSONType(column sqlcapture.ColumnInfo, } type columnSchema struct { - contentEncoding string - format string - nullable bool - jsonTypes []string + contentEncoding string + contentMediaType string + format string + nullable bool + jsonTypes []string } func (s columnSchema) toType() *jsonschema.Schema { @@ -283,6 +284,10 @@ func (s columnSchema) toType() *jsonschema.Schema { out.Extras["contentEncoding"] = s.contentEncoding // New in 2019-09. } + if s.contentMediaType != "" { + out.Extras["contentMediaType"] = s.contentMediaType // New in 2019-09. + } + if s.jsonTypes != nil { var types = append([]string(nil), s.jsonTypes...) if s.nullable { @@ -338,8 +343,12 @@ var postgresTypeToJSON = map[string]columnSchema{ "bit": {jsonTypes: []string{"string"}}, "varbit": {jsonTypes: []string{"string"}}, - "json": {}, - "jsonb": {}, + // json and jsonb columns capture arbitrary JSON values, so we don't constrain + // the JSON Schema type. The contentMediaType annotation distinguishes the two + // at the wire so downstream connectors (e.g. materialize-postgres) can + // recreate the original column type instead of collapsing both onto json. + "json": {contentMediaType: "application/json"}, + "jsonb": {contentMediaType: "application/vnd.estuary.postgresql.jsonb+json"}, "jsonpath": {jsonTypes: []string{"string"}}, // Domain-Specific Types