Skip to content

Commit 1133ab8

Browse files
kyleconroy and claude committed
Add COPY INTO statement parsing for Azure Synapse Analytics
- Create CopyStatement AST type with From, Into, and Options
- Add CopyOption, SingleValueTypeCopyOption, CopyCredentialOption
- Add ListTypeCopyOption and CopyColumnOption for column definitions
- Parse COPY INTO table (columns) FROM 'url' WITH (options)
- Handle Credential and ErrorFileCredential options with Identity/Secret
- Normalize option names to PascalCase for JSON output

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent e29bfbb commit 1133ab8

File tree

6 files changed

+377
-2
lines changed

6 files changed

+377
-2
lines changed

ast/copy_statement.go

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package ast
2+
3+
// CopyStatement represents a COPY INTO statement for Azure Synapse Analytics
4+
type CopyStatement struct {
5+
Into *SchemaObjectName `json:"Into,omitempty"`
6+
From []ScalarExpression `json:"From,omitempty"`
7+
Options []*CopyOption `json:"Options,omitempty"`
8+
}
9+
10+
func (*CopyStatement) node() {}
11+
func (*CopyStatement) statement() {}
12+
13+
// CopyOption represents an option in COPY INTO
14+
type CopyOption struct {
15+
Kind string `json:"Kind,omitempty"`
16+
Value CopyOptionValue `json:"Value,omitempty"`
17+
}
18+
19+
func (*CopyOption) node() {}
20+
21+
// CopyOptionValue is implemented by every type that can be stored in
// CopyOption.Value. The unexported method restricts implementations to this
// package.
type CopyOptionValue interface {
	copyOptionValue()
}
25+
26+
// SingleValueTypeCopyOption represents a simple value option
27+
type SingleValueTypeCopyOption struct {
28+
SingleValue *IdentifierOrValueExpression `json:"SingleValue,omitempty"`
29+
}
30+
31+
func (*SingleValueTypeCopyOption) node() {}
32+
func (*SingleValueTypeCopyOption) copyOptionValue() {}
33+
34+
// CopyCredentialOption represents a credential option with Identity and optional Secret
35+
type CopyCredentialOption struct {
36+
Identity ScalarExpression `json:"Identity,omitempty"`
37+
Secret ScalarExpression `json:"Secret,omitempty"`
38+
}
39+
40+
func (*CopyCredentialOption) node() {}
41+
func (*CopyCredentialOption) copyOptionValue() {}
42+
43+
// ListTypeCopyOption represents a list of column options
44+
type ListTypeCopyOption struct {
45+
Options []*CopyColumnOption `json:"Options,omitempty"`
46+
}
47+
48+
func (*ListTypeCopyOption) node() {}
49+
func (*ListTypeCopyOption) copyOptionValue() {}
50+
51+
// CopyColumnOption represents a column option with name, default value, and ordinal
52+
type CopyColumnOption struct {
53+
ColumnName *Identifier `json:"ColumnName,omitempty"`
54+
DefaultValue ScalarExpression `json:"DefaultValue,omitempty"`
55+
FieldNumber ScalarExpression `json:"FieldNumber,omitempty"`
56+
}
57+
58+
func (*CopyColumnOption) node() {}

parser/marshal.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -599,6 +599,8 @@ func statementToJSON(stmt ast.Statement) jsonNode {
599599
return insertBulkStatementToJSON(s)
600600
case *ast.BulkInsertStatement:
601601
return bulkInsertStatementToJSON(s)
602+
case *ast.CopyStatement:
603+
return copyStatementToJSON(s)
602604
case *ast.AlterUserStatement:
603605
return alterUserStatementToJSON(s)
604606
case *ast.AlterRouteStatement:
@@ -17682,6 +17684,121 @@ func bulkInsertStatementToJSON(s *ast.BulkInsertStatement) jsonNode {
1768217684
return node
1768317685
}
1768417686

17687+
func copyStatementToJSON(s *ast.CopyStatement) jsonNode {
17688+
node := jsonNode{
17689+
"$type": "CopyStatement",
17690+
}
17691+
if len(s.From) > 0 {
17692+
from := make([]jsonNode, len(s.From))
17693+
for i, f := range s.From {
17694+
from[i] = scalarExpressionToJSON(f)
17695+
}
17696+
node["From"] = from
17697+
}
17698+
if s.Into != nil {
17699+
node["Into"] = schemaObjectNameToJSON(s.Into)
17700+
}
17701+
if len(s.Options) > 0 {
17702+
opts := make([]jsonNode, len(s.Options))
17703+
for i, opt := range s.Options {
17704+
opts[i] = copyOptionToJSON(opt)
17705+
}
17706+
node["Options"] = opts
17707+
}
17708+
return node
17709+
}
17710+
17711+
func copyOptionToJSON(o *ast.CopyOption) jsonNode {
17712+
node := jsonNode{
17713+
"$type": "CopyOption",
17714+
"Kind": normalizeCopyOptionKind(o.Kind),
17715+
}
17716+
if o.Value != nil {
17717+
node["Value"] = copyOptionValueToJSON(o.Value)
17718+
}
17719+
return node
17720+
}
17721+
17722+
// normalizeCopyOptionKind maps a COPY option name to the canonical spelling
// used in the JSON output. Matching is case-insensitive. Most names become
// PascalCase (e.g. "FieldTerminator"), but some canonical spellings keep their
// underscore ("File_Type", "Identity_Insert", "File_Format"), and
// ERRORFILE_CREDENTIAL becomes "ErrorFileCredential". Names that are not
// recognized are returned unchanged, in their original case.
//
// A switch is used instead of a lookup table so that no map is allocated on
// each call.
func normalizeCopyOptionKind(kind string) string {
	switch strings.ToUpper(kind) {
	case "FILE_TYPE":
		return "File_Type"
	case "FIELDTERMINATOR":
		return "FieldTerminator"
	case "ROWTERMINATOR":
		return "RowTerminator"
	case "FIELDQUOTE":
		return "FieldQuote"
	case "DATEFORMAT":
		return "DateFormat"
	case "ENCODING":
		return "Encoding"
	case "MAXERRORS":
		return "MaxErrors"
	case "ERRORFILE":
		return "ErrorFile"
	case "FIRSTROW":
		return "FirstRow"
	case "CREDENTIAL":
		return "Credential"
	case "IDENTITY_INSERT":
		return "Identity_Insert"
	case "COMPRESSION":
		return "Compression"
	case "FILE_FORMAT":
		return "File_Format"
	case "ERRORFILE_CREDENTIAL":
		return "ErrorFileCredential"
	case "COLUMNOPTIONS":
		return "ColumnOptions"
	}
	return kind
}
17748+
17749+
func copyOptionValueToJSON(v ast.CopyOptionValue) jsonNode {
17750+
switch val := v.(type) {
17751+
case *ast.SingleValueTypeCopyOption:
17752+
node := jsonNode{
17753+
"$type": "SingleValueTypeCopyOption",
17754+
}
17755+
if val.SingleValue != nil {
17756+
node["SingleValue"] = identifierOrValueExpressionToJSON(val.SingleValue)
17757+
}
17758+
return node
17759+
case *ast.CopyCredentialOption:
17760+
node := jsonNode{
17761+
"$type": "CopyCredentialOption",
17762+
}
17763+
if val.Identity != nil {
17764+
node["Identity"] = scalarExpressionToJSON(val.Identity)
17765+
}
17766+
if val.Secret != nil {
17767+
node["Secret"] = scalarExpressionToJSON(val.Secret)
17768+
}
17769+
return node
17770+
case *ast.ListTypeCopyOption:
17771+
node := jsonNode{
17772+
"$type": "ListTypeCopyOption",
17773+
}
17774+
if len(val.Options) > 0 {
17775+
opts := make([]jsonNode, len(val.Options))
17776+
for i, opt := range val.Options {
17777+
opts[i] = copyColumnOptionToJSON(opt)
17778+
}
17779+
node["Options"] = opts
17780+
}
17781+
return node
17782+
}
17783+
return nil
17784+
}
17785+
17786+
func copyColumnOptionToJSON(c *ast.CopyColumnOption) jsonNode {
17787+
node := jsonNode{
17788+
"$type": "CopyColumnOption",
17789+
}
17790+
if c.ColumnName != nil {
17791+
node["ColumnName"] = identifierToJSON(c.ColumnName)
17792+
}
17793+
if c.DefaultValue != nil {
17794+
node["DefaultValue"] = scalarExpressionToJSON(c.DefaultValue)
17795+
}
17796+
if c.FieldNumber != nil {
17797+
node["FieldNumber"] = scalarExpressionToJSON(c.FieldNumber)
17798+
}
17799+
return node
17800+
}
17801+
1768517802
func alterUserStatementToJSON(s *ast.AlterUserStatement) jsonNode {
1768617803
node := jsonNode{
1768717804
"$type": "AlterUserStatement",

parser/parse_dml.go

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2949,3 +2949,199 @@ func (p *Parser) parseOutputClause() (*ast.OutputClause, *ast.OutputIntoClause,
29492949
}, nil, nil
29502950
}
29512951

2952+
// parseCopyStatement parses COPY INTO statement for Azure Synapse Analytics
2953+
func (p *Parser) parseCopyStatement() (*ast.CopyStatement, error) {
2954+
// Consume COPY
2955+
p.nextToken()
2956+
2957+
stmt := &ast.CopyStatement{}
2958+
2959+
// Expect INTO
2960+
if strings.ToUpper(p.curTok.Literal) == "INTO" {
2961+
p.nextToken() // consume INTO
2962+
}
2963+
2964+
// Parse target table name
2965+
tableName, err := p.parseSchemaObjectName()
2966+
if err != nil {
2967+
return nil, err
2968+
}
2969+
stmt.Into = tableName
2970+
2971+
// Parse optional column list with defaults: (col1 DEFAULT 'value' 1, col2 DEFAULT 2 3)
2972+
if p.curTok.Type == TokenLParen {
2973+
p.nextToken() // consume (
2974+
columnOpts := &ast.ListTypeCopyOption{}
2975+
for p.curTok.Type != TokenRParen && p.curTok.Type != TokenEOF {
2976+
colOpt := &ast.CopyColumnOption{}
2977+
colOpt.ColumnName = p.parseIdentifier()
2978+
2979+
// Check for DEFAULT
2980+
if strings.ToUpper(p.curTok.Literal) == "DEFAULT" {
2981+
p.nextToken() // consume DEFAULT
2982+
defValue, err := p.parseScalarExpression()
2983+
if err != nil {
2984+
return nil, err
2985+
}
2986+
colOpt.DefaultValue = defValue
2987+
}
2988+
2989+
// Parse field number (integer)
2990+
if p.curTok.Type == TokenNumber {
2991+
val := p.curTok.Literal
2992+
colOpt.FieldNumber = &ast.IntegerLiteral{Value: val, LiteralType: "Integer"}
2993+
p.nextToken()
2994+
}
2995+
2996+
columnOpts.Options = append(columnOpts.Options, colOpt)
2997+
2998+
if p.curTok.Type == TokenComma {
2999+
p.nextToken()
3000+
} else {
3001+
break
3002+
}
3003+
}
3004+
if p.curTok.Type == TokenRParen {
3005+
p.nextToken() // consume )
3006+
}
3007+
// Add column options as an option
3008+
if len(columnOpts.Options) > 0 {
3009+
stmt.Options = append(stmt.Options, &ast.CopyOption{
3010+
Kind: "ColumnOptions",
3011+
Value: columnOpts,
3012+
})
3013+
}
3014+
}
3015+
3016+
// Expect FROM
3017+
if strings.ToUpper(p.curTok.Literal) == "FROM" {
3018+
p.nextToken() // consume FROM
3019+
}
3020+
3021+
// Parse source URLs (comma-separated string literals)
3022+
for {
3023+
if p.curTok.Type == TokenString || p.curTok.Type == TokenNationalString {
3024+
strLit, err := p.parseStringLiteral()
3025+
if err != nil {
3026+
return nil, err
3027+
}
3028+
stmt.From = append(stmt.From, strLit)
3029+
}
3030+
if p.curTok.Type == TokenComma {
3031+
p.nextToken()
3032+
} else {
3033+
break
3034+
}
3035+
}
3036+
3037+
// Parse WITH clause if present
3038+
if p.curTok.Type == TokenWith {
3039+
p.nextToken() // consume WITH
3040+
if p.curTok.Type == TokenLParen {
3041+
p.nextToken() // consume (
3042+
}
3043+
3044+
for p.curTok.Type != TokenRParen && p.curTok.Type != TokenEOF && p.curTok.Type != TokenSemicolon {
3045+
opt, err := p.parseCopyOption()
3046+
if err != nil {
3047+
return nil, err
3048+
}
3049+
if opt != nil {
3050+
stmt.Options = append(stmt.Options, opt)
3051+
}
3052+
3053+
if p.curTok.Type == TokenComma {
3054+
p.nextToken()
3055+
} else {
3056+
break
3057+
}
3058+
}
3059+
3060+
if p.curTok.Type == TokenRParen {
3061+
p.nextToken() // consume )
3062+
}
3063+
}
3064+
3065+
// Skip optional semicolon
3066+
if p.curTok.Type == TokenSemicolon {
3067+
p.nextToken()
3068+
}
3069+
3070+
return stmt, nil
3071+
}
3072+
3073+
// parseCopyOption parses a single COPY option
3074+
func (p *Parser) parseCopyOption() (*ast.CopyOption, error) {
3075+
opt := &ast.CopyOption{}
3076+
3077+
// Get option name
3078+
optName := p.curTok.Literal
3079+
opt.Kind = optName
3080+
p.nextToken()
3081+
3082+
// Handle = sign
3083+
if p.curTok.Type == TokenEquals {
3084+
p.nextToken() // consume =
3085+
}
3086+
3087+
// Check for credential option (Identity = ..., Secret = ...)
3088+
if strings.ToUpper(optName) == "CREDENTIAL" || strings.ToUpper(optName) == "ERRORFILE_CREDENTIAL" {
3089+
credOpt := &ast.CopyCredentialOption{}
3090+
// Expect (
3091+
if p.curTok.Type == TokenLParen {
3092+
p.nextToken() // consume (
3093+
}
3094+
// Parse Identity = '...'
3095+
for p.curTok.Type != TokenRParen && p.curTok.Type != TokenEOF {
3096+
keyName := strings.ToUpper(p.curTok.Literal)
3097+
p.nextToken()
3098+
if p.curTok.Type == TokenEquals {
3099+
p.nextToken() // consume =
3100+
}
3101+
if keyName == "IDENTITY" {
3102+
strLit, _ := p.parseStringLiteral()
3103+
credOpt.Identity = strLit
3104+
} else if keyName == "SECRET" {
3105+
strLit, _ := p.parseStringLiteral()
3106+
credOpt.Secret = strLit
3107+
}
3108+
if p.curTok.Type == TokenComma {
3109+
p.nextToken()
3110+
} else {
3111+
break
3112+
}
3113+
}
3114+
if p.curTok.Type == TokenRParen {
3115+
p.nextToken() // consume )
3116+
}
3117+
opt.Value = credOpt
3118+
} else {
3119+
// Single value option
3120+
singleOpt := &ast.SingleValueTypeCopyOption{}
3121+
idOrVal := &ast.IdentifierOrValueExpression{}
3122+
3123+
if p.curTok.Type == TokenString || p.curTok.Type == TokenNationalString {
3124+
strLit, _ := p.parseStringLiteral()
3125+
// Extract value without quotes
3126+
val := strLit.Value
3127+
idOrVal.Value = val
3128+
idOrVal.ValueExpression = strLit
3129+
} else if p.curTok.Type == TokenNumber {
3130+
val := p.curTok.Literal
3131+
idOrVal.Value = val
3132+
idOrVal.ValueExpression = &ast.IntegerLiteral{Value: val, LiteralType: "Integer"}
3133+
p.nextToken()
3134+
} else {
3135+
// Identifier value (like FILEFORMAT, GZIP, etc.)
3136+
val := p.curTok.Literal
3137+
idOrVal.Value = val
3138+
idOrVal.Identifier = &ast.Identifier{Value: val, QuoteType: "NotQuoted"}
3139+
p.nextToken()
3140+
}
3141+
singleOpt.SingleValue = idOrVal
3142+
opt.Value = singleOpt
3143+
}
3144+
3145+
return opt, nil
3146+
}
3147+

0 commit comments

Comments
 (0)