sqlglot.dialects.snowflake
from __future__ import annotations

import typing as t

from sqlglot import exp, generator, jsonpath, parser, tokens, transforms
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    build_timetostr_or_tochar,
    binary_from_function,
    build_default_decimal_type,
    build_replace_with_optional_replacement,
    build_timestamp_from_parts,
    date_delta_sql,
    date_trunc_to_time,
    datestrtodate_sql,
    build_formatted_time,
    if_sql,
    inline_array_sql,
    max_or_greatest,
    min_or_least,
    rename_func,
    timestamptrunc_sql,
    timestrtotime_sql,
    var_map_sql,
    map_date_part,
    no_timestamp_sql,
    strposition_sql,
    timestampdiff_sql,
    no_make_interval_sql,
    groupconcat_sql,
)
from sqlglot.generator import unsupported_args
from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get
from sqlglot.optimizer.scope import build_scope, find_all_in_scope
from sqlglot.tokens import TokenType

if t.TYPE_CHECKING:
    from sqlglot._typing import E, B


# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html
def _build_datetime(
    name: str, kind: exp.DataType.Type, safe: bool = False
) -> t.Callable[[t.List], exp.Func]:
    def _builder(args: t.List) -> exp.Func:
        value = seq_get(args, 0)
        scale_or_fmt = seq_get(args, 1)

        int_value = value is not None and is_int(value.name)
        int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int

        if isinstance(value, exp.Literal) or (value and scale_or_fmt):
            # Converts calls like `TO_TIME('01:02:03')` into casts
            if len(args) == 1 and value.is_string and not int_value:
                return (
                    exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True)
                    if safe
                    else exp.cast(value, kind)
                )

            # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special
            # cases so we can transpile them, since they're relatively common
            if kind == exp.DataType.Type.TIMESTAMP:
                if not safe and (int_value or int_scale_or_fmt):
                    # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as
                    # it's not easily transpilable
                    return exp.UnixToTime(this=value, scale=scale_or_fmt)
                if not int_scale_or_fmt and not is_float(value.name):
                    expr = build_formatted_time(exp.StrToTime, "snowflake")(args)
                    expr.set("safe", safe)
                    return expr

            if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value:
                klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime
                formatted_exp = build_formatted_time(klass, "snowflake")(args)
                formatted_exp.set("safe", safe)
                return formatted_exp

        return exp.Anonymous(this=name, expressions=args)

    return _builder


def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]:
    expression = parser.build_var_map(args)

    if isinstance(expression, exp.StarMap):
        return expression

    return exp.Struct(
        expressions=[
            exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values)
        ]
    )


def _build_datediff(args: t.List) -> exp.DateDiff:
    return exp.DateDiff(
        this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0))
    )


def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 2),
            expression=seq_get(args, 1),
            unit=map_date_part(seq_get(args, 0)),
        )

    return _builder
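
# Illustrative usage sketch (assumes only the public sqlglot API; behavior per the
# builders above): TO_TIMESTAMP(<int>) is normalized into exp.UnixToTime, while
# TO_TIMESTAMP(<expr>, <fmt>) becomes exp.StrToTime, so both forms transpile cleanly.
#
#   >>> from sqlglot import exp, parse_one
#   >>> parse_one("SELECT TO_TIMESTAMP(1659981729)", read="snowflake").find(exp.UnixToTime) is not None
#   True
#   >>> parse_one("SELECT TO_TIMESTAMP(col, 'yyyy-mm-dd')", read="snowflake").find(exp.StrToTime) is not None
#   True
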

def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]:
    def _builder(args: t.List) -> B | exp.Anonymous:
        if len(args) == 3:
            return exp.Anonymous(this=name, expressions=args)

        return binary_from_function(expr_type)(args)

    return _builder


# https://docs.snowflake.com/en/sql-reference/functions/div0
def _build_if_from_div0(args: t.List) -> exp.If:
    lhs = exp._wrap(seq_get(args, 0), exp.Binary)
    rhs = exp._wrap(seq_get(args, 1), exp.Binary)

    cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_(
        exp.Is(this=lhs, expression=exp.null()).not_()
    )
    true = exp.Literal.number(0)
    false = exp.Div(this=lhs, expression=rhs)
    return exp.If(this=cond, true=true, false=false)


# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull
def _build_if_from_zeroifnull(args: t.List) -> exp.If:
    cond = exp.Is(this=seq_get(args, 0), expression=exp.Null())
    return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0))


# https://docs.snowflake.com/en/sql-reference/functions/nullifzero
def _build_if_from_nullifzero(args: t.List) -> exp.If:
    cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0))
    return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0))


def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str:
    flag = expression.text("flag")

    if "i" not in flag:
        flag += "i"

    return self.func(
        "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag)
    )


def _build_regexp_replace(args: t.List) -> exp.RegexpReplace:
    regexp_replace = exp.RegexpReplace.from_arg_list(args)

    if not regexp_replace.args.get("replacement"):
        regexp_replace.set("replacement", exp.Literal.string(""))

    return regexp_replace


def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]:
    def _parse(self: Snowflake.Parser) -> exp.Show:
        return self._parse_show_snowflake(*args, **kwargs)

    return _parse


def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc:
    trunc = date_trunc_to_time(args)
    trunc.set("unit", map_date_part(trunc.args["unit"]))
    return trunc
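
# Illustrative usage sketch (public sqlglot API): DIV0, ZEROIFNULL and NULLIFZERO
# have no dedicated AST nodes, so the builders above lower them to exp.If, which any
# target dialect can render (typically as CASE or IF).
#
#   >>> from sqlglot import exp, parse_one
#   >>> parse_one("SELECT DIV0(a, b)", read="snowflake").find(exp.If) is not None
#   True
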

def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression:
    """
    Snowflake doesn't allow columns referenced in UNPIVOT to be qualified,
    so we need to unqualify them. Same goes for ANY ORDER BY <column>.

    Example:
        >>> from sqlglot import parse_one
        >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))")
        >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake"))
        SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april))
    """
    if isinstance(expression, exp.Pivot):
        if expression.unpivot:
            expression = transforms.unqualify_columns(expression)
        else:
            for field in expression.fields:
                field_expr = seq_get(field.expressions if field else [], 0)

                if isinstance(field_expr, exp.PivotAny):
                    unqualified_field_expr = transforms.unqualify_columns(field_expr)
                    t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0)

    return expression


def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression:
    assert isinstance(expression, exp.Create)

    def _flatten_structured_type(expression: exp.DataType) -> exp.DataType:
        if expression.this in exp.DataType.NESTED_TYPES:
            expression.set("expressions", None)
        return expression

    props = expression.args.get("properties")
    if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)):
        for schema_expression in expression.this.expressions:
            if isinstance(schema_expression, exp.ColumnDef):
                column_type = schema_expression.kind
                if isinstance(column_type, exp.DataType):
                    column_type.transform(_flatten_structured_type, copy=False)

    return expression


def _unnest_generate_date_array(unnest: exp.Unnest) -> None:
    generate_date_array = unnest.expressions[0]
    start = generate_date_array.args.get("start")
    end = generate_date_array.args.get("end")
    step = generate_date_array.args.get("step")

    if not start or not end or not isinstance(step, exp.Interval) or step.name != "1":
        return

    unit = step.args.get("unit")

    unnest_alias = unnest.args.get("alias")
    if unnest_alias:
        unnest_alias = unnest_alias.copy()
        sequence_value_name = seq_get(unnest_alias.columns, 0) or "value"
    else:
        sequence_value_name = "value"

    # We'll add the next sequence value to the starting date and project the result
    date_add = _build_date_time_add(exp.DateAdd)(
        [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")]
    )

    # We use DATEDIFF to compute the number of sequence values needed
    number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"](
        [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1]
    )

    unnest.set("expressions", [number_sequence])

    unnest_parent = unnest.parent
    if isinstance(unnest_parent, exp.Join):
        select = unnest_parent.parent
        if isinstance(select, exp.Select):
            replace_column_name = (
                sequence_value_name
                if isinstance(sequence_value_name, str)
                else sequence_value_name.name
            )

            scope = build_scope(select)
            if scope:
                for column in scope.columns:
                    if column.name.lower() == replace_column_name.lower():
                        column.replace(
                            date_add.as_(replace_column_name)
                            if isinstance(column.parent, exp.Select)
                            else date_add
                        )

        lateral = exp.Lateral(this=unnest_parent.this.pop())
        unnest_parent.replace(exp.Join(this=lateral))
    else:
        unnest.replace(
            exp.select(date_add.as_(sequence_value_name))
            .from_(unnest.copy())
            .subquery(unnest_alias)
        )
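
# Illustrative usage sketch (hedged: the exact SQL text may differ across sqlglot
# versions): the helpers above rewrite BigQuery-style GENERATE_DATE_ARRAY + UNNEST
# into an ARRAY_GENERATE_RANGE integer sequence whose values are added to the start
# date via DATEADD.
#
#   >>> import sqlglot
#   >>> sql = "SELECT d FROM UNNEST(GENERATE_DATE_ARRAY(DATE '2020-01-01', DATE '2020-01-05')) AS d"
#   >>> out = sqlglot.transpile(sql, read="bigquery", write="snowflake")[0]
#   >>> "ARRAY_GENERATE_RANGE" in out and "DATEADD" in out
#   True
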

def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        for generate_date_array in expression.find_all(exp.GenerateDateArray):
            parent = generate_date_array.parent

            # If GENERATE_DATE_ARRAY is used directly as an array (e.g. passed into ARRAY_LENGTH),
            # the transformed Snowflake query is the following (it'll be unnested properly on the
            # next iteration due to copy):
            # SELECT ref(GENERATE_DATE_ARRAY(...)) ->
            # SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...))))
            if not isinstance(parent, exp.Unnest):
                unnest = exp.Unnest(expressions=[generate_date_array.copy()])
                generate_date_array.replace(
                    exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery()
                )

            if (
                isinstance(parent, exp.Unnest)
                and isinstance(parent.parent, (exp.From, exp.Join))
                and len(parent.expressions) == 1
            ):
                _unnest_generate_date_array(parent)

    return expression


def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            parameters=seq_get(args, 4),
            group=seq_get(args, 5) or exp.Literal.number(0),
        )

    return _builder


def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str:
    # Other dialects don't support all of the following parameters, so we need to
    # generate default values as necessary to ensure the transpilation is correct
    group = expression.args.get("group")

    # To avoid generating all these default values, we set group to None if
    # it's 0 (also the default value), which doesn't trigger the following chain
    if group and group.name == "0":
        group = None

    parameters = expression.args.get("parameters") or (group and exp.Literal.string("c"))
    occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1))
    position = expression.args.get("position") or (occurrence and exp.Literal.number(1))

    return self.func(
        "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL",
        expression.this,
        expression.expression,
        position,
        occurrence,
        parameters,
        group,
    )


def _json_extract_value_array_sql(
    self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray
) -> str:
    json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression)
    ident = exp.to_identifier("x")

    if isinstance(expression, exp.JSONValueArray):
        this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR)
    else:
        this = exp.ParseJSON(this=f"TO_JSON({ident})")

    transform_lambda = exp.Lambda(expressions=[ident], this=this)

    return self.func("TRANSFORM", json_extract, transform_lambda)
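
# Illustrative usage sketch (public sqlglot API): because _regexpextract_sql fills
# the optional arguments right-to-left and drops a group of 0, a plain two-argument
# REGEXP_SUBSTR roundtrips without spurious defaults.
#
#   >>> import sqlglot
#   >>> sqlglot.transpile("SELECT REGEXP_SUBSTR(s, 'a.b')", read="snowflake", write="snowflake")[0]
#   "SELECT REGEXP_SUBSTR(s, 'a.b')"
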

def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        scope = build_scope(expression)
        if not scope:
            return expression

        unnests = list(scope.find_all(exp.Unnest))

        if not unnests:
            return expression

        taken_source_names = set(scope.sources)
        column_source: t.Dict[str, exp.Identifier] = {}

        unnest_identifier: t.Optional[exp.Identifier] = None
        orig_expression = expression.copy()

        for unnest in unnests:
            if not isinstance(unnest.parent, (exp.From, exp.Join)):
                continue

            # Try to infer column names produced by an unnest operator. This is only possible
            # when we can peek into the (statically known) contents of the unnested value.
            unnest_columns: t.Set[str] = set()
            for unnest_expr in unnest.expressions:
                if not isinstance(unnest_expr, exp.Array):
                    continue

                for array_expr in unnest_expr.expressions:
                    if not (
                        isinstance(array_expr, exp.Struct)
                        and array_expr.expressions
                        and all(
                            isinstance(struct_expr, exp.PropertyEQ)
                            for struct_expr in array_expr.expressions
                        )
                    ):
                        continue

                    unnest_columns.update(
                        struct_expr.this.name.lower() for struct_expr in array_expr.expressions
                    )
                    break

                if unnest_columns:
                    break

            unnest_alias = unnest.args.get("alias")
            if not unnest_alias:
                alias_name = find_new_name(taken_source_names, "value")
                taken_source_names.add(alias_name)

                # Produce a `TableAlias` AST similar to what is produced for BigQuery. This
                # will be corrected later, when we generate SQL for the `Unnest` AST node.
                aliased_unnest = exp.alias_(unnest, None, table=[alias_name])
                scope.replace(unnest, aliased_unnest)

                unnest_identifier = aliased_unnest.args["alias"].columns[0]
            else:
                alias_columns = getattr(unnest_alias, "columns", [])
                unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0)

            if not isinstance(unnest_identifier, exp.Identifier):
                return orig_expression

            column_source.update({c.lower(): unnest_identifier for c in unnest_columns})

        for column in scope.columns:
            if column.table:
                continue

            table = column_source.get(column.name.lower())
            if (
                unnest_identifier
                and not table
                and len(scope.sources) == 1
                and column.name.lower() != unnest_identifier.name.lower()
            ):
                table = unnest_identifier

            column.set("table", table and table.copy())

    return expression

def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations
        # to Snowflake. It should not affect roundtrips, because `Unnest` nodes cannot be produced
        # by Snowflake's parser.
        #
        # Additionally, at the time of writing this, BigQuery is the only dialect that produces a
        # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`.
        unnest_aliases = set()
        for unnest in find_all_in_scope(expression, exp.Unnest):
            unnest_alias = unnest.args.get("alias")
            if (
                isinstance(unnest_alias, exp.TableAlias)
                and not unnest_alias.this
                and len(unnest_alias.columns) == 1
            ):
                unnest_aliases.add(unnest_alias.columns[0].name)

        if unnest_aliases:
            for c in find_all_in_scope(expression, exp.Column):
                if c.table in unnest_aliases:
                    bracket_lhs = c.args["table"]
                    bracket_rhs = exp.Literal.string(c.name)
                    bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs])

                    if c.parent is expression:
                        # Retain column projection names by using aliases
                        c.replace(exp.alias_(bracket, c.this.copy()))
                    else:
                        c.replace(bracket)

    return expression


class Snowflake(Dialect):
    # https://docs.snowflake.com/en/sql-reference/identifiers-syntax
    NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE
    NULL_ORDERING = "nulls_are_large"
    TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'"
    SUPPORTS_USER_DEFINED_TYPES = False
    SUPPORTS_SEMI_ANTI_JOIN = False
    PREFER_CTE_ALIAS_COLUMN = True
    TABLESAMPLE_SIZE_IS_PERCENT = True
    COPY_PARAMS_ARE_CSV = False
    ARRAY_AGG_INCLUDES_NULLS = None
    ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False
    TRY_CAST_REQUIRES_STRING = True

    ANNOTATORS = {
        **Dialect.ANNOTATORS,
        **{
            expr_type: lambda self, e: self._annotate_by_args(e, "this")
            for expr_type in (exp.Reverse,)
        },
        exp.ConcatWs: lambda self, e: self._annotate_by_args(e, "expressions"),
    }

    TIME_MAPPING = {
        "YYYY": "%Y",
        "yyyy": "%Y",
        "YY": "%y",
        "yy": "%y",
        "MMMM": "%B",
        "mmmm": "%B",
        "MON": "%b",
        "mon": "%b",
        "MM": "%m",
        "mm": "%m",
        "DD": "%d",
        "dd": "%-d",
        "DY": "%a",
        "dy": "%w",
        "HH24": "%H",
        "hh24": "%H",
        "HH12": "%I",
        "hh12": "%I",
        "MI": "%M",
        "mi": "%M",
        "SS": "%S",
        "ss": "%S",
        "FF6": "%f",
        "ff6": "%f",
    }

    DATE_PART_MAPPING = {
        **Dialect.DATE_PART_MAPPING,
        "ISOWEEK": "WEEKISO",
    }
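
    # Illustrative usage sketch (hedged: output shown is approximate and may vary by
    # sqlglot version): TIME_MAPPING translates Snowflake format tokens to and from
    # the strftime-style tokens sqlglot uses internally, e.g. when transpiling a
    # formatted TO_TIMESTAMP to DuckDB's STRPTIME.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT TO_TIMESTAMP(s, 'YYYY-MM-DD HH24:MI:SS')", read="snowflake", write="duckdb")[0]
    #   "SELECT STRPTIME(s, '%Y-%m-%d %H:%M:%S')"
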

    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)

    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")

    class Parser(parser.Parser):
        IDENTIFY_PIVOT_STRINGS = True
        DEFAULT_SAMPLING_METHOD = "BERNOULLI"
        COLON_IS_VARIANT_EXTRACT = True
        JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

        ID_VAR_TOKENS = {
            *parser.Parser.ID_VAR_TOKENS,
            TokenType.EXCEPT,
            TokenType.MATCH_CONDITION,
        }

        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
        TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

        COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
            "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
            "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
                this=seq_get(args, 1), expression=seq_get(args, 0)
            ),
            "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
                # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
                start=seq_get(args, 0),
                end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
                step=seq_get(args, 2),
            ),
            "ARRAY_SORT": exp.SortArray.from_arg_list,
            "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
            "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
            "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
            "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
            "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
            "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
            "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
            "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
            "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
            "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
            "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
            "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
            "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
            "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
            "DATE_TRUNC": _date_trunc_to_time,
            "DATEADD": _build_date_time_add(exp.DateAdd),
            "DATEDIFF": _build_datediff,
            "DIV0": _build_if_from_div0,
            "EDITDISTANCE": lambda args: exp.Levenshtein(
                this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
            ),
            "FLATTEN": exp.Explode.from_arg_list,
            "GET": exp.GetExtract.from_arg_list,
            "GET_PATH": lambda args, dialect: exp.JSONExtract(
                this=seq_get(args, 0),
                expression=dialect.to_json_path(seq_get(args, 1)),
                requires_json=True,
            ),
            "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
            "IFF": exp.If.from_arg_list,
            "LAST_DAY": lambda args: exp.LastDay(
                this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
            ),
            "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "NULLIFZERO": _build_if_from_nullifzero,
            "OBJECT_CONSTRUCT": _build_object_construct,
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REGEXP_REPLACE": _build_regexp_replace,
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
            "REPLACE": build_replace_with_optional_replacement,
            "RLIKE": exp.RegexpLike.from_arg_list,
            "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
            "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
            "TIMEADD": _build_date_time_add(exp.TimeAdd),
            "TIMEDIFF": _build_datediff,
            "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
            "TIMESTAMPDIFF": _build_datediff,
            "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
            "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
            "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
            "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
            "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
            "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
            "TRY_TO_TIMESTAMP": _build_datetime(
                "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
            ),
            "TO_CHAR": build_timetostr_or_tochar,
            "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
            "TO_NUMBER": lambda args: exp.ToNumber(
                this=seq_get(args, 0),
                format=seq_get(args, 1),
                precision=seq_get(args, 2),
                scale=seq_get(args, 3),
            ),
            "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
            "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
            "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
            "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
            "TO_VARCHAR": exp.ToChar.from_arg_list,
            "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
            "ZEROIFNULL": _build_if_from_zeroifnull,
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "DATE_PART": lambda self: self._parse_date_part(),
            "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
            "LISTAGG": lambda self: self._parse_string_agg(),
            "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

        ALTER_PARSERS = {
            **parser.Parser.ALTER_PARSERS,
            "SESSION": lambda self: self._parse_alter_session(),
            "UNSET": lambda self: self.expression(
                exp.Set,
                tag=self._match_text_seq("TAG"),
                expressions=self._parse_csv(self._parse_id_var),
                unset=True,
            ),
        }

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.GET: lambda self: self._parse_get(),
            TokenType.PUT: lambda self: self._parse_put(),
            TokenType.SHOW: lambda self: self._parse_show(),
        }
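
        # Illustrative usage sketch (public sqlglot API): FUNCTIONS maps Snowflake
        # spellings onto dialect-agnostic nodes; e.g. IFF(...) parses straight into
        # exp.If, and LEN/LENGTH into exp.Length with binary=True.
        #
        #   >>> from sqlglot import exp, parse_one
        #   >>> isinstance(parse_one("SELECT IFF(x, 1, 2)", read="snowflake").selects[0], exp.If)
        #   True
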
        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "CREDENTIALS": lambda self: self._parse_credentials_property(),
            "FILE_FORMAT": lambda self: self._parse_file_format_property(),
            "LOCATION": lambda self: self._parse_location_property(),
            "TAG": lambda self: self._parse_tag(),
            "USING": lambda self: self._match_text_seq("TEMPLATE")
            and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
        }

        TYPE_CONVERTERS = {
            # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
            exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
        }

        SHOW_PARSERS = {
            "DATABASES": _show_parser("DATABASES"),
            "TERSE DATABASES": _show_parser("DATABASES"),
            "SCHEMAS": _show_parser("SCHEMAS"),
            "TERSE SCHEMAS": _show_parser("SCHEMAS"),
            "OBJECTS": _show_parser("OBJECTS"),
            "TERSE OBJECTS": _show_parser("OBJECTS"),
            "TABLES": _show_parser("TABLES"),
            "TERSE TABLES": _show_parser("TABLES"),
            "VIEWS": _show_parser("VIEWS"),
            "TERSE VIEWS": _show_parser("VIEWS"),
            "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
            "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
            "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
            "SEQUENCES": _show_parser("SEQUENCES"),
            "TERSE SEQUENCES": _show_parser("SEQUENCES"),
            "STAGES": _show_parser("STAGES"),
            "COLUMNS": _show_parser("COLUMNS"),
            "USERS": _show_parser("USERS"),
            "TERSE USERS": _show_parser("USERS"),
            "FILE FORMATS": _show_parser("FILE FORMATS"),
            "FUNCTIONS": _show_parser("FUNCTIONS"),
            "PROCEDURES": _show_parser("PROCEDURES"),
            "WAREHOUSES": _show_parser("WAREHOUSES"),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "WITH": lambda self: self._parse_with_constraint(),
            "MASKING": lambda self: self._parse_with_constraint(),
            "PROJECTION": lambda self: self._parse_with_constraint(),
            "TAG": lambda self: self._parse_with_constraint(),
        }

        STAGED_FILE_SINGLE_TOKENS = {
            TokenType.DOT,
            TokenType.MOD,
            TokenType.SLASH,
        }

        FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

        SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

        NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

        LAMBDAS = {
            **parser.Parser.LAMBDAS,
            TokenType.ARROW: lambda self, expressions: self.expression(
                exp.Lambda,
                this=self._replace_lambda(
                    self._parse_assignment(),
                    expressions,
                ),
                expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
            ),
        }

        def _parse_use(self) -> exp.Use:
            if self._match_text_seq("SECONDARY", "ROLES"):
                this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
                roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
                return self.expression(
                    exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
                )

            return super()._parse_use()
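
        # Illustrative usage sketch (public sqlglot API): SHOW_PARSERS routes SHOW
        # statements into structured exp.Show nodes rather than opaque commands.
        #
        #   >>> from sqlglot import parse_one
        #   >>> show = parse_one("SHOW TERSE TABLES IN SCHEMA db1.s1", read="snowflake")
        #   >>> show.args["scope_kind"], show.args["terse"]
        #   ('SCHEMA', True)
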
        def _negate_range(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            if not this:
                return this

            query = this.args.get("query")
            if isinstance(this, exp.In) and isinstance(query, exp.Query):
                # Snowflake treats `value NOT IN (subquery)` as `value <> ALL (subquery)`, so
                # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`,
                # which can produce different results (most likely a Snowflake bug).
                #
                # https://docs.snowflake.com/en/sql-reference/functions/in
                # Context: https://github.com/tobymao/sqlglot/issues/3890
                return self.expression(
                    exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
                )

            return self.expression(exp.Not, this=this)

        def _parse_tag(self) -> exp.Tags:
            return self.expression(
                exp.Tags,
                expressions=self._parse_wrapped_csv(self._parse_property),
            )

        def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
            if self._prev.token_type != TokenType.WITH:
                self._retreat(self._index - 1)

            if self._match_text_seq("MASKING", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.MaskingPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                    expressions=self._match(TokenType.USING)
                    and self._parse_wrapped_csv(self._parse_id_var),
                )
            if self._match_text_seq("PROJECTION", "POLICY"):
                policy = self._parse_column()
                return self.expression(
                    exp.ProjectionPolicyColumnConstraint,
                    this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                )
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return None

        def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
            if self._match(TokenType.TAG):
                return self._parse_tag()

            return super()._parse_with_property()

        def _parse_create(self) -> exp.Create | exp.Command:
            expression = super()._parse_create()
            if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
                # Replace the Table node with the enclosed Identifier
                expression.this.replace(expression.this.this)

            return expression

        # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
        # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
        def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
            this = self._parse_var() or self._parse_type()

            if not this:
                return None

            self._match(TokenType.COMMA)
            expression = self._parse_bitwise()
            this = map_date_part(this)
            name = this.name.upper()

            if name.startswith("EPOCH"):
                if name == "EPOCH_MILLISECOND":
                    scale = 10**3
                elif name == "EPOCH_MICROSECOND":
                    scale = 10**6
                elif name == "EPOCH_NANOSECOND":
                    scale = 10**9
                else:
                    scale = None

                ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
                to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

                if scale:
                    to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

                return to_unix

            return self.expression(exp.Extract, this=this, expression=expression)

        def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
            if is_map:
                # Keys are strings in Snowflake's objects, see also:
                # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
                # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
                return self._parse_slice(self._parse_string())

            return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))
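
        # Illustrative usage sketch (public sqlglot API): per _parse_date_part above,
        # the EPOCH_* parts become a (possibly scaled) exp.TimeToUnix instead of a
        # plain exp.Extract.
        #
        #   >>> from sqlglot import exp, parse_one
        #   >>> parse_one("SELECT DATE_PART(EPOCH_SECOND, ts)", read="snowflake").find(exp.TimeToUnix) is not None
        #   True
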
        def _parse_lateral(self) -> t.Optional[exp.Lateral]:
            lateral = super()._parse_lateral()
            if not lateral:
                return lateral

            if isinstance(lateral.this, exp.Explode):
                table_alias = lateral.args.get("alias")
                columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
                if table_alias and not table_alias.args.get("columns"):
                    table_alias.set("columns", columns)
                elif not table_alias:
                    exp.alias_(lateral, "_flattened", table=columns, copy=False)

            return lateral

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            # https://docs.snowflake.com/en/user-guide/querying-stage
            if self._match(TokenType.STRING, advance=False):
                table = self._parse_string()
            elif self._match_text_seq("@", advance=False):
                table = self._parse_location_path()
            else:
                table = None

            if table:
                file_format = None
                pattern = None

                wrapped = self._match(TokenType.L_PAREN)
                while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                    if self._match_text_seq("FILE_FORMAT", "=>"):
                        file_format = self._parse_string() or super()._parse_table_parts(
                            is_db_reference=is_db_reference
                        )
                    elif self._match_text_seq("PATTERN", "=>"):
                        pattern = self._parse_string()
                    else:
                        break

                    self._match(TokenType.COMMA)

                table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
            else:
                table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

            return table

        def _parse_table(
            self,
            schema: bool = False,
            joins: bool = False,
            alias_tokens: t.Optional[t.Collection[TokenType]] = None,
            parse_bracket: bool = False,
            is_db_reference: bool = False,
            parse_partition: bool = False,
            consume_pipe: bool = False,
        ) -> t.Optional[exp.Expression]:
            table = super()._parse_table(
                schema=schema,
                joins=joins,
                alias_tokens=alias_tokens,
                parse_bracket=parse_bracket,
                is_db_reference=is_db_reference,
                parse_partition=parse_partition,
            )
            if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
                table_from_rows = table.this
                for arg in exp.TableFromRows.arg_types:
                    if arg != "this":
                        table_from_rows.set(arg, table.args.get(arg))

                table = table_from_rows

            return table

        def _parse_id_var(
            self,
            any_token: bool = True,
            tokens: t.Optional[t.Collection[TokenType]] = None,
        ) -> t.Optional[exp.Expression]:
            if self._match_text_seq("IDENTIFIER", "("):
                identifier = (
                    super()._parse_id_var(any_token=any_token, tokens=tokens)
                    or self._parse_string()
                )
                self._match_r_paren()
                return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

            return super()._parse_id_var(any_token=any_token, tokens=tokens)
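
        # Illustrative usage sketch (public sqlglot API): IDENTIFIER(...) is kept as
        # an anonymous IDENTIFIER call by _parse_id_var, so it roundtrips verbatim.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT * FROM IDENTIFIER('my_table')", read="snowflake", write="snowflake")[0]
        #   "SELECT * FROM IDENTIFIER('my_table')"
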
self._match_text_seq("PACKAGE"): 1005 scope_kind += " PACKAGE" 1006 scope = self._parse_table_parts() 1007 elif self._match_set(self.DB_CREATABLES): 1008 scope_kind = self._prev.text.upper() 1009 if self._curr: 1010 scope = self._parse_table_parts() 1011 elif self._curr: 1012 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 1013 scope = self._parse_table_parts() 1014 1015 return self.expression( 1016 exp.Show, 1017 **{ 1018 "terse": terse, 1019 "this": this, 1020 "history": history, 1021 "like": like, 1022 "scope": scope, 1023 "scope_kind": scope_kind, 1024 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1025 "limit": self._parse_limit(), 1026 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1027 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1028 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1029 }, 1030 ) 1031 1032 def _parse_put(self) -> exp.Put | exp.Command: 1033 if self._curr.token_type != TokenType.STRING: 1034 return self._parse_as_command(self._prev) 1035 1036 return self.expression( 1037 exp.Put, 1038 this=self._parse_string(), 1039 target=self._parse_location_path(), 1040 properties=self._parse_properties(), 1041 ) 1042 1043 def _parse_get(self) -> t.Optional[exp.Expression]: 1044 start = self._prev 1045 1046 # If we detect GET( then we need to parse a function, not a statement 1047 if self._match(TokenType.L_PAREN): 1048 self._retreat(self._index - 2) 1049 return self._parse_expression() 1050 1051 target = self._parse_location_path() 1052 1053 # Parse as command if unquoted file path 1054 if self._curr.token_type == TokenType.URI_START: 1055 return self._parse_as_command(start) 1056 1057 return self.expression( 1058 exp.Get, 1059 this=self._parse_string(), 1060 target=target, 1061 properties=self._parse_properties(), 1062 ) 1063 1064 def _parse_location_property(self) -> exp.LocationProperty: 1065 self._match(TokenType.EQ) 1066 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1067 1068 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1069 # Parse either a subquery or a staged file 1070 return ( 1071 self._parse_select(table=True, parse_subquery_alias=False) 1072 if self._match(TokenType.L_PAREN, advance=False) 1073 else self._parse_table_parts() 1074 ) 1075 1076 def _parse_location_path(self) -> exp.Var: 1077 start = self._curr 1078 self._advance_any(ignore_reserved=True) 1079 1080 # We avoid consuming a comma token because external tables like @foo and @bar 1081 # can be joined in a query with a comma separator, as well as closing paren 1082 # in case of subqueries 1083 while self._is_connected() and not self._match_set( 1084 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1085 ): 1086 self._advance_any(ignore_reserved=True) 1087 1088 return exp.var(self._find_sql(start, self._prev)) 1089 1090 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1091 this = super()._parse_lambda_arg() 1092 1093 if not this: 1094 return this 1095 1096 typ = self._parse_types() 1097 1098 if typ: 1099 return self.expression(exp.Cast, this=this, to=typ) 1100 1101 return this 1102 1103 def _parse_foreign_key(self) -> exp.ForeignKey: 1104 # inlineFK, the REFERENCES columns are implied 1105 if self._match(TokenType.REFERENCES, advance=False): 1106 return self.expression(exp.ForeignKey) 1107 1108 # outoflineFK, explicitly names the columns 1109 return super()._parse_foreign_key() 1110 1111 def 
        def _parse_file_format_property(self) -> exp.FileFormatProperty:
            self._match(TokenType.EQ)
            if self._match(TokenType.L_PAREN, advance=False):
                expressions = self._parse_wrapped_options()
            else:
                expressions = [self._parse_format_name()]

            return self.expression(
                exp.FileFormatProperty,
                expressions=expressions,
            )

        def _parse_credentials_property(self) -> exp.CredentialsProperty:
            return self.expression(
                exp.CredentialsProperty,
                expressions=self._parse_wrapped_options(),
            )

        def _parse_semantic_view(self) -> exp.SemanticView:
            kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

            while self._curr and not self._match(TokenType.R_PAREN, advance=False):
                if self._match_text_seq("DIMENSIONS"):
                    kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("METRICS"):
                    kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
                if self._match_text_seq("WHERE"):
                    kwargs["where"] = self._parse_expression()

            return self.expression(exp.SemanticView, **kwargs)

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\", "'"]
        HEX_STRINGS = [("x'", "'"), ("X'", "'")]
        RAW_STRINGS = ["$$"]
        COMMENTS = ["--", "//", ("/*", "*/")]
        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "BYTEINT": TokenType.INT,
            "FILE://": TokenType.URI_START,
            "FILE FORMAT": TokenType.FILE_FORMAT,
            "GET": TokenType.GET,
            "MATCH_CONDITION": TokenType.MATCH_CONDITION,
            "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
            "MINUS": TokenType.EXCEPT,
            "NCHAR VARYING": TokenType.VARCHAR,
            "PUT": TokenType.PUT,
            "REMOVE": TokenType.COMMAND,
            "RM": TokenType.COMMAND,
            "SAMPLE": TokenType.TABLE_SAMPLE,
            "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
            "SQL_DOUBLE": TokenType.DOUBLE,
            "SQL_VARCHAR": TokenType.VARCHAR,
            "STAGE": TokenType.STAGE,
            "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
            "STREAMLIT": TokenType.STREAMLIT,
            "TAG": TokenType.TAG,
            "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
            "TOP": TokenType.TOP,
            "WAREHOUSE": TokenType.WAREHOUSE,
        }
        KEYWORDS.pop("/*+")

        SINGLE_TOKENS = {
            **tokens.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }

        VAR_SINGLE_TOKENS = {"$"}

        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
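
    # Illustrative usage sketch (hedged: approximate output): some dialect behavior
    # lives at the token level, e.g. the KEYWORDS entry mapping MINUS to EXCEPT.
    #
    #   >>> import sqlglot
    #   >>> sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0]
    #   'SELECT 1 EXCEPT SELECT 2'
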
    class Generator(generator.Generator):
        PARAMETER_TOKEN = "$"
        MATCHED_BY_SOURCE = False
        SINGLE_STRING_INTERVAL = True
        JOIN_HINTS = False
        TABLE_HINTS = False
        QUERY_HINTS = False
        AGGREGATE_FILTER_SUPPORTED = False
        SUPPORTS_TABLE_COPY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        JSON_KEY_VALUE_PAIR_SEP = ","
        INSERT_OVERWRITE = " OVERWRITE INTO"
        STRUCT_DELIMITER = ("(", ")")
        COPY_PARAMS_ARE_WRAPPED = False
        COPY_PARAMS_EQ_REQUIRED = True
        STAR_EXCEPT = "EXCLUDE"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        ARRAY_CONCAT_IS_VAR_LEN = False
        SUPPORTS_CONVERT_TIMEZONE = True
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_MEDIAN = True
        ARRAY_SIZE_NAME = "ARRAY_SIZE"
        SUPPORTS_DECODE_CASE = True
        IS_BOOL_ALLOWED = False

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: rename_func("MAX_BY"),
            exp.ArgMin: rename_func("MIN_BY"),
            exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
            exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
            exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
            exp.AtTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), e.this
            ),
            exp.BitwiseOr: rename_func("BITOR"),
            exp.BitwiseXor: rename_func("BITXOR"),
            exp.BitwiseAnd: rename_func("BITAND"),
            exp.BitwiseAndAgg: rename_func("BITANDAGG"),
            exp.BitwiseOrAgg: rename_func("BITORAGG"),
            exp.BitwiseXorAgg: rename_func("BITXORAGG"),
            exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
            exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
            exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
            exp.DateAdd: date_delta_sql("DATEADD"),
            exp.DateDiff: date_delta_sql("DATEDIFF"),
            exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
            exp.DatetimeDiff: timestampdiff_sql,
            exp.DateStrToDate: datestrtodate_sql,
            exp.DayOfMonth: rename_func("DAYOFMONTH"),
            exp.DayOfWeek: rename_func("DAYOFWEEK"),
            exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
            exp.DayOfYear: rename_func("DAYOFYEAR"),
            exp.Explode: rename_func("FLATTEN"),
            exp.Extract: lambda self, e: self.func(
                "DATE_PART", map_date_part(e.this, self.dialect), e.expression
            ),
            exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
            exp.FileFormatProperty: lambda self,
            e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
            exp.FromTimeZone: lambda self, e: self.func(
                "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
            ),
            exp.GenerateSeries: lambda self, e: self.func(
                "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
            ),
            exp.GetExtract: rename_func("GET"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
            exp.If: if_sql(name="IFF", false_value="NULL"),
            exp.JSONExtractArray: _json_extract_value_array_sql,
            exp.JSONExtractScalar: lambda self, e: self.func(
                "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
            ),
            exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
            exp.JSONPathRoot: lambda *_: "",
            exp.JSONValueArray: _json_extract_value_array_sql,
            exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
                rename_func("EDITDISTANCE")
            ),
            exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
            exp.LogicalAnd: rename_func("BOOLAND_AGG"),
            exp.LogicalOr: rename_func("BOOLOR_AGG"),
            exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.MakeInterval: no_make_interval_sql,
            exp.Max: max_or_greatest,
            exp.Min: min_or_least,
            exp.ParseJSON: lambda self, e: self.func(
                "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
            ),
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.PercentileCont: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.PercentileDisc: transforms.preprocess(
                [transforms.add_within_group_for_percentiles]
            ),
            exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
            exp.RegexpExtract: _regexpextract_sql,
            exp.RegexpExtractAll: _regexpextract_sql,
            exp.RegexpILike: _regexpilike_sql,
            exp.Rand: rename_func("RANDOM"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_window_clause,
                    transforms.eliminate_distinct_on,
                    transforms.explode_projection_to_unnest(),
                    transforms.eliminate_semi_and_anti_joins,
                    _transform_generate_date_array,
                    _qualify_unnested_columns,
                    _eliminate_dot_variant_lookup,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SortArray: rename_func("ARRAY_SORT"),
            exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
            exp.StartsWith: rename_func("STARTSWITH"),
            exp.EndsWith: rename_func("ENDSWITH"),
            exp.StrPosition: lambda self, e: strposition_sql(
                self, e, func_name="CHARINDEX", supports_position=True
            ),
            exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
            exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
            exp.Stuff: rename_func("INSERT"),
            exp.StPoint: rename_func("ST_MAKEPOINT"),
            exp.TimeAdd: date_delta_sql("TIMEADD"),
            exp.Timestamp: no_timestamp_sql,
            exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
            exp.TimestampDiff: lambda self, e: self.func(
                "TIMESTAMPDIFF", e.unit, e.expression, e.this
            ),
            exp.TimestampTrunc: timestamptrunc_sql(),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
            exp.ToArray: rename_func("TO_ARRAY"),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
            exp.ToDouble: rename_func("TO_DOUBLE"),
            exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
            exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
            exp.TsOrDsToDate: lambda self, e: self.func(
                "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
            ),
            exp.TsOrDsToTime: lambda self, e: self.func(
                "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
            ),
            exp.Unhex: rename_func("HEX_DECODE_BINARY"),
            exp.UnixToTime: rename_func("TO_TIMESTAMP"),
            exp.Uuid: rename_func("UUID_STRING"),
            exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
            exp.WeekOfYear: rename_func("WEEKOFYEAR"),
            exp.Xor: rename_func("BOOLXOR"),
        }

        SUPPORTED_JSON_PATH_PARTS = {
            exp.JSONPathKey,
            exp.JSONPathRoot,
            exp.JSONPathSubscript,
        }

        TYPE_MAPPING = {
            **generator.Generator.TYPE_MAPPING,
            exp.DataType.Type.NESTED: "OBJECT",
            exp.DataType.Type.STRUCT: "OBJECT",
            exp.DataType.Type.BIGDECIMAL: "DOUBLE",
        }

        TOKEN_MAPPING = {
            TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
        }

        PROPERTIES_LOCATION = {
            **generator.Generator.PROPERTIES_LOCATION,
            exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
            exp.LocationProperty: exp.Properties.Location.POST_WITH,
            exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
            exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
            exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
        }

        UNSUPPORTED_VALUES_EXPRESSIONS = {
            exp.Map,
            exp.StarMap,
            exp.Struct,
            exp.VarMap,
        }

        RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

        def with_properties(self, properties: exp.Properties) -> str:
            return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")
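
        # Illustrative usage sketch (public sqlglot API): TRANSFORMS is the write-side
        # mirror of Parser.FUNCTIONS; canonical nodes are rendered with Snowflake
        # spellings, e.g. exp.If via if_sql(name="IFF").
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT IF(x, 1, 2)", read="duckdb", write="snowflake")[0]
        #   'SELECT IFF(x, 1, 2)'
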
        def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
            if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
                values_as_table = False

            return super().values_sql(expression, values_as_table=values_as_table)

        def datatype_sql(self, expression: exp.DataType) -> str:
            expressions = expression.expressions
            if (
                expressions
                and expression.is_type(*exp.DataType.STRUCT_TYPES)
                and any(isinstance(field_type, exp.DataType) for field_type in expressions)
            ):
                # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
                return "OBJECT"

            return super().datatype_sql(expression)

        def tonumber_sql(self, expression: exp.ToNumber) -> str:
            return self.func(
                "TO_NUMBER",
                expression.this,
                expression.args.get("format"),
                expression.args.get("precision"),
                expression.args.get("scale"),
            )

        def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
            milli = expression.args.get("milli")
            if milli is not None:
                milli_to_nano = milli.pop() * exp.Literal.number(1000000)
                expression.set("nano", milli_to_nano)

            return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            if expression.is_type(exp.DataType.Type.GEOGRAPHY):
                return self.func("TO_GEOGRAPHY", expression.this)
            if expression.is_type(exp.DataType.Type.GEOMETRY):
                return self.func("TO_GEOMETRY", expression.this)

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            value = expression.this

            if value.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                value = annotate_types(value, dialect=self.dialect)

            # Snowflake requires that TRY_CAST's value be a string. If TRY_CAST is being
            # roundtripped (Snowflake is the only dialect that sets "requires_string") or
            # if we can deduce that the value is a string, then we can generate TRY_CAST
            if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
                return super().trycast_sql(expression)

            return self.cast_sql(expression)

        def log_sql(self, expression: exp.Log) -> str:
            if not expression.expression:
                return self.func("LN", expression.this)

            return super().log_sql(expression)
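
        # Illustrative usage sketch (hedged: assumes no schema is available, so the
        # operand's type is unknown): trycast_sql degrades TRY_CAST to CAST unless the
        # operand is provably a string.
        #
        #   >>> import sqlglot
        #   >>> sqlglot.transpile("SELECT TRY_CAST(c AS INT)", read="spark", write="snowflake")[0]
        #   'SELECT CAST(c AS INT)'
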
f"{value} FROM " 1480 ) 1481 1482 return f"{value}{explode}{alias}" 1483 1484 def show_sql(self, expression: exp.Show) -> str: 1485 terse = "TERSE " if expression.args.get("terse") else "" 1486 history = " HISTORY" if expression.args.get("history") else "" 1487 like = self.sql(expression, "like") 1488 like = f" LIKE {like}" if like else "" 1489 1490 scope = self.sql(expression, "scope") 1491 scope = f" {scope}" if scope else "" 1492 1493 scope_kind = self.sql(expression, "scope_kind") 1494 if scope_kind: 1495 scope_kind = f" IN {scope_kind}" 1496 1497 starts_with = self.sql(expression, "starts_with") 1498 if starts_with: 1499 starts_with = f" STARTS WITH {starts_with}" 1500 1501 limit = self.sql(expression, "limit") 1502 1503 from_ = self.sql(expression, "from") 1504 if from_: 1505 from_ = f" FROM {from_}" 1506 1507 privileges = self.expressions(expression, key="privileges", flat=True) 1508 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1509 1510 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1511 1512 def describe_sql(self, expression: exp.Describe) -> str: 1513 # Default to table if kind is unknown 1514 kind_value = expression.args.get("kind") or "TABLE" 1515 kind = f" {kind_value}" if kind_value else "" 1516 this = f" {self.sql(expression, 'this')}" 1517 expressions = self.expressions(expression, flat=True) 1518 expressions = f" {expressions}" if expressions else "" 1519 return f"DESCRIBE{kind}{this}{expressions}" 1520 1521 def generatedasidentitycolumnconstraint_sql( 1522 self, expression: exp.GeneratedAsIdentityColumnConstraint 1523 ) -> str: 1524 start = expression.args.get("start") 1525 start = f" START {start}" if start else "" 1526 increment = expression.args.get("increment") 1527 increment = f" INCREMENT {increment}" if increment else "" 1528 1529 order = expression.args.get("order") 1530 if order is not None: 1531 order_clause = " ORDER" if order else " NOORDER" 1532 else: 1533 order_clause = "" 1534 1535 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1536 1537 def cluster_sql(self, expression: exp.Cluster) -> str: 1538 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1539 1540 def struct_sql(self, expression: exp.Struct) -> str: 1541 keys = [] 1542 values = [] 1543 1544 for i, e in enumerate(expression.expressions): 1545 if isinstance(e, exp.PropertyEQ): 1546 keys.append( 1547 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1548 ) 1549 values.append(e.expression) 1550 else: 1551 keys.append(exp.Literal.string(f"_{i}")) 1552 values.append(e) 1553 1554 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1555 1556 @unsupported_args("weight", "accuracy") 1557 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1558 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1559 1560 def alterset_sql(self, expression: exp.AlterSet) -> str: 1561 exprs = self.expressions(expression, flat=True) 1562 exprs = f" {exprs}" if exprs else "" 1563 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1564 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1565 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1566 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1567 tag = self.expressions(expression, key="tag", flat=True) 1568 tag = f" TAG {tag}" if tag else "" 1569 1570 
return f"SET{exprs}{file_format}{copy_options}{tag}" 1571 1572 def strtotime_sql(self, expression: exp.StrToTime): 1573 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1574 return self.func( 1575 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1576 ) 1577 1578 def timestampsub_sql(self, expression: exp.TimestampSub): 1579 return self.sql( 1580 exp.TimestampAdd( 1581 this=expression.this, 1582 expression=expression.expression * -1, 1583 unit=expression.unit, 1584 ) 1585 ) 1586 1587 def jsonextract_sql(self, expression: exp.JSONExtract): 1588 this = expression.this 1589 1590 # JSON strings are valid coming from other dialects such as BQ so 1591 # for these cases we PARSE_JSON preemptively 1592 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1593 "requires_json" 1594 ): 1595 this = exp.ParseJSON(this=this) 1596 1597 return self.func( 1598 "GET_PATH", 1599 this, 1600 expression.expression, 1601 ) 1602 1603 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1604 this = expression.this 1605 if this.is_string: 1606 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1607 1608 return self.func("TO_CHAR", this, self.format_time(expression)) 1609 1610 def datesub_sql(self, expression: exp.DateSub) -> str: 1611 value = expression.expression 1612 if value: 1613 value.replace(value * (-1)) 1614 else: 1615 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1616 1617 return date_delta_sql("DATEADD")(self, expression) 1618 1619 def select_sql(self, expression: exp.Select) -> str: 1620 limit = expression.args.get("limit") 1621 offset = expression.args.get("offset") 1622 if offset and not limit: 1623 expression.limit(exp.Null(), copy=False) 1624 return super().select_sql(expression) 1625 1626 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1627 is_materialized = expression.find(exp.MaterializedProperty) 1628 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1629 1630 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1631 # For materialized views, COPY GRANTS is located *before* the columns list 1632 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1633 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1634 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1635 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1636 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1637 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1638 1639 this_name = self.sql(expression.this, "this") 1640 copy_grants = self.sql(copy_grants_property) 1641 this_schema = self.schema_columns_sql(expression.this) 1642 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1643 1644 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1645 1646 return super().createable_sql(expression, locations) 1647 1648 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1649 this = expression.this 1650 1651 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1652 # and add it later as part of the WITHIN GROUP clause 1653 order = this if isinstance(this, exp.Order) else None 1654 if order: 1655 expression.set("this", order.this.pop()) 1656 1657 expr_sql = super().arrayagg_sql(expression) 1658 1659 
if order: 1660 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1661 1662 return expr_sql 1663 1664 def array_sql(self, expression: exp.Array) -> str: 1665 expressions = expression.expressions 1666 1667 first_expr = seq_get(expressions, 0) 1668 if isinstance(first_expr, exp.Select): 1669 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1670 if first_expr.text("kind").upper() == "STRUCT": 1671 object_construct_args = [] 1672 for expr in first_expr.expressions: 1673 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1674 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1675 name = expr.this if isinstance(expr, exp.Alias) else expr 1676 1677 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1678 1679 array_agg = exp.ArrayAgg( 1680 this=_build_object_construct(args=object_construct_args) 1681 ) 1682 1683 first_expr.set("kind", None) 1684 first_expr.set("expressions", [array_agg]) 1685 1686 return self.sql(first_expr.subquery()) 1687 1688 return inline_array_sql(self, expression) 1689 1690 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1691 zone = self.sql(expression, "this") 1692 if not zone: 1693 return super().currentdate_sql(expression) 1694 1695 expr = exp.Cast( 1696 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1697 to=exp.DataType(this=exp.DataType.Type.DATE), 1698 ) 1699 return self.sql(expr) 1700 1701 def dot_sql(self, expression: exp.Dot) -> str: 1702 this = expression.this 1703 1704 if not this.type: 1705 from sqlglot.optimizer.annotate_types import annotate_types 1706 1707 this = annotate_types(this, dialect=self.dialect) 1708 1709 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1710 # Generate colon notation for the top level STRUCT 1711 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1712 1713 return super().dot_sql(expression)
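As a hedged illustration of select_sql above, which injects a LIMIT NULL whenever an OFFSET appears without a LIMIT: a minimal sketch using sqlglot's public transpile entry point (the printed output is my expectation, not a captured run):

import sqlglot

# Hypothetical input: a query that uses OFFSET without LIMIT.
sql = "SELECT a FROM t OFFSET 10"

# Snowflake needs a LIMIT clause for OFFSET to be valid, so select_sql
# is expected to add LIMIT NULL during generation.
print(sqlglot.transpile(sql, read="postgres", write="snowflake")[0])
# expected: SELECT a FROM t LIMIT NULL OFFSET 10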
class Snowflake(Dialect):
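A brief, hedged usage sketch of the dialect through sqlglot's public entry points (parse_one and transpile are standard sqlglot API; the transpiled output is an expectation, not a captured run):

import sqlglot

# Parse Snowflake SQL into an AST and round-trip it.
ast = sqlglot.parse_one("SELECT IFF(a > 0, 1, 2) FROM t", read="snowflake")
print(ast.sql(dialect="snowflake"))  # SELECT IFF(a > 0, 1, 2) FROM t

# Transpile to another dialect; IFF is parsed into exp.If (see FUNCTIONS above),
# so the target dialect is expected to render its own conditional syntax.
print(sqlglot.transpile("SELECT IFF(a > 0, 1, 2) FROM t", read="snowflake", write="duckdb")[0])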
NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE  # https://docs.snowflake.com/en/sql-reference/identifiers-syntax

Specifies the strategy according to which identifiers should be normalized.
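A hedged sketch of the effect, using sqlglot's normalize_identifiers optimizer helper (the uppercased output is my expectation under this strategy, not a captured run):

from sqlglot import parse_one
from sqlglot.optimizer.normalize_identifiers import normalize_identifiers

# Unquoted identifiers are normalized to uppercase under UPPERCASE strategy.
ast = normalize_identifiers(parse_one("SELECT a FROM t"), dialect="snowflake")
print(ast.sql(dialect="snowflake"))  # expected: SELECT A FROM T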
NULL_ORDERING = "nulls_are_large"

Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
PREFER_CTE_ALIAS_COLUMN = True

Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.

For example,

WITH y(c) AS (
    SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0
) SELECT c FROM y;

will be rewritten as

WITH y(c) AS (
    SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
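A hedged sketch of triggering this rewrite through the optimizer's qualify pass (assuming qualify honors the flag; the aliased output is my expectation, not a captured run):

from sqlglot import parse_one
from sqlglot.optimizer.qualify import qualify

sql = "WITH y(c) AS (SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0) SELECT c FROM y"
ast = qualify(parse_one(sql, read="snowflake"), dialect="snowflake")
# The inner projection is expected to gain the CTE's column alias,
# i.e. SELECT SUM(a) AS c ... HAVING c > 0
print(ast.sql(dialect="snowflake"))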
TIME_MAPPING

Associates this dialect's time formats with their equivalent Python strftime formats.
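A hedged sketch of the mapping in action via transpilation: 'YYYY-MM-DD HH24:MI:SS' corresponds to '%Y-%m-%d %H:%M:%S' per TIME_MAPPING above (the exact target function is my expectation, not a captured run):

import sqlglot

sql = "SELECT TO_CHAR(ts, 'YYYY-MM-DD HH24:MI:SS') FROM t"
# The Snowflake format tokens are translated through TIME_MAPPING into
# strftime-style tokens for the target dialect.
print(sqlglot.transpile(sql, read="snowflake", write="duckdb")[0])
# expected: SELECT STRFTIME(ts, '%Y-%m-%d %H:%M:%S') FROM t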
    def quote_identifier(self, expression: E, identify: bool = True) -> E:
        # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an
        # unquoted DUAL keyword in a special way and does not map it to a user-defined table
        if (
            isinstance(expression, exp.Identifier)
            and isinstance(expression.parent, exp.Table)
            and expression.name.lower() == "dual"
        ):
            return expression  # type: ignore

        return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.

Arguments:
- expression: The expression of interest. If it's not an Identifier, this method is a no-op.
- identify: If set to False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
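A small sketch of the DUAL carve-out described above (the outputs are expectations, not captured runs):

import sqlglot

ast = sqlglot.parse_one("SELECT 1 FROM DUAL", read="snowflake")
# Even with identify=True, DUAL is expected to stay unquoted, while ordinary
# table names would come out quoted.
print(ast.sql(dialect="snowflake", identify=True))
# expected: SELECT 1 FROM DUAL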
Mapping of an escaped sequence (\\n) to its unescaped version (\n).
    class JSONPathTokenizer(jsonpath.JSONPathTokenizer):
        SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy()
        SINGLE_TOKENS.pop("$")
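Snowflake JSON paths omit the leading $ root used by standard JSONPath, which is why this subclass drops it from SINGLE_TOKENS. A minimal, hedged sketch (the round-trip is my expectation, not a captured run):

import sqlglot

# The path argument has no '$' root, unlike standard JSONPath.
ast = sqlglot.parse_one("SELECT GET_PATH(v, 'a[0].b') FROM t", read="snowflake")
print(ast.sql(dialect="snowflake"))
# expected to round-trip as: SELECT GET_PATH(v, 'a[0].b') FROM t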
class Parser(parser.Parser):
    IDENTIFY_PIVOT_STRINGS = True
    DEFAULT_SAMPLING_METHOD = "BERNOULLI"
    COLON_IS_VARIANT_EXTRACT = True
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True

    ID_VAR_TOKENS = {
        *parser.Parser.ID_VAR_TOKENS,
        TokenType.EXCEPT,
        TokenType.MATCH_CONDITION,
    }

    TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW}
    TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION)

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER}

    FUNCTIONS = {
        **parser.Parser.FUNCTIONS,
        "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list,
        "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args),
        "ARRAY_CONTAINS": lambda args: exp.ArrayContains(
            this=seq_get(args, 1), expression=seq_get(args, 0)
        ),
        "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries(
            # ARRAY_GENERATE_RANGE has an exclusive end; we normalize it to be inclusive
            start=seq_get(args, 0),
            end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)),
            step=seq_get(args, 2),
        ),
        "ARRAY_SORT": exp.SortArray.from_arg_list,
        "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"),
        "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"),
        "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"),
        "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"),
        "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"),
        "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"),
        "BITANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITAND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_AND_AGG": exp.BitwiseAndAgg.from_arg_list,
        "BIT_ANDAGG": exp.BitwiseAndAgg.from_arg_list,
        "BITORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITOR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_OR_AGG": exp.BitwiseOrAgg.from_arg_list,
        "BIT_ORAGG": exp.BitwiseOrAgg.from_arg_list,
        "BITXORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BITXOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XOR_AGG": exp.BitwiseXorAgg.from_arg_list,
        "BIT_XORAGG": exp.BitwiseXorAgg.from_arg_list,
        "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"),
        "DATE": _build_datetime("DATE", exp.DataType.Type.DATE),
        "DATE_TRUNC": _date_trunc_to_time,
        "DATEADD": _build_date_time_add(exp.DateAdd),
        "DATEDIFF": _build_datediff,
        "DIV0": _build_if_from_div0,
        "EDITDISTANCE": lambda args: exp.Levenshtein(
            this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2)
        ),
        "FLATTEN": exp.Explode.from_arg_list,
        "GET": exp.GetExtract.from_arg_list,
        "GET_PATH": lambda args, dialect: exp.JSONExtract(
            this=seq_get(args, 0),
            expression=dialect.to_json_path(seq_get(args, 1)),
            requires_json=True,
        ),
        "HEX_DECODE_BINARY": exp.Unhex.from_arg_list,
        "IFF": exp.If.from_arg_list,
        "LAST_DAY": lambda args: exp.LastDay(
            this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1))
        ),
        "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
        "NULLIFZERO": _build_if_from_nullifzero,
        "OBJECT_CONSTRUCT": _build_object_construct,
        "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REGEXP_REPLACE": _build_regexp_replace,
        "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
        "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll),
        "REPLACE": build_replace_with_optional_replacement,
        "RLIKE": exp.RegexpLike.from_arg_list,
        "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)),
        "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)),
        "TIMEADD": _build_date_time_add(exp.TimeAdd),
        "TIMEDIFF": _build_datediff,
        "TIMESTAMPADD": _build_date_time_add(exp.DateAdd),
        "TIMESTAMPDIFF": _build_datediff,
        "TIMESTAMPFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts,
        "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts,
        "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts,
        "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True),
        "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True),
        "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True),
        "TRY_TO_TIMESTAMP": _build_datetime(
            "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True
        ),
        "TO_CHAR": build_timetostr_or_tochar,
        "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE),
        "TO_NUMBER": lambda args: exp.ToNumber(
            this=seq_get(args, 0),
            format=seq_get(args, 1),
            precision=seq_get(args, 2),
            scale=seq_get(args, 3),
        ),
        "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME),
        "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ),
        "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP),
        "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ),
        "TO_VARCHAR": exp.ToChar.from_arg_list,
        "VECTOR_L2_DISTANCE": exp.EuclideanDistance.from_arg_list,
        "ZEROIFNULL": _build_if_from_zeroifnull,
    }

    FUNCTION_PARSERS = {
        **parser.Parser.FUNCTION_PARSERS,
        "DATE_PART": lambda self: self._parse_date_part(),
        "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(),
        "LISTAGG": lambda self: self._parse_string_agg(),
        "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(),
    }
    FUNCTION_PARSERS.pop("TRIM")

    TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME}

    ALTER_PARSERS = {
        **parser.Parser.ALTER_PARSERS,
        "SESSION": lambda self: self._parse_alter_session(),
        "UNSET": lambda self: self.expression(
            exp.Set,
            tag=self._match_text_seq("TAG"),
            expressions=self._parse_csv(self._parse_id_var),
            unset=True,
        ),
    }

    STATEMENT_PARSERS = {
        **parser.Parser.STATEMENT_PARSERS,
        TokenType.GET: lambda self: self._parse_get(),
        TokenType.PUT: lambda self: self._parse_put(),
        TokenType.SHOW: lambda self: self._parse_show(),
    }

    PROPERTY_PARSERS = {
        **parser.Parser.PROPERTY_PARSERS,
        "CREDENTIALS": lambda self: self._parse_credentials_property(),
        "FILE_FORMAT": lambda self: self._parse_file_format_property(),
        "LOCATION": lambda self: self._parse_location_property(),
        "TAG": lambda self: self._parse_tag(),
        "USING": lambda self: self._match_text_seq("TEMPLATE")
        and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()),
    }

    TYPE_CONVERTERS = {
        # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number
        exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0),
    }

    SHOW_PARSERS = {
        "DATABASES": _show_parser("DATABASES"),
        "TERSE DATABASES": _show_parser("DATABASES"),
        "SCHEMAS": _show_parser("SCHEMAS"),
        "TERSE SCHEMAS": _show_parser("SCHEMAS"),
        "OBJECTS": _show_parser("OBJECTS"),
        "TERSE OBJECTS": _show_parser("OBJECTS"),
        "TABLES": _show_parser("TABLES"),
        "TERSE TABLES": _show_parser("TABLES"),
        "VIEWS": _show_parser("VIEWS"),
        "TERSE VIEWS": _show_parser("VIEWS"),
        "PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"),
        "IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"),
        "UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"),
        "SEQUENCES": _show_parser("SEQUENCES"),
        "TERSE SEQUENCES": _show_parser("SEQUENCES"),
        "STAGES": _show_parser("STAGES"),
        "COLUMNS": _show_parser("COLUMNS"),
        "USERS": _show_parser("USERS"),
        "TERSE USERS": _show_parser("USERS"),
        "FILE FORMATS": _show_parser("FILE FORMATS"),
        "FUNCTIONS": _show_parser("FUNCTIONS"),
        "PROCEDURES": _show_parser("PROCEDURES"),
        "WAREHOUSES": _show_parser("WAREHOUSES"),
    }

    CONSTRAINT_PARSERS = {
        **parser.Parser.CONSTRAINT_PARSERS,
        "WITH": lambda self: self._parse_with_constraint(),
        "MASKING": lambda self: self._parse_with_constraint(),
        "PROJECTION": lambda self: self._parse_with_constraint(),
        "TAG": lambda self: self._parse_with_constraint(),
    }

    STAGED_FILE_SINGLE_TOKENS = {
        TokenType.DOT,
        TokenType.MOD,
        TokenType.SLASH,
    }

    FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"]

    SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}

    NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"}

    LAMBDAS = {
        **parser.Parser.LAMBDAS,
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions],
        ),
    }

    def _parse_use(self) -> exp.Use:
        if self._match_text_seq("SECONDARY", "ROLES"):
            this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper())
            roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False))
            return self.expression(
                exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles
            )

        return super()._parse_use()

    def _negate_range(
        self, this: t.Optional[exp.Expression] = None
    ) -> t.Optional[exp.Expression]:
        if not this:
            return this

        query = this.args.get("query")
        if isinstance(this, exp.In) and isinstance(query, exp.Query):
            # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so
            # we do this conversion here to avoid parsing it into `NOT value IN (subquery)`
            # which can produce different results (most likely a Snowflake bug).
            #
            # https://docs.snowflake.com/en/sql-reference/functions/in
            # Context: https://github.com/tobymao/sqlglot/issues/3890
            return self.expression(
                exp.NEQ, this=this.this, expression=exp.All(this=query.unnest())
            )

        return self.expression(exp.Not, this=this)

    def _parse_tag(self) -> exp.Tags:
        return self.expression(
            exp.Tags,
            expressions=self._parse_wrapped_csv(self._parse_property),
        )

    def _parse_with_constraint(self) -> t.Optional[exp.Expression]:
        if self._prev.token_type != TokenType.WITH:
            self._retreat(self._index - 1)

        if self._match_text_seq("MASKING", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.MaskingPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
                expressions=self._match(TokenType.USING)
                and self._parse_wrapped_csv(self._parse_id_var),
            )
        if self._match_text_seq("PROJECTION", "POLICY"):
            policy = self._parse_column()
            return self.expression(
                exp.ProjectionPolicyColumnConstraint,
                this=policy.to_dot() if isinstance(policy, exp.Column) else policy,
            )
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return None

    def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]:
        if self._match(TokenType.TAG):
            return self._parse_tag()

        return super()._parse_with_property()

    def _parse_create(self) -> exp.Create | exp.Command:
        expression = super()._parse_create()
        if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES:
            # Replace the Table node with the enclosed Identifier
            expression.this.replace(expression.this.this)

        return expression

    # https://docs.snowflake.com/en/sql-reference/functions/date_part.html
    # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts
    def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]:
        this = self._parse_var() or self._parse_type()

        if not this:
            return None

        self._match(TokenType.COMMA)
        expression = self._parse_bitwise()
        this = map_date_part(this)
        name = this.name.upper()

        if name.startswith("EPOCH"):
            if name == "EPOCH_MILLISECOND":
                scale = 10**3
            elif name == "EPOCH_MICROSECOND":
                scale = 10**6
            elif name == "EPOCH_NANOSECOND":
                scale = 10**9
            else:
                scale = None

            ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP"))
            to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts)

            if scale:
                to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale))

            return to_unix

        return self.expression(exp.Extract, this=this, expression=expression)

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        if is_map:
            # Keys are strings in Snowflake's objects, see also:
            # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured
            # - https://docs.snowflake.com/en/sql-reference/functions/object_construct
            return self._parse_slice(self._parse_string())

        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        lateral = super()._parse_lateral()
        if not lateral:
            return lateral

        if isinstance(lateral.this, exp.Explode):
            table_alias = lateral.args.get("alias")
            columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS]
            if table_alias and not table_alias.args.get("columns"):
                table_alias.set("columns", columns)
            elif not table_alias:
                exp.alias_(lateral, "_flattened", table=columns, copy=False)

        return lateral

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        # https://docs.snowflake.com/en/user-guide/querying-stage
        if self._match(TokenType.STRING, advance=False):
            table = self._parse_string()
        elif self._match_text_seq("@", advance=False):
            table = self._parse_location_path()
        else:
            table = None

        if table:
            file_format = None
            pattern = None

            wrapped = self._match(TokenType.L_PAREN)
            while self._curr and wrapped and not self._match(TokenType.R_PAREN):
                if self._match_text_seq("FILE_FORMAT", "=>"):
                    file_format = self._parse_string() or super()._parse_table_parts(
                        is_db_reference=is_db_reference
                    )
                elif self._match_text_seq("PATTERN", "=>"):
                    pattern = self._parse_string()
                else:
                    break

                self._match(TokenType.COMMA)

            table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern)
        else:
            table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference)

        return table

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        table = super()._parse_table(
            schema=schema,
            joins=joins,
            alias_tokens=alias_tokens,
            parse_bracket=parse_bracket,
            is_db_reference=is_db_reference,
            parse_partition=parse_partition,
        )
        if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows):
            table_from_rows = table.this
            for arg in exp.TableFromRows.arg_types:
                if arg != "this":
                    table_from_rows.set(arg, table.args.get(arg))

            table = table_from_rows

        return table

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IDENTIFIER", "("):
            identifier = (
                super()._parse_id_var(any_token=any_token, tokens=tokens)
                or self._parse_string()
            )
            self._match_r_paren()
            return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier])

        return super()._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_show_snowflake(self, this: str) -> exp.Show:
        scope = None
        scope_kind = None

        # will identify SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS
        # which is syntactically valid but has no effect on the output
        terse = self._tokens[self._index - 2].text.upper() == "TERSE"

        history = self._match_text_seq("HISTORY")

        like = self._parse_string() if self._match(TokenType.LIKE) else None

        if self._match(TokenType.IN):
            if self._match_text_seq("ACCOUNT"):
                scope_kind = "ACCOUNT"
            elif self._match_text_seq("CLASS"):
                scope_kind = "CLASS"
                scope = self._parse_table_parts()
            elif self._match_text_seq("APPLICATION"):
                scope_kind = "APPLICATION"
                if self._match_text_seq("PACKAGE"):
                    scope_kind += " PACKAGE"
                scope = self._parse_table_parts()
            elif self._match_set(self.DB_CREATABLES):
                scope_kind = self._prev.text.upper()
                if self._curr:
                    scope = self._parse_table_parts()
            elif self._curr:
                scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE"
                scope = self._parse_table_parts()

        return self.expression(
            exp.Show,
            **{
                "terse": terse,
                "this": this,
                "history": history,
                "like": like,
                "scope": scope,
                "scope_kind": scope_kind,
                "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(),
                "limit": self._parse_limit(),
                "from": self._parse_string() if self._match(TokenType.FROM) else None,
                "privileges": self._match_text_seq("WITH", "PRIVILEGES")
                and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)),
            },
        )

    def _parse_put(self) -> exp.Put | exp.Command:
        if self._curr.token_type != TokenType.STRING:
            return self._parse_as_command(self._prev)

        return self.expression(
            exp.Put,
            this=self._parse_string(),
            target=self._parse_location_path(),
            properties=self._parse_properties(),
        )

    def _parse_get(self) -> t.Optional[exp.Expression]:
        start = self._prev

        # If we detect GET( then we need to parse a function, not a statement
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_expression()

        target = self._parse_location_path()

        # Parse as command if unquoted file path
        if self._curr.token_type == TokenType.URI_START:
            return self._parse_as_command(start)

        return self.expression(
            exp.Get,
            this=self._parse_string(),
            target=target,
            properties=self._parse_properties(),
        )

    def _parse_location_property(self) -> exp.LocationProperty:
        self._match(TokenType.EQ)
        return self.expression(exp.LocationProperty, this=self._parse_location_path())

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        # Parse either a subquery or a staged file
        return (
            self._parse_select(table=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table_parts()
        )

    def _parse_location_path(self) -> exp.Var:
        start = self._curr
        self._advance_any(ignore_reserved=True)

        # We avoid consuming a comma token because external tables like @foo and @bar
        # can be joined in a query with a comma separator, as well as closing paren
        # in case of subqueries
        while self._is_connected() and not self._match_set(
            (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False
        ):
            self._advance_any(ignore_reserved=True)

        return exp.var(self._find_sql(start, self._prev))

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        this = super()._parse_lambda_arg()

        if not this:
            return this

        typ = self._parse_types()

        if typ:
            return self.expression(exp.Cast, this=this, to=typ)

        return this

    def _parse_foreign_key(self) -> exp.ForeignKey:
        # inline FK: the REFERENCES columns are implied
        if self._match(TokenType.REFERENCES, advance=False):
            return self.expression(exp.ForeignKey)

        # out-of-line FK: explicitly names the columns
        return super()._parse_foreign_key()

    def _parse_file_format_property(self) -> exp.FileFormatProperty:
        self._match(TokenType.EQ)
        if self._match(TokenType.L_PAREN, advance=False):
            expressions = self._parse_wrapped_options()
        else:
            expressions = [self._parse_format_name()]

        return self.expression(
            exp.FileFormatProperty,
            expressions=expressions,
        )

    def _parse_credentials_property(self) -> exp.CredentialsProperty:
        return self.expression(
            exp.CredentialsProperty,
            expressions=self._parse_wrapped_options(),
        )

    def _parse_semantic_view(self) -> exp.SemanticView:
        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()}

        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            if self._match_text_seq("DIMENSIONS"):
                kwargs["dimensions"] = self._parse_csv(self._parse_disjunction)
            if self._match_text_seq("METRICS"):
                kwargs["metrics"] = self._parse_csv(self._parse_disjunction)
            if self._match_text_seq("WHERE"):
                kwargs["where"] = self._parse_expression()

        return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
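To see a couple of the parser hooks above end to end, here is a minimal sketch using the public sqlglot API (the table and column names are made up, and the printed SQL is the expected shape rather than verbatim output):

    import sqlglot

    # _build_datetime: TO_TIMESTAMP with a lone string literal parses into a plain cast
    ast = sqlglot.parse_one("SELECT TO_TIMESTAMP('2024-01-01')", read="snowflake")
    print(ast.sql(dialect="duckdb"))
    # roughly: SELECT CAST('2024-01-01' AS TIMESTAMP)

    # _negate_range: NOT IN (subquery) is parsed as <> ALL (subquery)
    ast = sqlglot.parse_one("SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)", read="snowflake")
    print(ast.sql(dialect="snowflake"))
    # roughly: SELECT * FROM t WHERE x <> ALL (SELECT y FROM u)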
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- CAST_COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
class Tokenizer(tokens.Tokenizer):
    STRING_ESCAPES = ["\\", "'"]
    HEX_STRINGS = [("x'", "'"), ("X'", "'")]
    RAW_STRINGS = ["$$"]
    COMMENTS = ["--", "//", ("/*", "*/")]
    NESTED_COMMENTS = False

    KEYWORDS = {
        **tokens.Tokenizer.KEYWORDS,
        "BYTEINT": TokenType.INT,
        "FILE://": TokenType.URI_START,
        "FILE FORMAT": TokenType.FILE_FORMAT,
        "GET": TokenType.GET,
        "MATCH_CONDITION": TokenType.MATCH_CONDITION,
        "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE,
        "MINUS": TokenType.EXCEPT,
        "NCHAR VARYING": TokenType.VARCHAR,
        "PUT": TokenType.PUT,
        "REMOVE": TokenType.COMMAND,
        "RM": TokenType.COMMAND,
        "SAMPLE": TokenType.TABLE_SAMPLE,
        "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW,
        "SQL_DOUBLE": TokenType.DOUBLE,
        "SQL_VARCHAR": TokenType.VARCHAR,
        "STAGE": TokenType.STAGE,
        "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION,
        "STREAMLIT": TokenType.STREAMLIT,
        "TAG": TokenType.TAG,
        "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ,
        "TOP": TokenType.TOP,
        "WAREHOUSE": TokenType.WAREHOUSE,
    }
    KEYWORDS.pop("/*+")

    SINGLE_TOKENS = {
        **tokens.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }

    VAR_SINGLE_TOKENS = {"$"}

    COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
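The keyword overrides above are visible in ordinary transpilation; for instance, MINUS tokenizes as EXCEPT, so it survives translation to dialects that only spell the set operation one way. A minimal sketch (the output comment shows the expected shape, not verbatim output):

    import sqlglot

    print(sqlglot.transpile("SELECT 1 MINUS SELECT 2", read="snowflake", write="duckdb")[0])
    # expected: SELECT 1 EXCEPT SELECT 2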
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
class Generator(generator.Generator):
    PARAMETER_TOKEN = "$"
    MATCHED_BY_SOURCE = False
    SINGLE_STRING_INTERVAL = True
    JOIN_HINTS = False
    TABLE_HINTS = False
    QUERY_HINTS = False
    AGGREGATE_FILTER_SUPPORTED = False
    SUPPORTS_TABLE_COPY = False
    COLLATE_IS_FUNC = True
    LIMIT_ONLY_LITERALS = True
    JSON_KEY_VALUE_PAIR_SEP = ","
    INSERT_OVERWRITE = " OVERWRITE INTO"
    STRUCT_DELIMITER = ("(", ")")
    COPY_PARAMS_ARE_WRAPPED = False
    COPY_PARAMS_EQ_REQUIRED = True
    STAR_EXCEPT = "EXCLUDE"
    SUPPORTS_EXPLODING_PROJECTIONS = False
    ARRAY_CONCAT_IS_VAR_LEN = False
    SUPPORTS_CONVERT_TIMEZONE = True
    EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
    SUPPORTS_MEDIAN = True
    ARRAY_SIZE_NAME = "ARRAY_SIZE"
    SUPPORTS_DECODE_CASE = True
    IS_BOOL_ALLOWED = False

    TRANSFORMS = {
        **generator.Generator.TRANSFORMS,
        exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
        exp.ArgMax: rename_func("MAX_BY"),
        exp.ArgMin: rename_func("MIN_BY"),
        exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"),
        exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this),
        exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"),
        exp.AtTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), e.this
        ),
        exp.BitwiseOr: rename_func("BITOR"),
        exp.BitwiseXor: rename_func("BITXOR"),
        exp.BitwiseAnd: rename_func("BITAND"),
        exp.BitwiseAndAgg: rename_func("BITANDAGG"),
        exp.BitwiseOrAgg: rename_func("BITORAGG"),
        exp.BitwiseXorAgg: rename_func("BITXORAGG"),
        exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"),
        exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"),
        exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]),
        exp.DateAdd: date_delta_sql("DATEADD"),
        exp.DateDiff: date_delta_sql("DATEDIFF"),
        exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"),
        exp.DatetimeDiff: timestampdiff_sql,
        exp.DateStrToDate: datestrtodate_sql,
        exp.DayOfMonth: rename_func("DAYOFMONTH"),
        exp.DayOfWeek: rename_func("DAYOFWEEK"),
        exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"),
        exp.DayOfYear: rename_func("DAYOFYEAR"),
        exp.Explode: rename_func("FLATTEN"),
        exp.Extract: lambda self, e: self.func(
            "DATE_PART", map_date_part(e.this, self.dialect), e.expression
        ),
        exp.EuclideanDistance: rename_func("VECTOR_L2_DISTANCE"),
        exp.FileFormatProperty: lambda self,
        e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})",
        exp.FromTimeZone: lambda self, e: self.func(
            "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this
        ),
        exp.GenerateSeries: lambda self, e: self.func(
            "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step")
        ),
        exp.GetExtract: rename_func("GET"),
        exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""),
        exp.If: if_sql(name="IFF", false_value="NULL"),
        exp.JSONExtractArray: _json_extract_value_array_sql,
        exp.JSONExtractScalar: lambda self, e: self.func(
            "JSON_EXTRACT_PATH_TEXT", e.this, e.expression
        ),
        exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions),
        exp.JSONPathRoot: lambda *_: "",
        exp.JSONValueArray: _json_extract_value_array_sql,
        exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
            rename_func("EDITDISTANCE")
        ),
        exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
        exp.LogicalAnd: rename_func("BOOLAND_AGG"),
        exp.LogicalOr: rename_func("BOOLOR_AGG"),
        exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.MakeInterval: no_make_interval_sql,
        exp.Max: max_or_greatest,
        exp.Min: min_or_least,
        exp.ParseJSON: lambda self, e: self.func(
            "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this
        ),
        exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
        exp.PercentileCont: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.PercentileDisc: transforms.preprocess(
            [transforms.add_within_group_for_percentiles]
        ),
        exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]),
        exp.RegexpExtract: _regexpextract_sql,
        exp.RegexpExtractAll: _regexpextract_sql,
        exp.RegexpILike: _regexpilike_sql,
        exp.Rand: rename_func("RANDOM"),
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_window_clause,
                transforms.eliminate_distinct_on,
                transforms.explode_projection_to_unnest(),
                transforms.eliminate_semi_and_anti_joins,
                _transform_generate_date_array,
                _qualify_unnested_columns,
                _eliminate_dot_variant_lookup,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SortArray: rename_func("ARRAY_SORT"),
        exp.StarMap: rename_func("OBJECT_CONSTRUCT"),
        exp.StartsWith: rename_func("STARTSWITH"),
        exp.EndsWith: rename_func("ENDSWITH"),
        exp.StrPosition: lambda self, e: strposition_sql(
            self, e, func_name="CHARINDEX", supports_position=True
        ),
        exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)),
        exp.StringToArray: rename_func("STRTOK_TO_ARRAY"),
        exp.Stuff: rename_func("INSERT"),
        exp.StPoint: rename_func("ST_MAKEPOINT"),
        exp.TimeAdd: date_delta_sql("TIMEADD"),
        exp.Timestamp: no_timestamp_sql,
        exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"),
        exp.TimestampDiff: lambda self, e: self.func(
            "TIMESTAMPDIFF", e.unit, e.expression, e.this
        ),
        exp.TimestampTrunc: timestamptrunc_sql(),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
        exp.ToArray: rename_func("TO_ARRAY"),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        exp.ToDouble: rename_func("TO_DOUBLE"),
        exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True),
        exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
        exp.TsOrDsToDate: lambda self, e: self.func(
            "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e)
        ),
        exp.TsOrDsToTime: lambda self, e: self.func(
            "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e)
        ),
        exp.Unhex: rename_func("HEX_DECODE_BINARY"),
        exp.UnixToTime: rename_func("TO_TIMESTAMP"),
        exp.Uuid: rename_func("UUID_STRING"),
        exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"),
        exp.WeekOfYear: rename_func("WEEKOFYEAR"),
        exp.Xor: rename_func("BOOLXOR"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.NESTED: "OBJECT",
        exp.DataType.Type.STRUCT: "OBJECT",
        exp.DataType.Type.BIGDECIMAL: "DOUBLE",
    }

    TOKEN_MAPPING = {
        TokenType.AUTO_INCREMENT: "AUTOINCREMENT",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.CredentialsProperty: exp.Properties.Location.POST_WITH,
        exp.LocationProperty: exp.Properties.Location.POST_WITH,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.SetProperty: exp.Properties.Location.UNSUPPORTED,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    UNSUPPORTED_VALUES_EXPRESSIONS = {
        exp.Map,
        exp.StarMap,
        exp.Struct,
        exp.VarMap,
    }

    RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,)

    def with_properties(self, properties: exp.Properties) -> str:
        return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ")

    def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str:
        if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS):
            values_as_table = False

        return super().values_sql(expression, values_as_table=values_as_table)

    def datatype_sql(self, expression: exp.DataType) -> str:
        expressions = expression.expressions
        if (
            expressions
            and expression.is_type(*exp.DataType.STRUCT_TYPES)
            and any(isinstance(field_type, exp.DataType) for field_type in expressions)
        ):
            # The correct syntax is OBJECT [ (<key> <value_type> [NOT NULL] [, ...]) ]
            return "OBJECT"

        return super().datatype_sql(expression)

    def tonumber_sql(self, expression: exp.ToNumber) -> str:
        return self.func(
            "TO_NUMBER",
            expression.this,
            expression.args.get("format"),
            expression.args.get("precision"),
            expression.args.get("scale"),
        )

    def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str:
        milli = expression.args.get("milli")
        if milli is not None:
            milli_to_nano = milli.pop() * exp.Literal.number(1000000)
            expression.set("nano", milli_to_nano)

        return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        if expression.is_type(exp.DataType.Type.GEOGRAPHY):
            return self.func("TO_GEOGRAPHY", expression.this)
        if expression.is_type(exp.DataType.Type.GEOMETRY):
            return self.func("TO_GEOMETRY", expression.this)

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        value = expression.this

        if value.type is None:
            from sqlglot.optimizer.annotate_types import annotate_types

            value = annotate_types(value, dialect=self.dialect)

        # Snowflake requires that TRY_CAST's value be a string
        # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets
        # "requires_string") or if we can deduce that the value is a string, then we can
        # generate TRY_CAST
        if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES):
            return super().trycast_sql(expression)

        return self.cast_sql(expression)

    def log_sql(self, expression: exp.Log) -> str:
        if not expression.expression:
            return self.func("LN", expression.this)

        return super().log_sql(expression)

    def unnest_sql(self, expression: exp.Unnest) -> str:
        unnest_alias = expression.args.get("alias")
        offset = expression.args.get("offset")

        unnest_alias_columns = unnest_alias.columns if unnest_alias else []
        value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value")

        columns = [
            exp.to_identifier("seq"),
            exp.to_identifier("key"),
            exp.to_identifier("path"),
            offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"),
            value,
            exp.to_identifier("this"),
        ]

        if unnest_alias:
            unnest_alias.set("columns", columns)
        else:
            unnest_alias = exp.TableAlias(this="_u", columns=columns)

        table_input = self.sql(expression.expressions[0])
        if not table_input.startswith("INPUT =>"):
            table_input = f"INPUT => {table_input}"

        expression_parent = expression.parent

        explode = (
            f"FLATTEN({table_input})"
            if isinstance(expression_parent, exp.Lateral)
            else f"TABLE(FLATTEN({table_input}))"
        )
        alias = self.sql(unnest_alias)
        alias = f" AS {alias}" if alias else ""
        value = (
            ""
            if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral))
            else f"{value} FROM "
        )

        return f"{value}{explode}{alias}"

    def show_sql(self, expression: exp.Show) -> str:
        terse = "TERSE " if expression.args.get("terse") else ""
        history = " HISTORY" if expression.args.get("history") else ""
        like = self.sql(expression, "like")
        like = f" LIKE {like}" if like else ""

        scope = self.sql(expression, "scope")
        scope = f" {scope}" if scope else ""

        scope_kind = self.sql(expression, "scope_kind")
        if scope_kind:
            scope_kind = f" IN {scope_kind}"

        starts_with = self.sql(expression, "starts_with")
        if starts_with:
            starts_with = f" STARTS WITH {starts_with}"

        limit = self.sql(expression, "limit")

        from_ = self.sql(expression, "from")
        if from_:
            from_ = f" FROM {from_}"

        privileges = self.expressions(expression, key="privileges", flat=True)
        privileges = f" WITH PRIVILEGES {privileges}" if privileges else ""

        return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"

    def describe_sql(self, expression: exp.Describe) -> str:
        # Default to table if kind is unknown
        kind_value = expression.args.get("kind") or "TABLE"
        kind = f" {kind_value}" if kind_value else ""
        this = f" {self.sql(expression, 'this')}"
        expressions = self.expressions(expression, flat=True)
        expressions = f" {expressions}" if expressions else ""
        return f"DESCRIBE{kind}{this}{expressions}"

    def generatedasidentitycolumnconstraint_sql(
        self, expression: exp.GeneratedAsIdentityColumnConstraint
    ) -> str:
        start = expression.args.get("start")
        start = f" START {start}" if start else ""
        increment = expression.args.get("increment")
        increment = f" INCREMENT {increment}" if increment else ""

        order = expression.args.get("order")
        if order is not None:
            order_clause = " ORDER" if order else " NOORDER"
        else:
            order_clause = ""

        return f"AUTOINCREMENT{start}{increment}{order_clause}"

    def cluster_sql(self, expression: exp.Cluster) -> str:
        return f"CLUSTER BY ({self.expressions(expression, flat=True)})"

    def struct_sql(self, expression: exp.Struct) -> str:
        keys = []
        values = []

        for i, e in enumerate(expression.expressions):
            if isinstance(e, exp.PropertyEQ):
                keys.append(
                    exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this
                )
                values.append(e.expression)
            else:
                keys.append(exp.Literal.string(f"_{i}"))
                values.append(e)

        return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))

    @unsupported_args("weight", "accuracy")
    def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str:
        return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile"))

    def alterset_sql(self, expression: exp.AlterSet) -> str:
        exprs = self.expressions(expression, flat=True)
        exprs = f" {exprs}" if exprs else ""
        file_format = self.expressions(expression, key="file_format", flat=True, sep=" ")
        file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else ""
        copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ")
        copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else ""
        tag = self.expressions(expression, key="tag", flat=True)
        tag = f" TAG {tag}" if tag else ""

        return f"SET{exprs}{file_format}{copy_options}{tag}"

    def strtotime_sql(self, expression: exp.StrToTime):
        safe_prefix = "TRY_" if expression.args.get("safe") else ""
        return self.func(
            f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression)
        )

    def timestampsub_sql(self, expression: exp.TimestampSub):
        return self.sql(
            exp.TimestampAdd(
                this=expression.this,
                expression=expression.expression * -1,
                unit=expression.unit,
            )
        )

    def jsonextract_sql(self, expression: exp.JSONExtract):
        this = expression.this

        # JSON strings are valid coming from other dialects such as BQ, so
        # for these cases we apply PARSE_JSON preemptively
        if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get(
            "requires_json"
        ):
            this = exp.ParseJSON(this=this)

        return self.func(
            "GET_PATH",
            this,
            expression.expression,
        )

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if this.is_string:
            this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

        return self.func("TO_CHAR", this, self.format_time(expression))

    def datesub_sql(self, expression: exp.DateSub) -> str:
        value = expression.expression
        if value:
            value.replace(value * (-1))
        else:
            self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown")

        return date_delta_sql("DATEADD")(self, expression)

    def select_sql(self, expression: exp.Select) -> str:
        limit = expression.args.get("limit")
        offset = expression.args.get("offset")
        if offset and not limit:
            expression.limit(exp.Null(), copy=False)
        return super().select_sql(expression)

    def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str:
        is_materialized = expression.find(exp.MaterializedProperty)
        copy_grants_property = expression.find(exp.CopyGrantsProperty)

        if expression.kind == "VIEW" and is_materialized and copy_grants_property:
            # For materialized views, COPY GRANTS is located *before* the columns list
            # This is in contrast to normal views where COPY GRANTS is located *after* the columns list
            # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it
            # POST_NAME if a materialized view is detected
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax
            # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax
            post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA]
            post_schema_properties.pop(post_schema_properties.index(copy_grants_property))

            this_name = self.sql(expression.this, "this")
            copy_grants = self.sql(copy_grants_property)
            this_schema = self.schema_columns_sql(expression.this)
            this_schema = f"{self.sep()}{this_schema}" if this_schema else ""

            return f"{this_name}{self.sep()}{copy_grants}{this_schema}"

        return super().createable_sql(expression, locations)

    def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
        this = expression.this

        # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG
        # and add it later as part of the WITHIN GROUP clause
        order = this if isinstance(this, exp.Order) else None
        if order:
            expression.set("this", order.this.pop())

        expr_sql = super().arrayagg_sql(expression)

        if order:
            expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order))

        return expr_sql

    def array_sql(self, expression: exp.Array) -> str:
        expressions = expression.expressions

        first_expr = seq_get(expressions, 0)
        if isinstance(first_expr, exp.Select):
            # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo))
            if first_expr.text("kind").upper() == "STRUCT":
                object_construct_args = []
                for expr in first_expr.expressions:
                    # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo)
                    # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo)
                    name = expr.this if isinstance(expr, exp.Alias) else expr

                    object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name])

                array_agg = exp.ArrayAgg(
                    this=_build_object_construct(args=object_construct_args)
                )

                first_expr.set("kind", None)
                first_expr.set("expressions", [array_agg])

                return self.sql(first_expr.subquery())

        return inline_array_sql(self, expression)

    def currentdate_sql(self, expression: exp.CurrentDate) -> str:
        zone = self.sql(expression, "this")
        if not zone:
            return super().currentdate_sql(expression)

        expr = exp.Cast(
            this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()),
            to=exp.DataType(this=exp.DataType.Type.DATE),
        )
        return self.sql(expr)

    def dot_sql(self, expression: exp.Dot) -> str:
        this = expression.this

        if not this.type:
            from sqlglot.optimizer.annotate_types import annotate_types

            this = annotate_types(this, dialect=self.dialect)

        if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT):
            # Generate colon notation for the top level STRUCT
            return f"{self.sql(this)}:{self.sql(expression, 'expression')}"

        return super().dot_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
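A short sketch of the TRANSFORMS table above in action, transpiling into Snowflake (the table and column names are made up; the output comments show the expected shape, not verbatim output):

    import sqlglot

    # exp.If is rendered via if_sql(name="IFF")
    print(sqlglot.transpile("SELECT IF(x > 1, 'a', 'b') FROM t", read="duckdb", write="snowflake")[0])
    # expected: SELECT IFF(x > 1, 'a', 'b') FROM t

    # STAR_EXCEPT = "EXCLUDE" rewrites BigQuery's SELECT * EXCEPT
    print(sqlglot.transpile("SELECT * EXCEPT (x) FROM t", read="bigquery", write="snowflake")[0])
    # expected: SELECT * EXCLUDE (x) FROM t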
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- UNICODE_SUBSTITUTE
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- MATCH_AGAINST_TABLE_PREFIX
- UNSUPPORTED_TYPES
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SAFE_JSON_PATH_KEY_RE
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- queryband_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- altersession_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- generateembedding_sql
- featuresattime_sql
- vectorsearch_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- revoke_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql
- getextract_sql
- datefromunixdate_sql
- space_sql
- buildproperty_sql
- refreshtriggerproperty_sql