sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import (
    ErrorLevel,
    ParseError,
    TokenError,
    concat_messages,
    highlight_sql,
    merge_errors,
)
from sqlglot.expressions import apply_index_offset
from sqlglot.helper import ensure_list, i64, seq_get
from sqlglot.trie import new_trie
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie
from collections.abc import Sequence
from builtins import type as Type

if t.TYPE_CHECKING:
    from sqlglot.expressions import ExpOrStr
    from sqlglot._typing import E, BuilderArgs
    from sqlglot.dialects.dialect import Dialect, DialectType

    from re import Pattern

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """
    Builds a map expression from a flat, alternating key/value argument list.

    Args:
        args: Function arguments, either a single star argument or
            [key1, value1, key2, value2, ...].

    Returns:
        A StarMap wrapping the argument when it is the only one and is a star,
        otherwise a VarMap whose keys are the even-indexed arguments and whose
        values are the odd-indexed ones.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: list[ExpOrStr] = []
    values: list[ExpOrStr] = []
    # Each key at index i is paired with the value at i + 1, so a trailing key
    # without a value makes the args[i + 1] lookup fail.
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: BuilderArgs) -> exp.Escape | exp.Like:
    """
    Builds a Like expression from function-call style arguments.

    The second argument becomes the `this` operand and the first becomes the
    `expression` operand. When more than two arguments are given, the third is
    used as the expression of an Escape node wrapping the Like.
    """
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """
    Creates a parser callback for a binary range operator.

    Args:
        expr_type: The expression class to instantiate with `this` and `expression`.
        reverse_args: Whether to swap the left-hand side and the parsed right-hand side
            before building the expression.

    Returns:
        A callable taking the parser and the already-parsed left-hand side. It parses the
        right-hand side with `_parse_bitwise`, builds `expr_type` and passes the result
        through `_parse_escape`.
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type(this=this, expression=expression)))

    return _parse_binary_range


def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """
    Builds a logarithm expression, honoring the dialect's argument order and default base.

    Args:
        args: Function arguments, one or two operands.
        dialect: The dialect to read LOG_BASE_FIRST and parser_class.LOG_DEFAULTS_TO_LN from.

    Returns:
        A Log with both operands when a second argument is present (operands are swapped
        when the dialect does not put the base first). With a single argument, Ln when
        the dialect's parser defaults LOG to LN, otherwise Log.
    """
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Builds LowerHex when the dialect sets HEX_LOWERCASE, otherwise Hex, from the first argument."""
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex:
    """Builds Lower from the first argument, collapsing LOWER(HEX(x)) into LowerHex(x)."""
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex:
    """Builds Upper from the first argument, collapsing UPPER(HEX(x)) into Hex(x)."""
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """
    Creates a builder for JSON extraction expressions that take a path argument.

    Args:
        expr_type: The expression class to instantiate.

    Returns:
        A builder taking (args, dialect). The first argument becomes `this` and the second
        is converted with `dialect.to_json_path` and stored as `expression`.
    """

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        # Only JSONExtract keeps the arguments that follow the first path
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])
        if expr_type is exp.JSONExtractScalar:
            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return expression

    return _builder


122def build_mod(args: BuilderArgs) -> exp.Mod: 123 this = seq_get(args, 0) 124 expression = seq_get(args, 1) 125 126 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 127 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 128 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 129 130 return exp.Mod(this=this, expression=expression) 131 132 133def build_pad(args: BuilderArgs, is_left: bool = True): 134 return exp.Pad( 135 this=seq_get(args, 0), 136 expression=seq_get(args, 1), 137 fill_pattern=seq_get(args, 2), 138 is_left=is_left, 139 ) 140 141 142def build_array_constructor( 143 exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect 144) -> exp.Expr: 145 array_exp = exp_class(expressions=args) 146 147 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 148 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 149 150 return array_exp 151 152 153def build_convert_timezone( 154 args: BuilderArgs, default_source_tz: str | None = None 155) -> exp.ConvertTimezone | exp.Anonymous: 156 if len(args) == 2: 157 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 158 return exp.ConvertTimezone( 159 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 160 ) 161 162 return exp.ConvertTimezone.from_arg_list(args) 163 164 165def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim: 166 this, expression = seq_get(args, 0), seq_get(args, 1) 167 168 if expression and reverse_args: 169 this, expression = expression, this 170 171 return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING") 172 173 174def build_coalesce( 175 args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None 176) -> exp.Coalesce: 177 return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, 
is_null=is_null) 178 179 180def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition: 181 return exp.StrPosition( 182 this=seq_get(args, 1), 183 substr=seq_get(args, 0), 184 position=seq_get(args, 2), 185 ) 186 187 188def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend: 189 """ 190 Builds ArrayAppend with NULL propagation semantics based on the dialect configuration. 191 192 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 193 Others (DuckDB, PostgreSQL) create a new single-element array instead. 194 195 Args: 196 args: Function arguments [array, element] 197 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 198 199 Returns: 200 ArrayAppend expression with appropriate null_propagation flag 201 """ 202 return exp.ArrayAppend( 203 this=seq_get(args, 0), 204 expression=seq_get(args, 1), 205 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 206 ) 207 208 209def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend: 210 """ 211 Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration. 212 213 Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. 214 Others (DuckDB, PostgreSQL) create a new single-element array instead. 215 216 Args: 217 args: Function arguments [array, element] 218 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 219 220 Returns: 221 ArrayPrepend expression with appropriate null_propagation flag 222 """ 223 return exp.ArrayPrepend( 224 this=seq_get(args, 0), 225 expression=seq_get(args, 1), 226 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 227 ) 228 229 230def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat: 231 """ 232 Builds ArrayConcat with NULL propagation semantics based on the dialect configuration. 233 234 Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. 
235 Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation. 236 237 Args: 238 args: Function arguments [array1, array2, ...] (variadic) 239 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 240 241 Returns: 242 ArrayConcat expression with appropriate null_propagation flag 243 """ 244 return exp.ArrayConcat( 245 this=seq_get(args, 0), 246 expressions=args[1:], 247 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 248 ) 249 250 251def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove: 252 """ 253 Builds ArrayRemove with NULL propagation semantics based on the dialect configuration. 254 255 Some dialects (Snowflake) return NULL when the removal value is NULL. 256 Others (DuckDB) may return empty array due to NULL comparison semantics. 257 258 Args: 259 args: Function arguments [array, value_to_remove] 260 dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from 261 262 Returns: 263 ArrayRemove expression with appropriate null_propagation flag 264 """ 265 return exp.ArrayRemove( 266 this=seq_get(args, 0), 267 expression=seq_get(args, 1), 268 null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS, 269 ) 270 271 272def _resolve_dialect(dialect: DialectType) -> Dialect: 273 from sqlglot.dialects.dialect import Dialect 274 275 return Dialect.get_or_raise(dialect) 276 277 278def _unpivot_target(expr: exp.Expr) -> exp.Expr: 279 # UNPIVOT's pre-FOR values and FOR field are new output names, not column references. 280 if isinstance(expr, exp.Column) and not expr.table: 281 return expr.this 282 if isinstance(expr, exp.Tuple): 283 expr.set("expressions", [_unpivot_target(e) for e in expr.expressions]) 284 return expr 285 286 287SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL") 288 289 290class Parser: 291 """ 292 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 293 294 Args: 295 error_level: The desired error level. 
296 Default: ErrorLevel.IMMEDIATE 297 error_message_context: The amount of context to capture from a query string when displaying 298 the error message (in number of characters). 299 Default: 100 300 max_errors: Maximum number of error messages to include in a raised ParseError. 301 This is only relevant if error_level is ErrorLevel.RAISE. 302 Default: 3 303 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 304 Set to -1 (default) to disable the check. 305 """ 306 307 __slots__ = ( 308 "error_level", 309 "error_message_context", 310 "max_errors", 311 "max_nodes", 312 "dialect", 313 "sql", 314 "errors", 315 "_tokens", 316 "_index", 317 "_curr", 318 "_next", 319 "_prev", 320 "_prev_comments", 321 "_pipe_cte_counter", 322 "_chunks", 323 "_chunk_index", 324 "_tokens_size", 325 "_node_count", 326 ) 327 328 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 329 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 330 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 331 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 332 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 333 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 334 ), 335 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 336 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 337 ), 338 "ARRAY_APPEND": build_array_append, 339 "ARRAY_CAT": build_array_concat, 340 "ARRAY_CONCAT": build_array_concat, 341 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 342 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 343 "ARRAY_PREPEND": build_array_prepend, 344 "ARRAY_REMOVE": build_array_remove, 345 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 346 "CONCAT": lambda args, dialect: exp.Concat( 347 expressions=args, 348 safe=not dialect.STRICT_STRING_CONCAT, 349 coalesce=dialect.CONCAT_COALESCE, 350 ), 
351 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 352 expressions=args, 353 safe=not dialect.STRICT_STRING_CONCAT, 354 coalesce=dialect.CONCAT_WS_COALESCE, 355 ), 356 "CONVERT_TIMEZONE": build_convert_timezone, 357 "DATE_TO_DATE_STR": lambda args: exp.Cast( 358 this=seq_get(args, 0), 359 to=exp.DataType(this=exp.DType.TEXT), 360 ), 361 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 362 start=seq_get(args, 0), 363 end=seq_get(args, 1), 364 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 365 ), 366 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 367 is_string=dialect.UUID_IS_STRING_TYPE or None 368 ), 369 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 370 "GREATEST": lambda args, dialect: exp.Greatest( 371 this=seq_get(args, 0), 372 expressions=args[1:], 373 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 374 ), 375 "LEAST": lambda args, dialect: exp.Least( 376 this=seq_get(args, 0), 377 expressions=args[1:], 378 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 379 ), 380 "HEX": build_hex, 381 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 382 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 383 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 384 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 385 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 386 ), 387 "LIKE": build_like, 388 "LOG": build_logarithm, 389 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 390 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 391 "LOWER": build_lower, 392 "LPAD": lambda args: build_pad(args), 393 "LEFTPAD": lambda args: build_pad(args), 394 "LTRIM": lambda args: build_trim(args), 395 "MOD": build_mod, 396 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 397 "RPAD": lambda args: build_pad(args, is_left=False), 
398 "RTRIM": lambda args: build_trim(args, is_left=False), 399 "SCOPE_RESOLUTION": lambda args: ( 400 exp.ScopeResolution(expression=seq_get(args, 0)) 401 if len(args) != 2 402 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 403 ), 404 "STRPOS": exp.StrPosition.from_arg_list, 405 "CHARINDEX": lambda args: build_locate_strposition(args), 406 "INSTR": exp.StrPosition.from_arg_list, 407 "LOCATE": lambda args: build_locate_strposition(args), 408 "TIME_TO_TIME_STR": lambda args: exp.Cast( 409 this=seq_get(args, 0), 410 to=exp.DataType(this=exp.DType.TEXT), 411 ), 412 "TO_HEX": build_hex, 413 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 414 this=exp.Cast( 415 this=seq_get(args, 0), 416 to=exp.DataType(this=exp.DType.TEXT), 417 ), 418 start=exp.Literal.number(1), 419 length=exp.Literal.number(10), 420 ), 421 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 422 "UPPER": build_upper, 423 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 424 "UUID_STRING": lambda args, dialect: exp.Uuid( 425 this=seq_get(args, 0), 426 name=seq_get(args, 1), 427 is_string=dialect.UUID_IS_STRING_TYPE or None, 428 ), 429 "VAR_MAP": build_var_map, 430 } 431 432 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 433 TokenType.CURRENT_DATE: exp.CurrentDate, 434 TokenType.CURRENT_DATETIME: exp.CurrentDate, 435 TokenType.CURRENT_TIME: exp.CurrentTime, 436 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 437 TokenType.CURRENT_USER: exp.CurrentUser, 438 TokenType.CURRENT_ROLE: exp.CurrentRole, 439 } 440 441 STRUCT_TYPE_TOKENS: t.ClassVar = { 442 TokenType.NESTED, 443 TokenType.OBJECT, 444 TokenType.STRUCT, 445 TokenType.UNION, 446 } 447 448 NESTED_TYPE_TOKENS: t.ClassVar = { 449 TokenType.ARRAY, 450 TokenType.LIST, 451 TokenType.LOWCARDINALITY, 452 TokenType.MAP, 453 TokenType.NULLABLE, 454 TokenType.RANGE, 455 *STRUCT_TYPE_TOKENS, 456 } 457 458 ENUM_TYPE_TOKENS: t.ClassVar = { 459 TokenType.DYNAMIC, 460 
TokenType.ENUM, 461 TokenType.ENUM8, 462 TokenType.ENUM16, 463 } 464 465 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 466 TokenType.AGGREGATEFUNCTION, 467 TokenType.SIMPLEAGGREGATEFUNCTION, 468 } 469 470 TYPE_TOKENS: t.ClassVar = { 471 TokenType.BIT, 472 TokenType.BOOLEAN, 473 TokenType.TINYINT, 474 TokenType.UTINYINT, 475 TokenType.SMALLINT, 476 TokenType.USMALLINT, 477 TokenType.INT, 478 TokenType.UINT, 479 TokenType.BIGINT, 480 TokenType.UBIGINT, 481 TokenType.BIGNUM, 482 TokenType.INT128, 483 TokenType.UINT128, 484 TokenType.INT256, 485 TokenType.UINT256, 486 TokenType.MEDIUMINT, 487 TokenType.UMEDIUMINT, 488 TokenType.FIXEDSTRING, 489 TokenType.FLOAT, 490 TokenType.DOUBLE, 491 TokenType.UDOUBLE, 492 TokenType.CHAR, 493 TokenType.NCHAR, 494 TokenType.VARCHAR, 495 TokenType.NVARCHAR, 496 TokenType.BPCHAR, 497 TokenType.TEXT, 498 TokenType.MEDIUMTEXT, 499 TokenType.LONGTEXT, 500 TokenType.BLOB, 501 TokenType.MEDIUMBLOB, 502 TokenType.LONGBLOB, 503 TokenType.BINARY, 504 TokenType.VARBINARY, 505 TokenType.JSON, 506 TokenType.JSONB, 507 TokenType.INTERVAL, 508 TokenType.TINYBLOB, 509 TokenType.TINYTEXT, 510 TokenType.TIME, 511 TokenType.TIMETZ, 512 TokenType.TIME_NS, 513 TokenType.TIMESTAMP, 514 TokenType.TIMESTAMP_S, 515 TokenType.TIMESTAMP_MS, 516 TokenType.TIMESTAMP_NS, 517 TokenType.TIMESTAMPTZ, 518 TokenType.TIMESTAMPLTZ, 519 TokenType.TIMESTAMPNTZ, 520 TokenType.DATETIME, 521 TokenType.DATETIME2, 522 TokenType.DATETIME64, 523 TokenType.SMALLDATETIME, 524 TokenType.DATE, 525 TokenType.DATE32, 526 TokenType.INT4RANGE, 527 TokenType.INT4MULTIRANGE, 528 TokenType.INT8RANGE, 529 TokenType.INT8MULTIRANGE, 530 TokenType.NUMRANGE, 531 TokenType.NUMMULTIRANGE, 532 TokenType.TSRANGE, 533 TokenType.TSMULTIRANGE, 534 TokenType.TSTZRANGE, 535 TokenType.TSTZMULTIRANGE, 536 TokenType.DATERANGE, 537 TokenType.DATEMULTIRANGE, 538 TokenType.DECIMAL, 539 TokenType.DECIMAL32, 540 TokenType.DECIMAL64, 541 TokenType.DECIMAL128, 542 TokenType.DECIMAL256, 543 TokenType.DECFLOAT, 544 
TokenType.UDECIMAL, 545 TokenType.BIGDECIMAL, 546 TokenType.UUID, 547 TokenType.GEOGRAPHY, 548 TokenType.GEOGRAPHYPOINT, 549 TokenType.GEOMETRY, 550 TokenType.POINT, 551 TokenType.RING, 552 TokenType.LINESTRING, 553 TokenType.MULTILINESTRING, 554 TokenType.POLYGON, 555 TokenType.MULTIPOLYGON, 556 TokenType.HLLSKETCH, 557 TokenType.HSTORE, 558 TokenType.PSEUDO_TYPE, 559 TokenType.SUPER, 560 TokenType.SERIAL, 561 TokenType.SMALLSERIAL, 562 TokenType.BIGSERIAL, 563 TokenType.XML, 564 TokenType.YEAR, 565 TokenType.USERDEFINED, 566 TokenType.MONEY, 567 TokenType.SMALLMONEY, 568 TokenType.ROWVERSION, 569 TokenType.IMAGE, 570 TokenType.VARIANT, 571 TokenType.VECTOR, 572 TokenType.VOID, 573 TokenType.OBJECT, 574 TokenType.OBJECT_IDENTIFIER, 575 TokenType.INET, 576 TokenType.IPADDRESS, 577 TokenType.IPPREFIX, 578 TokenType.IPV4, 579 TokenType.IPV6, 580 TokenType.UNKNOWN, 581 TokenType.NOTHING, 582 TokenType.NULL, 583 TokenType.NAME, 584 TokenType.TDIGEST, 585 TokenType.DYNAMIC, 586 *ENUM_TYPE_TOKENS, 587 *NESTED_TYPE_TOKENS, 588 *AGGREGATE_TYPE_TOKENS, 589 } 590 591 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 592 TokenType.BIGINT: TokenType.UBIGINT, 593 TokenType.INT: TokenType.UINT, 594 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 595 TokenType.SMALLINT: TokenType.USMALLINT, 596 TokenType.TINYINT: TokenType.UTINYINT, 597 TokenType.DECIMAL: TokenType.UDECIMAL, 598 TokenType.DOUBLE: TokenType.UDOUBLE, 599 } 600 601 SUBQUERY_PREDICATES: t.ClassVar = { 602 TokenType.ANY: exp.Any, 603 TokenType.ALL: exp.All, 604 TokenType.EXISTS: exp.Exists, 605 TokenType.SOME: exp.Any, 606 } 607 608 SUBQUERY_TOKENS: t.ClassVar = { 609 TokenType.SELECT, 610 TokenType.WITH, 611 TokenType.FROM, 612 } 613 614 RESERVED_TOKENS: t.ClassVar = { 615 *Tokenizer.SINGLE_TOKENS.values(), 616 TokenType.SELECT, 617 } - {TokenType.IDENTIFIER} 618 619 DB_CREATABLES: t.ClassVar = { 620 TokenType.DATABASE, 621 TokenType.DICTIONARY, 622 TokenType.FILE_FORMAT, 623 TokenType.MODEL, 624 TokenType.NAMESPACE, 625 
TokenType.SCHEMA, 626 TokenType.SEMANTIC_VIEW, 627 TokenType.SEQUENCE, 628 TokenType.SINK, 629 TokenType.SOURCE, 630 TokenType.STAGE, 631 TokenType.STORAGE_INTEGRATION, 632 TokenType.STREAMLIT, 633 TokenType.TABLE, 634 TokenType.TAG, 635 TokenType.VIEW, 636 TokenType.WAREHOUSE, 637 } 638 639 CREATABLES: t.ClassVar = { 640 TokenType.COLUMN, 641 TokenType.CONSTRAINT, 642 TokenType.FOREIGN_KEY, 643 TokenType.FUNCTION, 644 TokenType.INDEX, 645 TokenType.PROCEDURE, 646 TokenType.TRIGGER, 647 TokenType.TYPE, 648 *DB_CREATABLES, 649 } 650 651 TRIGGER_EVENTS: t.ClassVar = { 652 TokenType.INSERT, 653 TokenType.UPDATE, 654 TokenType.DELETE, 655 TokenType.TRUNCATE, 656 } 657 658 ALTERABLES: t.ClassVar = { 659 TokenType.INDEX, 660 TokenType.TABLE, 661 TokenType.VIEW, 662 TokenType.SESSION, 663 } 664 665 # Tokens that can represent identifiers 666 ID_VAR_TOKENS: t.ClassVar[set] = { 667 TokenType.ALL, 668 TokenType.ANALYZE, 669 TokenType.ATTACH, 670 TokenType.VAR, 671 TokenType.ANTI, 672 TokenType.APPLY, 673 TokenType.ASC, 674 TokenType.ASOF, 675 TokenType.AUTO_INCREMENT, 676 TokenType.BEGIN, 677 TokenType.BPCHAR, 678 TokenType.CACHE, 679 TokenType.CASE, 680 TokenType.COLLATE, 681 TokenType.COMMAND, 682 TokenType.COMMENT, 683 TokenType.COMMIT, 684 TokenType.CONSTRAINT, 685 TokenType.COPY, 686 TokenType.CUBE, 687 TokenType.CURRENT_SCHEMA, 688 TokenType.DEFAULT, 689 TokenType.DELETE, 690 TokenType.DESC, 691 TokenType.DESCRIBE, 692 TokenType.DETACH, 693 TokenType.DICTIONARY, 694 TokenType.DIV, 695 TokenType.END, 696 TokenType.EXECUTE, 697 TokenType.EXPORT, 698 TokenType.ESCAPE, 699 TokenType.FALSE, 700 TokenType.FIRST, 701 TokenType.FILE, 702 TokenType.FILTER, 703 TokenType.FINAL, 704 TokenType.FORMAT, 705 TokenType.FULL, 706 TokenType.GET, 707 TokenType.IDENTIFIER, 708 TokenType.INOUT, 709 TokenType.IS, 710 TokenType.ISNULL, 711 TokenType.INTERVAL, 712 TokenType.KEEP, 713 TokenType.KILL, 714 TokenType.LEFT, 715 TokenType.LIMIT, 716 TokenType.LOAD, 717 TokenType.LOCK, 718 
TokenType.MATCH, 719 TokenType.MERGE, 720 TokenType.NATURAL, 721 TokenType.NEXT, 722 TokenType.OFFSET, 723 TokenType.OPERATOR, 724 TokenType.ORDINALITY, 725 TokenType.OVER, 726 TokenType.OVERLAPS, 727 TokenType.OVERWRITE, 728 TokenType.PARTITION, 729 TokenType.PERCENT, 730 TokenType.PIVOT, 731 TokenType.PRAGMA, 732 TokenType.PUT, 733 TokenType.RANGE, 734 TokenType.RECURSIVE, 735 TokenType.REFERENCES, 736 TokenType.REFRESH, 737 TokenType.RENAME, 738 TokenType.REPLACE, 739 TokenType.RIGHT, 740 TokenType.ROLLUP, 741 TokenType.ROW, 742 TokenType.ROWS, 743 TokenType.SEMI, 744 TokenType.SET, 745 TokenType.SETTINGS, 746 TokenType.SHOW, 747 TokenType.STREAM, 748 TokenType.STREAMLIT, 749 TokenType.TEMPORARY, 750 TokenType.TOP, 751 TokenType.TRUE, 752 TokenType.TRUNCATE, 753 TokenType.UNIQUE, 754 TokenType.UNNEST, 755 TokenType.UNPIVOT, 756 TokenType.UPDATE, 757 TokenType.USE, 758 TokenType.VOLATILE, 759 TokenType.WINDOW, 760 TokenType.CURRENT_CATALOG, 761 TokenType.LOCALTIME, 762 TokenType.LOCALTIMESTAMP, 763 TokenType.SESSION_USER, 764 TokenType.STRAIGHT_JOIN, 765 *ALTERABLES, 766 *CREATABLES, 767 *SUBQUERY_PREDICATES, 768 *TYPE_TOKENS, 769 *NO_PAREN_FUNCTIONS, 770 } - {TokenType.UNION} 771 772 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 773 TokenType.ANTI, 774 TokenType.ASOF, 775 TokenType.FULL, 776 TokenType.LEFT, 777 TokenType.LOCK, 778 TokenType.NATURAL, 779 TokenType.RIGHT, 780 TokenType.SEMI, 781 TokenType.WINDOW, 782 } 783 784 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 785 786 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 787 788 ARRAY_CONSTRUCTORS: t.ClassVar = { 789 "ARRAY": exp.Array, 790 "LIST": exp.List, 791 } 792 793 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 794 795 UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET} 796 797 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 798 799 # Tokens that indicate a simple column reference 800 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = 
frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 801 802 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 803 804 # Postfix tokens that prevent the bare column fast path 805 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 806 { 807 TokenType.L_PAREN, 808 TokenType.L_BRACKET, 809 TokenType.L_BRACE, 810 TokenType.COLON, 811 TokenType.JOIN_MARKER, 812 } 813 ) 814 815 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 816 { 817 TokenType.L_PAREN, 818 TokenType.L_BRACKET, 819 TokenType.L_BRACE, 820 TokenType.PIVOT, 821 TokenType.UNPIVOT, 822 TokenType.TABLE_SAMPLE, 823 } 824 ) 825 826 FUNC_TOKENS: t.ClassVar = { 827 TokenType.COLLATE, 828 TokenType.COMMAND, 829 TokenType.CURRENT_DATE, 830 TokenType.CURRENT_DATETIME, 831 TokenType.CURRENT_SCHEMA, 832 TokenType.CURRENT_TIMESTAMP, 833 TokenType.CURRENT_TIME, 834 TokenType.CURRENT_USER, 835 TokenType.CURRENT_CATALOG, 836 TokenType.FILTER, 837 TokenType.FIRST, 838 TokenType.FORMAT, 839 TokenType.GET, 840 TokenType.GLOB, 841 TokenType.IDENTIFIER, 842 TokenType.INDEX, 843 TokenType.ISNULL, 844 TokenType.ILIKE, 845 TokenType.INSERT, 846 TokenType.LIKE, 847 TokenType.LOCALTIME, 848 TokenType.LOCALTIMESTAMP, 849 TokenType.MERGE, 850 TokenType.NEXT, 851 TokenType.OFFSET, 852 TokenType.PRIMARY_KEY, 853 TokenType.RANGE, 854 TokenType.REPLACE, 855 TokenType.RLIKE, 856 TokenType.ROW, 857 TokenType.SESSION_USER, 858 TokenType.UNNEST, 859 TokenType.VAR, 860 TokenType.LEFT, 861 TokenType.RIGHT, 862 TokenType.SEQUENCE, 863 TokenType.DATE, 864 TokenType.DATETIME, 865 TokenType.TABLE, 866 TokenType.TIMESTAMP, 867 TokenType.TIMESTAMPTZ, 868 TokenType.TRUNCATE, 869 TokenType.UTC_DATE, 870 TokenType.UTC_TIME, 871 TokenType.UTC_TIMESTAMP, 872 TokenType.WINDOW, 873 TokenType.XOR, 874 *TYPE_TOKENS, 875 *SUBQUERY_PREDICATES, 876 } 877 878 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 879 TokenType.AND: exp.And, 880 } 881 882 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] 
= { 883 TokenType.COLON_EQ: exp.PropertyEQ, 884 } 885 886 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 887 TokenType.OR: exp.Or, 888 } 889 890 EQUALITY: t.ClassVar = { 891 TokenType.EQ: exp.EQ, 892 TokenType.NEQ: exp.NEQ, 893 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 894 } 895 896 COMPARISON: t.ClassVar = { 897 TokenType.GT: exp.GT, 898 TokenType.GTE: exp.GTE, 899 TokenType.LT: exp.LT, 900 TokenType.LTE: exp.LTE, 901 } 902 903 BITWISE: t.ClassVar = { 904 TokenType.AMP: exp.BitwiseAnd, 905 TokenType.CARET: exp.BitwiseXor, 906 TokenType.PIPE: exp.BitwiseOr, 907 } 908 909 TERM: t.ClassVar = { 910 TokenType.DASH: exp.Sub, 911 TokenType.PLUS: exp.Add, 912 TokenType.MOD: exp.Mod, 913 TokenType.COLLATE: exp.Collate, 914 } 915 916 FACTOR: t.ClassVar = { 917 TokenType.DIV: exp.IntDiv, 918 TokenType.LR_ARROW: exp.Distance, 919 TokenType.LLRR_ARROW: exp.DistanceNd, 920 TokenType.SLASH: exp.Div, 921 TokenType.STAR: exp.Mul, 922 } 923 924 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 925 926 TIMES: t.ClassVar = { 927 TokenType.TIME, 928 TokenType.TIMETZ, 929 } 930 931 TIMESTAMPS: t.ClassVar = { 932 TokenType.TIMESTAMP, 933 TokenType.TIMESTAMPNTZ, 934 TokenType.TIMESTAMPTZ, 935 TokenType.TIMESTAMPLTZ, 936 *TIMES, 937 } 938 939 SET_OPERATIONS: t.ClassVar = { 940 TokenType.UNION, 941 TokenType.INTERSECT, 942 TokenType.EXCEPT, 943 } 944 945 JOIN_METHODS: t.ClassVar = { 946 TokenType.ASOF, 947 TokenType.NATURAL, 948 TokenType.POSITIONAL, 949 } 950 951 JOIN_SIDES: t.ClassVar = { 952 TokenType.LEFT, 953 TokenType.RIGHT, 954 TokenType.FULL, 955 } 956 957 JOIN_KINDS: t.ClassVar = { 958 TokenType.ANTI, 959 TokenType.CROSS, 960 TokenType.INNER, 961 TokenType.OUTER, 962 TokenType.SEMI, 963 TokenType.STRAIGHT_JOIN, 964 } 965 966 JOIN_HINTS: t.ClassVar[set[str]] = set() 967 968 # Tokens that unambiguously end a table reference on the fast path 969 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 970 { 971 TokenType.COMMA, 972 TokenType.GROUP_BY, 973 
TokenType.HAVING, 974 TokenType.JOIN, 975 TokenType.LIMIT, 976 TokenType.ON, 977 TokenType.ORDER_BY, 978 TokenType.R_PAREN, 979 TokenType.SEMICOLON, 980 TokenType.SENTINEL, 981 TokenType.WHERE, 982 *SET_OPERATIONS, 983 *JOIN_KINDS, 984 *JOIN_METHODS, 985 *JOIN_SIDES, 986 } 987 ) 988 989 LAMBDAS: t.ClassVar = { 990 TokenType.ARROW: lambda self, expressions: self.expression( 991 exp.Lambda( 992 this=self._replace_lambda( 993 self._parse_disjunction(), 994 expressions, 995 ), 996 expressions=expressions, 997 ) 998 ), 999 TokenType.FARROW: lambda self, expressions: self.expression( 1000 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 1001 ), 1002 } 1003 1004 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 1005 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 1006 1007 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 1008 1009 COLUMN_OPERATORS: t.ClassVar = { 1010 TokenType.DOT: None, 1011 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 1012 TokenType.DCOLON: lambda self, this, to: self.build_cast( 1013 strict=self.STRICT_CAST, this=this, to=to 1014 ), 1015 TokenType.ARROW: lambda self, this, path: self.expression( 1016 exp.JSONExtract( 1017 this=this, 1018 expression=self.dialect.to_json_path(path), 1019 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1020 ) 1021 ), 1022 TokenType.DARROW: lambda self, this, path: self.expression( 1023 exp.JSONExtractScalar( 1024 this=this, 1025 expression=self.dialect.to_json_path(path), 1026 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1027 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1028 ) 1029 ), 1030 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 1031 exp.JSONBExtract(this=this, expression=path) 1032 ), 1033 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1034 exp.JSONBExtractScalar(this=this, 
expression=path) 1035 ), 1036 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1037 exp.JSONBContains(this=this, expression=key) 1038 ), 1039 } 1040 1041 CAST_COLUMN_OPERATORS: t.ClassVar = { 1042 TokenType.DOTCOLON, 1043 TokenType.DCOLON, 1044 } 1045 1046 EXPRESSION_PARSERS: t.ClassVar = { 1047 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1048 exp.Column: lambda self: self._parse_column(), 1049 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1050 exp.Condition: lambda self: self._parse_disjunction(), 1051 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1052 exp.Expr: lambda self: self._parse_expression(), 1053 exp.From: lambda self: self._parse_from(joins=True), 1054 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1055 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1056 exp.Group: lambda self: self._parse_group(), 1057 exp.Having: lambda self: self._parse_having(), 1058 exp.Hint: lambda self: self._parse_hint_body(), 1059 exp.Identifier: lambda self: self._parse_id_var(), 1060 exp.Join: lambda self: self._parse_join(), 1061 exp.Lambda: lambda self: self._parse_lambda(), 1062 exp.Lateral: lambda self: self._parse_lateral(), 1063 exp.Limit: lambda self: self._parse_limit(), 1064 exp.Offset: lambda self: self._parse_offset(), 1065 exp.Order: lambda self: self._parse_order(), 1066 exp.Ordered: lambda self: self._parse_ordered(), 1067 exp.Properties: lambda self: self._parse_properties(), 1068 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1069 exp.Qualify: lambda self: self._parse_qualify(), 1070 exp.Returning: lambda self: self._parse_returning(), 1071 exp.Select: lambda self: self._parse_select(), 1072 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1073 exp.Table: lambda self: self._parse_table_parts(), 1074 exp.TableAlias: lambda self: self._parse_table_alias(), 1075 exp.Tuple: 
lambda self: self._parse_value(values=False), 1076 exp.Whens: lambda self: self._parse_when_matched(), 1077 exp.Where: lambda self: self._parse_where(), 1078 exp.Window: lambda self: self._parse_named_window(), 1079 exp.With: lambda self: self._parse_with(), 1080 } 1081 1082 STATEMENT_PARSERS: t.ClassVar = { 1083 TokenType.ALTER: lambda self: self._parse_alter(), 1084 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1085 TokenType.BEGIN: lambda self: self._parse_transaction(), 1086 TokenType.CACHE: lambda self: self._parse_cache(), 1087 TokenType.COMMENT: lambda self: self._parse_comment(), 1088 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1089 TokenType.COPY: lambda self: self._parse_copy(), 1090 TokenType.CREATE: lambda self: self._parse_create(), 1091 TokenType.DELETE: lambda self: self._parse_delete(), 1092 TokenType.DESC: lambda self: self._parse_describe(), 1093 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1094 TokenType.DROP: lambda self: self._parse_drop(), 1095 TokenType.GRANT: lambda self: self._parse_grant(), 1096 TokenType.REVOKE: lambda self: self._parse_revoke(), 1097 TokenType.INSERT: lambda self: self._parse_insert(), 1098 TokenType.KILL: lambda self: self._parse_kill(), 1099 TokenType.LOAD: lambda self: self._parse_load(), 1100 TokenType.MERGE: lambda self: self._parse_merge(), 1101 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1102 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1103 TokenType.REFRESH: lambda self: self._parse_refresh(), 1104 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1105 TokenType.SET: lambda self: self._parse_set(), 1106 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1107 TokenType.UNCACHE: lambda self: self._parse_uncache(), 1108 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1109 TokenType.UPDATE: lambda self: self._parse_update(), 1110 TokenType.USE: lambda 
# Prefix-operator dispatch table keyed by token type. Each callback takes the
# parser instance and returns the parsed operand, wrapped in the operator's
# node where there is one.
# Note the asymmetry: NOT parses its operand with `_parse_equality`, whereas
# every other operator recurses into `_parse_unary`.
UNARY_PARSERS: t.ClassVar = {
    TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
    TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())),
    TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())),
    TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())),
    TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())),
    TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())),
}
# Placeholder dispatch table keyed by token type. Each callback takes the parser
# instance only.
PLACEHOLDER_PARSERS: t.ClassVar = {
    # Anonymous placeholder: the node carries no name
    TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()),
    TokenType.PARAMETER: lambda self: self._parse_parameter(),
    # A colon only forms a named placeholder when `_match_set` succeeds against
    # COLON_PLACEHOLDER_TOKENS; the name is then read from `self._prev.text`.
    # Otherwise the callback yields None.
    TokenType.COLON: lambda self: (
        self.expression(exp.Placeholder(this=self._prev.text))
        if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
        else None
    ),
}
# Property dispatch table keyed by the property's keyword text (upper case,
# multi-word keys are separated by a single space). Each callback takes the
# parser instance; the ones declared with `**kwargs` forward those keyword
# arguments unchanged to the underlying parse method.
# Several keywords are aliases of one another and intentionally share a target:
#   - CHARSET / CHARACTER SET
#   - LOCK / LOCKING
#   - PARTITION BY / PARTITIONED BY / PARTITIONED_BY
#   - SECURITY / SQL SECURITY
#   - TEMP / TEMPORARY
#   - FORMAT / USING (both build a FileFormatProperty)
#   - DETERMINISTIC / IMMUTABLE (both build StabilityProperty("IMMUTABLE"))
PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
    "ALLOWED_VALUES": lambda self: self.expression(
        exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary))
    ),
    "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
    "AUTO": lambda self: self._parse_auto_property(),
    "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
    "BACKUP": lambda self: self.expression(
        exp.BackupProperty(this=self._parse_var(any_token=True))
    ),
    "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
    "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
    "CHECKSUM": lambda self: self._parse_checksum(),
    "CLUSTER BY": lambda self: self._parse_cluster_property(),
    "CLUSTERED": lambda self: self._parse_clustered_by(),
    "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
        exp.CollateProperty, **kwargs
    ),
    "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
    "CONTAINS": lambda self: self._parse_contains_property(),
    "COPY": lambda self: self._parse_copy_property(),
    "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
    "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
    "DEFINER": lambda self: self._parse_definer(),
    "DETERMINISTIC": lambda self: self.expression(
        exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
    ),
    "DISTRIBUTED": lambda self: self._parse_distributed_property(),
    "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
    "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()),
    "DISTKEY": lambda self: self._parse_distkey(),
    "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
    "EMPTY": lambda self: self.expression(exp.EmptyProperty()),
    "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
    # `EnviromentProperty` is the spelling this code references; it is defined
    # outside this file, so it must not be "corrected" here
    "ENVIRONMENT": lambda self: self.expression(
        exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment))
    ),
    "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty),
    "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
    "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()),
    "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
    "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "FREESPACE": lambda self: self._parse_freespace(),
    "GLOBAL": lambda self: self.expression(exp.GlobalProperty()),
    "HEAP": lambda self: self.expression(exp.HeapProperty()),
    "ICEBERG": lambda self: self.expression(exp.IcebergProperty()),
    "IMMUTABLE": lambda self: self.expression(
        exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
    ),
    "INHERITS": lambda self: self.expression(
        exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table))
    ),
    "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())),
    "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
    "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
    "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
    "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
    "LIKE": lambda self: self._parse_create_like(),
    "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
    "LOCK": lambda self: self._parse_locking(),
    "LOCKING": lambda self: self._parse_locking(),
    "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
    "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()),
    "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
    "MODIFIES": lambda self: self._parse_modifies_property(),
    # MULTISET and SET build the same node type and differ only in `multi`
    "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)),
    "NO": lambda self: self._parse_no_property(),
    "ON": lambda self: self._parse_on_property(),
    "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
    "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())),
    "PARTITION": lambda self: self._parse_partitioned_of(),
    "PARTITION BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
    "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
    "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
    "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
    "READS": lambda self: self._parse_reads_property(),
    "REMOTE": lambda self: self._parse_remote_with_connection(),
    "RETURNS": lambda self: self._parse_returns(),
    "STRICT": lambda self: self.expression(exp.StrictProperty()),
    "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()),
    "ROW": lambda self: self._parse_row(),
    "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
    # `this` is the parsed expression only when BY follows; otherwise it is the
    # falsy result of `_match_text_seq`
    "SAMPLE": lambda self: self.expression(
        exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise())
    ),
    "SECURE": lambda self: self.expression(exp.SecureProperty()),
    "SECURITY": lambda self: self._parse_sql_security(),
    "SQL SECURITY": lambda self: self._parse_sql_security(),
    "SET": lambda self: self.expression(exp.SetProperty(multi=False)),
    "SETTINGS": lambda self: self._parse_settings_property(),
    "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
    "SORTKEY": lambda self: self._parse_sortkey(),
    "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
    "STABLE": lambda self: self.expression(
        exp.StabilityProperty(this=exp.Literal.string("STABLE"))
    ),
    "STORED": lambda self: self._parse_stored(),
    "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
    "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
    "TEMP": lambda self: self.expression(exp.TemporaryProperty()),
    "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()),
    "TO": lambda self: self._parse_to_table(),
    "TRANSIENT": lambda self: self.expression(exp.TransientProperty()),
    "TRANSFORM": lambda self: self.expression(
        exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression))
    ),
    "TTL": lambda self: self._parse_ttl(),
    "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
    "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()),
    "VOLATILE": lambda self: self._parse_volatile_property(),
    "WITH": lambda self: self._parse_with_property(),
}
def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None:
    """Parse the argument list of a BUCKET(..) / TRUNCATE(..) partition transform.

    The BUCKET or TRUNCATE keyword is read from the previous token, so it must
    already have been consumed when this is called.

    Returns:
        A PartitionedByBucket or PartitionByTruncate node, or None when the
        keyword is not followed by an opening parenthesis. In the latter case
        the parser is moved back by one token first.
    """
    if not self._match(TokenType.L_PAREN, advance=False):
        # The partition transforms are written as
        #   PARTITION BY (BUCKET(..) | TRUNCATE(..))
        # A keyword without a following parenthesis is not a transform, so step back
        # over it and let it be parsed as an identifier instead
        self._retreat(self._index - 1)
        return None

    # Inspect the keyword before the argument list is consumed
    is_bucket = self._prev.text.upper() == "BUCKET"

    def _parse_argument():
        return self._parse_primary() or self._parse_column()

    arguments = self._parse_wrapped_csv(_parse_argument)
    first = seq_get(arguments, 0)
    second = seq_get(arguments, 1)

    # Iceberg partition transforms come in two argument orders:
    # - Hive: `bucket(<num buckets>, <col name>)` / `truncate(<num_chars>, <col_name>)`
    # - Trino: `bucket(<col name>, <num buckets>)` / `truncate(<col_name>, <num_chars>)`
    # Both are canonicalized to the Trino order, so a leading literal gets swapped
    #
    # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
    # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
    if isinstance(first, exp.Literal):
        this, expression = second, first
    else:
        this, expression = first, second

    if is_bucket:
        node = exp.PartitionedByBucket(this=this, expression=expression)
    else:
        node = exp.PartitionByTruncate(this=this, expression=expression)

    return self.expression(node)
# Dispatch table keyed by upper-case function name. Each callback takes the
# parser instance only and delegates to a dedicated `_parse_*` method instead of
# generic argument-list parsing.
FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
    # One entry per SQL name of ArgMax / ArgMin. The lambdas do not reference the
    # comprehension variable, so they are unaffected by late binding.
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
    },
    **{
        name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
    },
    # STRICT_CAST is looked up through `self` when the callback runs, so a
    # subclass that overrides the flag is honoured
    "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
    "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
    "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
    "CHAR": lambda self: self._parse_char(),
    "CHR": lambda self: self._parse_char(),
    "DECODE": lambda self: self._parse_decode(),
    "EXTRACT": lambda self: self._parse_extract(),
    "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
    "GAP_FILL": lambda self: self._parse_gap_fill(),
    "INITCAP": lambda self: self._parse_initcap(),
    "JSON_OBJECT": lambda self: self._parse_json_object(),
    "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
    "JSON_TABLE": lambda self: self._parse_json_table(),
    "MATCH": lambda self: self._parse_match_against(),
    "NORMALIZE": lambda self: self._parse_normalize(),
    "OPENJSON": lambda self: self._parse_open_json(),
    "OVERLAY": lambda self: self._parse_overlay(),
    "POSITION": lambda self: self._parse_position(),
    # The SAFE_ / TRY_ variants always pass False as the first argument together
    # with safe=True, regardless of STRICT_CAST
    "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
    "STRING_AGG": lambda self: self._parse_string_agg(),
    "SUBSTRING": lambda self: self._parse_substring(),
    "TRIM": lambda self: self._parse_trim(),
    "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
    "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
    "XMLELEMENT": lambda self: self._parse_xml_element(),
    "XMLTABLE": lambda self: self._parse_xml_table(),
}
# Transaction characteristics, in OPTIONS_TYPE form: each key is the leading
# keyword and its value lists the continuations accepted after it, either as
# single words or as whole word sequences.
# The four isolation levels are the ones defined by the SQL standard. The third
# one used to be misspelled "UNCOMITTED", which meant the standard spelling
# READ UNCOMMITTED was not among the listed options.
TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = {
    "ISOLATION": (
        ("LEVEL", "REPEATABLE", "READ"),
        ("LEVEL", "READ", "COMMITTED"),
        ("LEVEL", "READ", "UNCOMMITTED"),
        ("LEVEL", "SERIALIZABLE"),
    ),
    "READ": ("WRITE", "ONLY"),
}
# Keyword-option tables in OPTIONS_TYPE form: every key is a leading keyword and
# its value holds the continuations listed for it, either as single words or as
# whole word sequences. An empty tuple lists no continuation at all.
# NOTE(review): the code that matches input against these tables is outside this
# excerpt.

TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = {
    "INSTEAD": (("OF",),),
    **{keyword: () for keyword in ("BEFORE", "AFTER")},
}

TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = {"NOT": (("DEFERRABLE",),), "DEFERRABLE": ()}

CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = {
    # Keywords that take a continuation
    **dict.fromkeys(("SCALE", "SHARD"), ("EXTEND", "NOEXTEND")),
    "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
    # Keywords that stand alone
    "SESSION": (),
    "GLOBAL": (),
    "KEEP": (),
    "NOKEEP": (),
    "ORDER": (),
    "NOORDER": (),
    "NOCACHE": (),
    "CYCLE": (),
    "NOCYCLE": (),
    "NOMINVALUE": (),
    "NOMAXVALUE": (),
    "NOSCALE": (),
    "NOSHARD": (),
}

ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "FOR": ("ALL", "INSERT", "NONE"),
}

USABLES: t.ClassVar[OPTIONS_TYPE] = {
    kind: () for kind in ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG")
}

CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = {"RENAME": ("FIELDS",), "ADD": ("FIELDS",)}

SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "TYPE": ("EVOLUTION",),
    "BINDING": (),
    "COMPENSATION": (),
    "EVOLUTION": (),
}

# Empty in this class
PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict()

EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"CALLER": (), "SELF": (), "OWNER": ()}

KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "NOT": ("ENFORCED",),
    "MATCH": ("FULL", "PARTIAL", "SIMPLE"),
    "INITIALLY": ("DEFERRED", "IMMEDIATE"),
    "USING": ("BTREE", "HASH"),
    "DEFERRABLE": (),
    "NORELY": (),
    "RELY": (),
}

WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
    "NO": ("OTHERS",),
    "CURRENT": ("ROW",),
    "GROUP": (),
    "TIES": (),
}

INSERT_ALTERNATIVES: t.ClassVar = {
    "ABORT",
    "FAIL",
    "IGNORE",
    "REPLACE",
    "ROLLBACK",
}
# Keyword and token sets consulted by individual parse methods. Sets of strings
# hold upper-case keyword text; sets of TokenType hold token kinds.
# NOTE(review): the methods that read these sets are outside this excerpt, so
# the grouping comments below are derived from the names and values only.

# Table cloning and historical-data clauses
CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"}
HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"}
HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"}

OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN}

TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

# Every identifier/variable token except RANGE and ROWS
WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER}
WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"}

JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

# Every identifier/variable token except ROW, ROWS and PERCENT
FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

ADD_CONSTRAINT_TOKENS: t.ClassVar = {
    TokenType.CONSTRAINT,
    TokenType.FOREIGN_KEY,
    TokenType.INDEX,
    TokenType.KEY,
    TokenType.PRIMARY_KEY,
    TokenType.UNIQUE,
}

DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT}

# Table alias tokens with the set-operation tokens removed
UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS

SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

COPY_INTO_VARLEN_OPTIONS: t.ClassVar = {
    "FILE_FORMAT",
    "COPY_OPTIONS",
    "FORMAT_OPTIONS",
    "CREDENTIAL",
}

IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

# Maps a literal prefix to an expression class; empty in this class
ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {}

ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

# The style options for the DESCRIBE statement
DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

# The delimiters that may separate a name from its value in a SET assignment
SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"}

# The style options for the ANALYZE statement
ANALYZE_STYLES: t.ClassVar = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

# Dispatch table for the expression part of an ANALYZE statement, keyed by its
# leading keyword. Each callback takes the parser instance only. Some keywords
# share a parse method (ALL/PREDICATE, COMPUTE/ESTIMATE, DROP/UPDATE).
ANALYZE_EXPRESSION_PARSERS: t.ClassVar = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"}

# Note: a tuple, unlike most of the token collections in this class
AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET)

# Empty in this class
OPERATION_MODIFIERS: t.ClassVar[set[str]] = set()

RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"}

SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"}

MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)

# Passed as the first argument to `_parse_cast` / `_parse_convert` by the "CAST"
# and "CONVERT" entries of FUNCTION_PARSERS
STRICT_CAST: t.ClassVar = True

# NOTE(review): pivot column-naming switches; their exact effect is defined by
# the pivot parsing code, which is outside this excerpt
PREFIXED_PIVOT_COLUMNS: t.ClassVar = False
IDENTIFY_PIVOT_STRINGS: t.ClassVar = False
# Controls when an aggregation's name is included in a pivoted column's name:
# "agg_name_if_aliased" - only for aggregations that carry an explicit alias
# "agg_name_if_aliased_or_multiple" - if aliased, or whenever there are multiple aggregations
# "agg_name_if_multiple" - only when there are multiple aggregations (a lone agg is value-only)
PIVOT_COLUMN_NAMING: t.ClassVar[str] = "agg_name_if_aliased"

# NOTE(review): presumably whether a single-argument LOG is the natural
# logarithm - confirm against the LOG function builder
LOG_DEFAULTS_TO_LN: t.ClassVar = False

# Whether the table sample clause expects CSV syntax
TABLESAMPLE_CSV: t.ClassVar = False

# The default method used for table sampling
DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None

# Whether the SET command needs a delimiter (e.g. "=") for assignments
SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True

# Whether the TRIM function expects the characters to trim as its first argument
TRIM_PATTERN_FIRST: t.ClassVar = False

# Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
STRING_ALIASES: t.ClassVar = False

# Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True
# The modifier names that the flag above applies to
SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"}

# Whether to parse IF statements that aren't followed by a left parenthesis as commands
NO_PAREN_IF_COMMANDS: t.ClassVar = True

# Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False

# Whether the `:` operator is used to extract a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT: t.ClassVar = False

# Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
# If this is True and '(' is not found, the keyword will be treated as an identifier
VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True

# Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False

# Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
INTERVAL_SPANS: t.ClassVar = True

# Whether a PARTITION clause can follow a table reference
SUPPORTS_PARTITION_SELECTION: t.ClassVar = False

# Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True

# Whether the 'AS' keyword is optional in the CTE definition syntax
OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True

# Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True

# Whether ALTER statements are allowed to contain PARTITION specifications
ALTER_TABLE_PARTITIONS: t.ClassVar = False

# Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
# In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
# to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
# as BigQuery, where all joins have the same precedence.
JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False

# Whether TIMESTAMP <literal> can produce a zone-aware timestamp
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False

# Whether map literals support arbitrary expressions as keys.
# When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
# When False, keys are typically restricted to identifiers.
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False

# Set when JSON_EXTRACT only accepts a JSON expression as its first argument; this
# holds for Snowflake, whereas BigQuery can also process strings
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False

# Some dialects (e.g. Databricks) accept JOINs that carry no join criteria.
# Appending ON TRUE keeps the transpiled query semantically correct elsewhere
ADD_JOIN_ON_TRUE: t.ClassVar = False

# Set when an INTERVAL span written as the literal '\d+ hh:[mm:[ss[.ff]]]'
# may leave out its span unit (`DAY TO MINUTE` or `DAY TO SECOND`)
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False

# Tries keyed by the space-separated words of every SHOW / SET parser key
SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)


def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Store the parser configuration and initialise the per-parse state.

    Args:
        error_level: The error level to use; a falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: The context length passed on when formatting error messages.
        max_errors: The limit handed to `concat_messages` when errors are raised together.
        max_nodes: The node budget checked by `validate_expression`; -1 disables the check.
        dialect: The dialect (or dialect specifier) resolved through `_resolve_dialect`.
    """
    # Configuration: these attributes are not touched by `reset`
    self.error_level: ErrorLevel = ErrorLevel.IMMEDIATE if not error_level else error_level
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Per-parse state: mirrors what `reset` assigns
    self.sql: str = ""
    self.errors: list[ParseError] = []
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []
    self._pipe_cte_counter: int = 0
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0
    self._node_count: int = 0


def reset(self) -> None:
    """Clear the per-parse state: SQL text, errors, token cursor, chunks and counters."""
    self.sql = ""
    self.errors = []

    # Token buffer and cursor position
    self._tokens = []
    self._tokens_size = self._index = 0

    # Token window around the cursor
    self._curr = self._next = self._prev = SENTINEL_NONE
    self._prev_comments = []

    self._pipe_cte_counter = 0
    self._chunks = []
    self._chunk_index = 0
    self._node_count = 0


def _advance(self, times: i64 = 1) -> None:
    """Move the cursor by `times` tokens and refresh the current/next/previous tokens."""
    position = self._index + times
    self._index = position

    buffer = self._tokens
    count = self._tokens_size
    lookahead = position + 1

    # Positions at or past the end of the buffer resolve to the sentinel token
    self._curr = buffer[position] if position < count else SENTINEL_NONE
    self._next = buffer[lookahead] if lookahead < count else SENTINEL_NONE

    if position <= 0:
        # Nothing precedes the cursor
        self._prev = SENTINEL_NONE
        self._prev_comments = []
        return

    previous = buffer[position - 1]
    self._prev = previous
    self._prev_comments = previous.comments


def _advance_chunk(self) -> None:
    """Load the chunk at `_chunk_index` as the token buffer and step onto its first token."""
    # Park the cursor one step before the start so the final `_advance` lands on index 0
    self._index = -1

    chunk = self._chunks[self._chunk_index]
    self._tokens = chunk
    self._tokens_size = i64(len(chunk))
    self._chunk_index += 1

    self._advance()


def _retreat(self, index: i64) -> None:
    """Reposition the cursor at `index`; a no-op when it is already there."""
    if index == self._index:
        return

    self._advance(index - self._index)


def _add_comments(self, expression: exp.Expr | None) -> None:
    """Hand the pending previous-token comments over to `expression`, if both are truthy."""
    if not expression or not self._prev_comments:
        return

    expression.add_comments(self._prev_comments)
    self._prev_comments = []


def _match(
    self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
) -> bool:
    """
    Test whether the current token has type `token_type`.

    On a match the token is consumed when `advance` is true, and any pending comments are
    attached to `expression` regardless of `advance`.
    """
    if self._curr.token_type != token_type:
        return False

    if advance:
        self._advance()

    self._add_comments(expression)
    return True


def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
    """Test whether the current token's type is in `types`, consuming it if `advance`."""
    if self._curr.token_type not in types:
        return False

    if advance:
        self._advance()

    return True


def _match_pair(
    self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
) -> bool:
    """Test the current and next token types against the given pair, consuming both if `advance`."""
    is_pair = self._curr.token_type == token_type_a and self._next.token_type == token_type_b
    if not is_pair:
        return False

    if advance:
        self._advance(2)

    return True
1947 1948 def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool: 1949 if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts: 1950 if advance: 1951 self._advance() 1952 return True 1953 return False 1954 1955 def _match_text_seq(self, *texts: str, advance: bool = True) -> bool: 1956 index = self._index 1957 string_type = TokenType.STRING 1958 for text in texts: 1959 if self._curr.token_type != string_type and self._curr.text.upper() == text: 1960 self._advance() 1961 else: 1962 self._retreat(index) 1963 return False 1964 1965 if not advance: 1966 self._retreat(index) 1967 1968 return True 1969 1970 def _is_connected(self) -> bool: 1971 prev = self._prev 1972 curr = self._curr 1973 return bool(prev and curr and prev.end + 1 == curr.start) 1974 1975 def _find_sql(self, start: Token, end: Token) -> str: 1976 return self.sql[start.start : end.end + 1] 1977 1978 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1979 token = token or self._curr or self._prev or Token.string("") 1980 formatted_sql, start_context, highlight, end_context = highlight_sql( 1981 sql=self.sql, 1982 positions=[(token.start, token.end)], 1983 context_length=self.error_message_context, 1984 ) 1985 formatted_message = f"{message}. 
Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1986 1987 error = ParseError.new( 1988 formatted_message, 1989 description=message, 1990 line=token.line, 1991 col=token.col, 1992 start_context=start_context, 1993 highlight=highlight, 1994 end_context=end_context, 1995 ) 1996 1997 if self.error_level == ErrorLevel.IMMEDIATE: 1998 raise error 1999 2000 self.errors.append(error) 2001 2002 def validate_expression(self, expression: E, args: list | None = None) -> E: 2003 if self.max_nodes > -1: 2004 self._node_count += 1 2005 if self._node_count > self.max_nodes: 2006 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 2007 if self.error_level != ErrorLevel.IGNORE: 2008 for error_message in expression.error_messages(args): 2009 self.raise_error(error_message) 2010 return expression 2011 2012 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None: 2013 index = self._index 2014 error_level = self.error_level 2015 this: T | None = None 2016 2017 self.error_level = ErrorLevel.IMMEDIATE 2018 try: 2019 this = parse_method() 2020 except ParseError: 2021 this = None 2022 finally: 2023 if not this or retreat: 2024 self._retreat(index) 2025 self.error_level = error_level 2026 2027 return this 2028 2029 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2030 """ 2031 Parses a list of tokens and returns a list of syntax trees, one tree 2032 per parsed SQL statement. 2033 2034 Args: 2035 raw_tokens: The list of tokens. 2036 sql: The original SQL string. 2037 2038 Returns: 2039 The list of the produced syntax trees. 2040 """ 2041 return self._parse( 2042 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2043 ) 2044 2045 def parse_into( 2046 self, 2047 expression_types: exp.IntoType, 2048 raw_tokens: list[Token], 2049 sql: str | None = None, 2050 ) -> list[exp.Expr | None]: 2051 """ 2052 Parses a list of tokens into a given Expr type. 
If a collection of Expr 2053 types is given instead, this method will try to parse the token list into each one 2054 of them, stopping at the first for which the parsing succeeds. 2055 2056 Args: 2057 expression_types: The expression type(s) to try and parse the token list into. 2058 raw_tokens: The list of tokens. 2059 sql: The original SQL string, used to produce helpful debug messages. 2060 2061 Returns: 2062 The target Expr. 2063 """ 2064 errors = [] 2065 for expression_type in ensure_list(expression_types): 2066 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2067 if not parser: 2068 raise TypeError(f"No parser registered for {expression_type}") 2069 2070 try: 2071 return self._parse(parser, raw_tokens, sql) 2072 except ParseError as e: 2073 e.errors[0]["into_expression"] = expression_type 2074 errors.append(e) 2075 2076 raise ParseError( 2077 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2078 errors=merge_errors(errors), 2079 ) from errors[-1] 2080 2081 def check_errors(self) -> None: 2082 """Logs or raises any found errors, depending on the chosen error level setting.""" 2083 if self.error_level == ErrorLevel.WARN: 2084 for error in self.errors: 2085 logger.error(str(error)) 2086 elif self.error_level == ErrorLevel.RAISE and self.errors: 2087 raise ParseError( 2088 concat_messages(self.errors, self.max_errors), 2089 errors=merge_errors(self.errors), 2090 ) 2091 2092 def expression( 2093 self, 2094 instance: E, 2095 token: Token | None = None, 2096 comments: list[str] | None = None, 2097 ) -> E: 2098 if token: 2099 instance.update_positions(token) 2100 instance.add_comments(comments) if comments else self._add_comments(instance) 2101 if not instance.is_primitive: 2102 instance = self.validate_expression(instance) 2103 return instance 2104 2105 def _parse_batch_statements( 2106 self, 2107 parse_method: t.Callable[[Parser], exp.Expr | None], 2108 sep_first_statement: bool = True, 2109 ) -> list[exp.Expr | None]: 
2110 expressions = [] 2111 2112 # Chunkification binds if/while statements with the first statement of the body 2113 if sep_first_statement: 2114 self._match(TokenType.BEGIN) 2115 expressions.append(parse_method(self)) 2116 2117 chunks_length = len(self._chunks) 2118 while self._chunk_index < chunks_length: 2119 self._advance_chunk() 2120 2121 if self._match(TokenType.ELSE, advance=False): 2122 return expressions 2123 2124 if expressions and not self._next and self._match(TokenType.END): 2125 expressions.append(exp.EndStatement()) 2126 continue 2127 2128 expressions.append(parse_method(self)) 2129 2130 if self._index < self._tokens_size: 2131 self.raise_error("Invalid expression / Unexpected token") 2132 2133 self.check_errors() 2134 2135 return expressions 2136 2137 def _parse( 2138 self, 2139 parse_method: t.Callable[[Parser], exp.Expr | None], 2140 raw_tokens: list[Token], 2141 sql: str | None = None, 2142 ) -> list[exp.Expr | None]: 2143 self.reset() 2144 self.sql = sql or "" 2145 2146 total = len(raw_tokens) 2147 chunks: list[list[Token]] = [[]] 2148 2149 for i, token in enumerate(raw_tokens): 2150 if token.token_type == TokenType.SEMICOLON: 2151 if token.comments: 2152 chunks.append([token]) 2153 2154 if i < total - 1: 2155 chunks.append([]) 2156 else: 2157 chunks[-1].append(token) 2158 2159 self._chunks = chunks 2160 2161 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2162 2163 def _warn_unsupported(self) -> None: 2164 if self._tokens_size <= 1: 2165 return 2166 2167 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2168 # interested in emitting a warning for the one being currently processed. 2169 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2170 2171 logger.warning( 2172 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
2173 ) 2174 2175 def _parse_command(self) -> exp.Command: 2176 self._warn_unsupported() 2177 comments = self._prev_comments 2178 return self.expression( 2179 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2180 comments=comments, 2181 ) 2182 2183 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2184 start = self._prev 2185 exists = self._parse_exists() if allow_exists else None 2186 2187 self._match(TokenType.ON) 2188 2189 materialized = self._match_text_seq("MATERIALIZED") 2190 kind = self._match_set(self.CREATABLES) and self._prev 2191 if not kind: 2192 return self._parse_as_command(start) 2193 2194 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2195 this = self._parse_user_defined_function(kind=kind.token_type) 2196 elif kind.token_type == TokenType.TABLE: 2197 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2198 elif kind.token_type == TokenType.COLUMN: 2199 this = self._parse_column() 2200 else: 2201 this = self._parse_table_parts(schema=True) 2202 2203 self._match(TokenType.IS) 2204 2205 return self.expression( 2206 exp.Comment( 2207 this=this, 2208 kind=kind.text, 2209 expression=self._parse_string(), 2210 exists=exists, 2211 materialized=materialized, 2212 ) 2213 ) 2214 2215 def _parse_to_table( 2216 self, 2217 ) -> exp.ToTableProperty: 2218 table = self._parse_table_parts(schema=True) 2219 return self.expression(exp.ToTableProperty(this=table)) 2220 2221 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2222 def _parse_ttl(self) -> exp.Expr: 2223 def _parse_ttl_action() -> exp.Expr | None: 2224 this = self._parse_bitwise() 2225 2226 if self._match_text_seq("DELETE"): 2227 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2228 if self._match_text_seq("RECOMPRESS"): 2229 return self.expression( 2230 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2231 ) 2232 if 
self._match_text_seq("TO", "DISK"): 2233 return self.expression( 2234 exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string()) 2235 ) 2236 if self._match_text_seq("TO", "VOLUME"): 2237 return self.expression( 2238 exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string()) 2239 ) 2240 2241 return this 2242 2243 expressions = self._parse_csv(_parse_ttl_action) 2244 where = self._parse_where() 2245 group = self._parse_group() 2246 2247 aggregates = None 2248 if group and self._match(TokenType.SET): 2249 aggregates = self._parse_csv(self._parse_set_item) 2250 2251 return self.expression( 2252 exp.MergeTreeTTL( 2253 expressions=expressions, where=where, group=group, aggregates=aggregates 2254 ) 2255 ) 2256 2257 def _parse_condition(self) -> exp.Expr | None: 2258 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2259 2260 def _parse_block(self) -> exp.Block: 2261 return self.expression( 2262 exp.Block( 2263 expressions=self._parse_batch_statements( 2264 parse_method=lambda self: self._parse_statement() 2265 ) 2266 ) 2267 ) 2268 2269 def _parse_whileblock(self) -> exp.WhileBlock: 2270 return self.expression( 2271 exp.WhileBlock(this=self._parse_condition(), body=self._parse_block()) 2272 ) 2273 2274 def _parse_statement(self) -> exp.Expr | None: 2275 if not self._curr: 2276 return None 2277 2278 if self._match_set(self.STATEMENT_PARSERS): 2279 comments = self._prev_comments 2280 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2281 stmt.add_comments(comments, prepend=True) 2282 return stmt 2283 2284 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2285 return self._parse_command() 2286 2287 if self._match_text_seq("WHILE"): 2288 return self._parse_whileblock() 2289 2290 expression = self._parse_expression() 2291 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2292 2293 if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False): 2294 
expression = self._parse_pipe_syntax_query(expression) 2295 2296 return self._parse_query_modifiers(expression) 2297 2298 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2299 start = self._prev 2300 temporary = self._match(TokenType.TEMPORARY) 2301 materialized = self._match_text_seq("MATERIALIZED") 2302 iceberg = self._match_text_seq("ICEBERG") 2303 2304 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2305 if not kind or (iceberg and kind and kind != "TABLE"): 2306 return self._parse_as_command(start) 2307 2308 concurrently = self._match_text_seq("CONCURRENTLY") 2309 if_exists = exists or self._parse_exists() 2310 2311 if kind == "COLUMN": 2312 this = self._parse_column() 2313 else: 2314 this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA") 2315 2316 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2317 2318 if self._match(TokenType.L_PAREN, advance=False): 2319 expressions = self._parse_wrapped_csv(self._parse_types) 2320 else: 2321 expressions = None 2322 2323 cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper() 2324 2325 return self.expression( 2326 exp.Drop( 2327 exists=if_exists, 2328 this=this, 2329 expressions=expressions, 2330 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2331 temporary=temporary, 2332 materialized=materialized, 2333 cascade=cascade_or_restrict == "CASCADE", 2334 restrict=cascade_or_restrict == "RESTRICT", 2335 constraints=self._match_text_seq("CONSTRAINTS"), 2336 purge=self._match_text_seq("PURGE"), 2337 cluster=cluster, 2338 concurrently=concurrently, 2339 sync=self._match_text_seq("SYNC"), 2340 iceberg=iceberg, 2341 ) 2342 ) 2343 2344 def _parse_exists(self, not_: bool = False) -> bool | None: 2345 return ( 2346 self._match_text_seq("IF") 2347 and (not not_ or self._match(TokenType.NOT)) 2348 and self._match(TokenType.EXISTS) 2349 ) 2350 2351 def _parse_create(self) -> exp.Create | 
exp.Command: 2352 # Note: this can't be None because we've matched a statement parser 2353 start = self._prev 2354 2355 replace = ( 2356 start.token_type == TokenType.REPLACE 2357 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2358 or self._match_pair(TokenType.OR, TokenType.ALTER) 2359 ) 2360 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2361 2362 unique = self._match(TokenType.UNIQUE) 2363 2364 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2365 clustered = True 2366 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2367 "COLUMNSTORE" 2368 ): 2369 clustered = False 2370 else: 2371 clustered = None 2372 2373 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2374 self._advance() 2375 2376 properties = None 2377 create_token = self._match_set(self.CREATABLES) and self._prev 2378 2379 if not create_token: 2380 # exp.Properties.Location.POST_CREATE 2381 properties = self._parse_properties() 2382 create_token = self._match_set(self.CREATABLES) and self._prev 2383 2384 if not properties or not create_token: 2385 return self._parse_as_command(start) 2386 2387 create_token_type = t.cast(Token, create_token).token_type 2388 2389 concurrently = self._match_text_seq("CONCURRENTLY") 2390 exists = self._parse_exists(not_=True) 2391 this = None 2392 expression: exp.Expr | None = None 2393 indexes = None 2394 no_schema_binding = None 2395 begin = None 2396 clone = None 2397 2398 def extend_props(temp_props: exp.Properties | None) -> None: 2399 nonlocal properties 2400 if properties and temp_props: 2401 properties.expressions.extend(temp_props.expressions) 2402 elif temp_props: 2403 properties = temp_props 2404 2405 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2406 this = self._parse_user_defined_function(kind=create_token_type) 2407 2408 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2409 extend_props(self._parse_properties()) 2410 2411 
expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2412 2413 if ( 2414 not expression 2415 and create_token_type == TokenType.FUNCTION 2416 and isinstance(this, exp.UserDefinedFunction) 2417 and this.args.get("wrapped") 2418 ): 2419 pre_table_index = self._index 2420 is_table = self._match(TokenType.TABLE) 2421 2422 expression = self._parse_expression() 2423 overload_mode = bool( 2424 expression 2425 and self._curr.token_type == TokenType.COMMA 2426 and self._next.token_type == TokenType.L_PAREN 2427 ) 2428 if not overload_mode: 2429 self._retreat(pre_table_index) 2430 is_table = False 2431 expression = None 2432 else: 2433 is_table = False 2434 overload_mode = False 2435 2436 extend_props(self._parse_function_properties()) 2437 2438 if not expression: 2439 if self._match(TokenType.COMMAND): 2440 expression = self._parse_as_command(self._prev) 2441 else: 2442 begin = self._match(TokenType.BEGIN) 2443 return_ = self._match_text_seq("RETURN") 2444 2445 if self._match(TokenType.STRING, advance=False): 2446 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2447 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2448 expression = self._parse_string() 2449 extend_props(self._parse_properties()) 2450 else: 2451 expression = ( 2452 self._parse_user_defined_function_expression() 2453 if create_token_type == TokenType.FUNCTION 2454 else self._parse_block() 2455 ) 2456 2457 if return_: 2458 expression = self.expression(exp.Return(this=expression)) 2459 2460 if overload_mode and expression: 2461 expression = self._parse_macro_overloads( 2462 t.cast(exp.UserDefinedFunction, this), expression, is_table 2463 ) 2464 elif create_token_type == TokenType.INDEX: 2465 # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) 2466 if not self._match(TokenType.ON): 2467 index = self._parse_id_var() 2468 anonymous = False 2469 else: 2470 index = None 2471 anonymous = True 2472 2473 this = self._parse_index(index=index, anonymous=anonymous) 2474 elif ( 2475 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2476 ) or create_token_type == TokenType.TRIGGER: 2477 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2478 create_token = self._prev 2479 2480 trigger_name = self._parse_id_var() 2481 if not trigger_name: 2482 return self._parse_as_command(start) 2483 2484 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2485 timing = timing_var.this if timing_var else None 2486 if not timing: 2487 return self._parse_as_command(start) 2488 2489 events = self._parse_trigger_events() 2490 if not self._match(TokenType.ON): 2491 self.raise_error("Expected ON in trigger definition") 2492 2493 table = self._parse_table_parts() 2494 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2495 deferrable, initially = self._parse_trigger_deferrable() 2496 referencing = self._parse_trigger_referencing() 2497 for_each = self._parse_trigger_for_each() 2498 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2499 self._parse_disjunction, optional=True 2500 ) 2501 execute = self._parse_trigger_execute() 2502 2503 if execute is None: 2504 return self._parse_as_command(start) 2505 2506 trigger_props = self.expression( 2507 exp.TriggerProperties( 2508 table=table, 2509 timing=timing, 2510 events=events, 2511 execute=execute, 2512 constraint=is_constraint, 2513 referenced_table=referenced_table, 2514 deferrable=deferrable, 2515 initially=initially, 2516 referencing=referencing, 2517 for_each=for_each, 2518 when=when, 2519 ) 2520 ) 2521 2522 this = trigger_name 2523 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2524 elif 
create_token_type == TokenType.TYPE: 2525 this = self._parse_table_parts(schema=True) 2526 if not this or not self._match(TokenType.ALIAS): 2527 return self._parse_as_command(start) 2528 2529 if self._match(TokenType.ENUM): 2530 expression = exp.DataType( 2531 this=exp.DType.ENUM, 2532 expressions=self._parse_wrapped_csv(self._parse_string), 2533 ) 2534 elif self._match(TokenType.L_PAREN, advance=False): 2535 expression = self._parse_schema() 2536 else: 2537 return self._parse_as_command(start) 2538 elif create_token_type in self.DB_CREATABLES: 2539 table_parts = self._parse_table_parts( 2540 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2541 ) 2542 2543 # exp.Properties.Location.POST_NAME 2544 self._match(TokenType.COMMA) 2545 extend_props(self._parse_properties(before=True)) 2546 2547 this = self._parse_schema(this=table_parts) 2548 2549 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2550 extend_props(self._parse_properties()) 2551 2552 has_alias = self._match(TokenType.ALIAS) 2553 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2554 # exp.Properties.Location.POST_ALIAS 2555 extend_props(self._parse_properties()) 2556 2557 if create_token_type == TokenType.SEQUENCE: 2558 expression = self._parse_types() 2559 props = self._parse_properties() 2560 if props: 2561 sequence_props = exp.SequenceProperties() 2562 options = [] 2563 for prop in props: 2564 if isinstance(prop, exp.SequenceProperties): 2565 for arg, value in prop.args.items(): 2566 if arg == "options": 2567 options.extend(value) 2568 else: 2569 sequence_props.set(arg, value) 2570 prop.pop() 2571 2572 if options: 2573 sequence_props.set("options", options) 2574 2575 props.append("expressions", sequence_props) 2576 extend_props(props) 2577 else: 2578 expression = self._parse_ddl_select() 2579 2580 # Some dialects also support using a table as an alias instead of a SELECT. 2581 # Here we fallback to this as an alternative. 
2582 if not expression and has_alias: 2583 expression = self._try_parse(self._parse_table_parts) 2584 2585 if create_token_type == TokenType.TABLE: 2586 # exp.Properties.Location.POST_EXPRESSION 2587 extend_props(self._parse_properties()) 2588 2589 indexes = [] 2590 while True: 2591 index = self._parse_index() 2592 2593 # exp.Properties.Location.POST_INDEX 2594 extend_props(self._parse_properties()) 2595 if not index: 2596 break 2597 else: 2598 self._match(TokenType.COMMA) 2599 indexes.append(index) 2600 elif create_token_type == TokenType.VIEW: 2601 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2602 no_schema_binding = True 2603 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2604 extend_props(self._parse_properties()) 2605 2606 shallow = self._match_text_seq("SHALLOW") 2607 2608 if self._match_texts(self.CLONE_KEYWORDS): 2609 copy = self._prev.text.lower() == "copy" 2610 clone = self.expression( 2611 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2612 ) 2613 2614 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2615 return self._parse_as_command(start) 2616 2617 create_kind_text = create_token.text.upper() 2618 return self.expression( 2619 exp.Create( 2620 this=this, 2621 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2622 replace=replace, 2623 refresh=refresh, 2624 unique=unique, 2625 expression=expression, 2626 exists=exists, 2627 properties=properties, 2628 indexes=indexes, 2629 no_schema_binding=no_schema_binding, 2630 begin=begin, 2631 clone=clone, 2632 concurrently=concurrently, 2633 clustered=clustered, 2634 ) 2635 ) 2636 2637 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2638 seq = exp.SequenceProperties() 2639 2640 options = [] 2641 index = self._index 2642 2643 while self._curr: 2644 self._match(TokenType.COMMA) 2645 if self._match_text_seq("INCREMENT"): 2646 self._match_text_seq("BY") 2647 
self._match_text_seq("=") 2648 seq.set("increment", self._parse_term()) 2649 elif self._match_text_seq("MINVALUE"): 2650 seq.set("minvalue", self._parse_term()) 2651 elif self._match_text_seq("MAXVALUE"): 2652 seq.set("maxvalue", self._parse_term()) 2653 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2654 self._match_text_seq("=") 2655 seq.set("start", self._parse_term()) 2656 elif self._match_text_seq("CACHE"): 2657 # T-SQL allows empty CACHE which is initialized dynamically 2658 seq.set("cache", self._parse_number() or True) 2659 elif self._match_text_seq("OWNED", "BY"): 2660 # "OWNED BY NONE" is the default 2661 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2662 else: 2663 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2664 if opt: 2665 options.append(opt) 2666 else: 2667 break 2668 2669 seq.set("options", options if options else None) 2670 return None if self._index == index else seq 2671 2672 def _parse_trigger_events(self) -> list[exp.TriggerEvent]: 2673 events = [] 2674 2675 while True: 2676 event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper() 2677 2678 if not event_type: 2679 self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)") 2680 2681 columns = ( 2682 self._parse_csv(self._parse_column) 2683 if event_type == "UPDATE" and self._match_text_seq("OF") 2684 else None 2685 ) 2686 2687 events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns))) 2688 2689 if not self._match(TokenType.OR): 2690 break 2691 2692 return events 2693 2694 def _parse_trigger_deferrable( 2695 self, 2696 ) -> tuple[str | None, str | None]: 2697 deferrable_var = self._parse_var_from_options( 2698 self.TRIGGER_DEFERRABLE, raise_unmatched=False 2699 ) 2700 deferrable = deferrable_var.this if deferrable_var else None 2701 2702 initially = None 2703 if deferrable and self._match_text_seq("INITIALLY"): 2704 initially = ( 2705 
self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None 2706 ) 2707 2708 return deferrable, initially 2709 2710 def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None: 2711 if not self._match_text_seq(keyword): 2712 return None 2713 if not self._match_text_seq("TABLE"): 2714 self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause") 2715 self._match_text_seq("AS") 2716 return self._parse_id_var() 2717 2718 def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None: 2719 if not self._match_text_seq("REFERENCING"): 2720 return None 2721 2722 old_alias = None 2723 new_alias = None 2724 2725 while True: 2726 if alias := self._parse_trigger_referencing_clause("OLD"): 2727 if old_alias is not None: 2728 self.raise_error("Duplicate OLD clause in REFERENCING") 2729 old_alias = alias 2730 elif alias := self._parse_trigger_referencing_clause("NEW"): 2731 if new_alias is not None: 2732 self.raise_error("Duplicate NEW clause in REFERENCING") 2733 new_alias = alias 2734 else: 2735 break 2736 2737 if old_alias is None and new_alias is None: 2738 self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE") 2739 2740 return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias)) 2741 2742 def _parse_trigger_for_each(self) -> str | None: 2743 if not self._match_text_seq("FOR", "EACH"): 2744 return None 2745 2746 return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None 2747 2748 def _parse_trigger_execute(self) -> exp.TriggerExecute | None: 2749 if not self._match(TokenType.EXECUTE): 2750 return None 2751 2752 if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)): 2753 self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE") 2754 2755 func_call = self._parse_column() 2756 return self.expression(exp.TriggerExecute(this=func_call)) 2757 2758 def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None: 2759 # only used for 
def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
    """Parse a single property at the current position.

    Strategies are tried in order: a keyword registered in `PROPERTY_PARSERS`,
    the same preceded by `DEFAULT`, a couple of hard-coded keyword sequences,
    sequence properties, and finally a generic `key = value` pair.

    Returns:
        The parsed property (or whatever the dispatched parser returns), or
        None when no `key =` pair is found, in which case the position saved in
        `index` is restored.
    """
    # Dispatch on the upper-cased text of the keyword that was just consumed.
    if self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

    # NOTE(review): if DEFAULT matches but the next token is not a PROPERTY_PARSERS key,
    # the DEFAULT token stays consumed (`index` is only captured further down) - confirm intended.
    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
        return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

    # Everything below is speculative, so remember where we are for backtracking.
    index = self._index

    seq_props = self._parse_sequence_properties()
    if seq_props:
        return seq_props

    # Undo anything the sequence-properties attempt consumed before trying `key = value`.
    self._retreat(index)
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        # Not a `key = value` pair: give the tokens back and report "no property".
        self._retreat(index)
        return None

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(key, exp.Column):
        key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

    # Fall back to taking the next token as a bare variable when no expression parses.
    value = self._parse_bitwise() or self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property(this=key, value=value))
def _parse_retention_period(self) -> exp.Var:
    """Parse a retention period into a single `exp.Var`.

    Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}

    Returns:
        A Var whose name is "<number> <unit>", or just "<unit>" when no leading
        number is present (e.g. INFINITE).
    """
    number = self._parse_number()
    unit = self._parse_var(any_token=True)

    if not number and not unit:
        self.raise_error("Expected a retention period")

    # Only join the parts that were actually parsed. Interpolating a missing unit
    # directly would bake the literal text "None" into the Var (e.g. "5 None").
    return exp.var(" ".join(str(part) for part in (number, unit) if part))
def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
    """Parse the body of a data-deletion property.

    Accepts an optional `=`, then `ON` or `OFF` (anything other than `OFF`
    counts as on), optionally followed by a parenthesized, comma-separated list
    of `FILTER_COLUMN = <column>` and `RETENTION_PERIOD = <period>` options.

    Returns:
        The populated `exp.DataDeletionProperty`.
    """
    self._match(TokenType.EQ)
    on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
    prop = self.expression(exp.DataDeletionProperty(on=on))

    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("FILTER_COLUMN", "="):
                prop.set("filter_column", self._parse_column())
            elif self._match_text_seq("RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            # If nothing was consumed, the current token is neither a known option,
            # a comma nor the closing paren. Looping again would never terminate,
            # so report the token and stop.
            if self._index == index:
                self.raise_error("Unexpected token in DATA_DELETION options")
                break

    return prop
def _parse_procedure_option(self) -> exp.Expr | None:
    """Parse one procedure option.

    `EXECUTE AS <principal>` becomes an `exp.ExecuteAsProperty`; anything else
    is resolved against `PROCEDURE_OPTIONS`.
    """
    if not self._match_text_seq("EXECUTE", "AS"):
        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    # Prefer one of the known EXECUTE AS keywords, otherwise fall back to a string.
    principal = self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
    if not principal:
        principal = self._parse_string()

    return self.expression(exp.ExecuteAsProperty(this=principal))
def _parse_checksum(self) -> exp.ChecksumProperty:
    """Parse `[=] [ON | OFF] [DEFAULT]` into an `exp.ChecksumProperty`.

    `on` is True for ON, False for OFF and None when neither keyword is present.
    """
    self._match(TokenType.EQ)

    if self._match(TokenType.ON):
        enabled = True
    else:
        enabled = False if self._match_text_seq("OFF") else None

    is_default = self._match(TokenType.DEFAULT)
    return self.expression(exp.ChecksumProperty(on=enabled, default=is_default))
def _parse_datablocksize(
    self,
    default: bool | None = None,
    minimum: bool | None = None,
    maximum: bool | None = None,
) -> exp.DataBlocksizeProperty:
    """Parse `[=] <number> [BYTES | KBYTES | KILOBYTES]` into a data block size property.

    Args:
        default: Forwarded unchanged to the resulting property.
        minimum: Forwarded unchanged to the resulting property.
        maximum: Forwarded unchanged to the resulting property.
    """
    self._match(TokenType.EQ)
    block_size = self._parse_number()

    # The unit keeps the spelling used in the input text.
    unit_text = self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None

    node = exp.DataBlocksizeProperty(
        size=block_size,
        units=unit_text,
        default=default,
        minimum=minimum,
        maximum=maximum,
    )
    return self.expression(node)
def _parse_locking(self) -> exp.LockingProperty:
    """Parse the pieces of a locking clause into an `exp.LockingProperty`.

    Reads, in order: the locked object kind (TABLE / VIEW / ROW / DATABASE),
    the object name for every kind except ROW, a FOR or IN keyword, the lock
    type and an optional OVERRIDE. Every piece is optional and is None when
    absent.
    """

    def first_label(candidates):
        # Run the matchers in order and return the label of the first that succeeds.
        for matches, label in candidates:
            if matches():
                return label
        return None

    kind = first_label(
        (
            (lambda: self._match(TokenType.TABLE), "TABLE"),
            (lambda: self._match(TokenType.VIEW), "VIEW"),
            (lambda: self._match(TokenType.ROW), "ROW"),
            (lambda: self._match_text_seq("DATABASE"), "DATABASE"),
        )
    )

    # ROW locks (and a missing kind) carry no object name.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = first_label(
        (
            (lambda: self._match(TokenType.FOR), "FOR"),
            (lambda: self._match(TokenType.IN), "IN"),
        )
    )

    lock_type = first_label(
        (
            (lambda: self._match_text_seq("ACCESS"), "ACCESS"),
            # EXCL is accepted as a spelling of EXCLUSIVE.
            (lambda: self._match_texts(("EXCL", "EXCLUSIVE")), "EXCLUSIVE"),
            (lambda: self._match_text_seq("SHARE"), "SHARE"),
            (lambda: self._match_text_seq("READ"), "READ"),
            (lambda: self._match_text_seq("WRITE"), "WRITE"),
            (lambda: self._match_text_seq("CHECKSUM"), "CHECKSUM"),
        )
    )

    override = self._match_text_seq("OVERRIDE")

    node = exp.LockingProperty(
        this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override
    )
    return self.expression(node)
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None:
    """Parse `OF <parent> { DEFAULT | FOR VALUES <bound spec> }`.

    Returns:
        None (after stepping back one token) when `OF` does not follow;
        otherwise an `exp.PartitionedOfProperty` for the parent table and its
        partition bound.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Bind the name up front: raise_error only raises under the strictest error
    # level and otherwise records the error and returns, which used to leave
    # `expression` unbound and crash with UnboundLocalError on the return below.
    expression: exp.Var | exp.PartitionBoundSpec | None = None
    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty(this=this, expression=expression))
def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """Parse the optional statistics suffix of a `WITH [NO] DATA` clause.

    Args:
        no: Whether the clause was `WITH NO DATA`; forwarded to the property.

    Returns:
        A property whose `statistics` is True for `AND STATISTICS`, False for
        `AND NO STATISTICS` and None when neither follows.
    """
    suffixes = (
        (("AND", "STATISTICS"), True),
        (("AND", "NO", "STATISTICS"), False),
    )

    statistics = None
    for keywords, flag in suffixes:
        if self._match_text_seq(*keywords):
            statistics = flag
            break

    return self.expression(exp.WithDataProperty(no=no, statistics=statistics))
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse what follows a RETURNS keyword into an `exp.ReturnsProperty`.

    Handles three shapes: `TABLE <...>` / `TABLE (...)`, the
    `NULL ON NULL INPUT` marker, and a plain data type.
    """
    is_table = self._match(TokenType.TABLE)
    null = None
    value = None

    if not is_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            null = True
        else:
            value = self._parse_types()
    elif not self._match(TokenType.LT):
        value = self._parse_schema(exp.var("TABLE"))
    else:
        # TABLE<col type, ...>: angle-bracketed struct-style column list.
        columns = self._parse_csv(self._parse_struct_types)
        value = self.expression(exp.Schema(this="TABLE", expressions=columns))
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")

    return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null))
def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
    """Parse the remainder of an INSERT statement.

    Returns:
        An `exp.MultitableInserts` when FIRST or ALL follows (delegated to
        `_parse_multitable_inserts`), otherwise an `exp.Insert`.
    """
    comments: list[str] = []
    hint = self._parse_hint()
    # Optional leading modifiers; each is consumed only if present, in this order.
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None

    if self._match_text_seq("DIRECTORY"):
        # The insert target is a directory rather than a table.
        this: exp.Expr | None = self.expression(
            exp.Directory(
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        )
    else:
        if self._match_set((TokenType.FIRST, TokenType.ALL)):
            # Keep the comments attached to the FIRST/ALL token.
            comments += ensure_list(self._prev_comments)
            return self._parse_multitable_inserts(comments)

        if self._match(TokenType.OR):
            # Stays falsy when the word after OR is not in INSERT_ALTERNATIVES.
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        # Collected whether or not INTO matched: these are the comments of the last consumed token.
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = self._parse_function() if is_function else self._parse_insert_table()

    returning = self._parse_returning()  # TSQL allows RETURNING before source

    # The keyword arguments below are evaluated top to bottom and most of them consume
    # tokens, so their order defines the accepted clause order - do not reorder them.
    return self.expression(
        exp.Insert(
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_disjunction(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            default=self._match_text_seq("DEFAULT", "VALUES"),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # Prefer the RETURNING found before the source, else look for a trailing one.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        ),
        comments=comments,
    )
def _parse_row_format(
    self, match_row: bool = False
) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
    """Parse a row format specification (SERDE or DELIMITED flavour).

    Args:
        match_row: When True, require a leading `ROW FORMAT` token pair and
            return None if it is missing.

    Returns:
        A SERDE property when `SERDE` follows, otherwise a DELIMITED property
        holding whichever delimiter clauses were present.
    """
    if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
        return None

    if self._match_text_seq("SERDE"):
        serde_name = self._parse_string()
        serde_props = self._parse_serde_properties()
        return self.expression(
            exp.RowFormatSerdeProperty(this=serde_name, serde_properties=serde_props)
        )

    self._match_text_seq("DELIMITED")

    # (argument name, keyword sequence) pairs, tried once each in this fixed order.
    delimiter_clauses = (
        ("fields", ("FIELDS", "TERMINATED", "BY")),
        ("escaped", ("ESCAPED", "BY")),
        ("collection_items", ("COLLECTION", "ITEMS", "TERMINATED", "BY")),
        ("map_keys", ("MAP", "KEYS", "TERMINATED", "BY")),
        ("lines", ("LINES", "TERMINATED", "BY")),
        ("null", ("NULL", "DEFINED", "AS")),
    )

    delimiters = {}
    for arg_name, keywords in delimiter_clauses:
        if self._match_text_seq(*keywords):
            delimiters[arg_name] = self._parse_string()

    return self.expression(exp.RowFormatDelimitedProperty(**delimiters))  # type: ignore
def _parse_delete(self) -> exp.Delete:
    """Parse the remainder of a DELETE statement into an `exp.Delete`.

    Clauses are consumed strictly in the order the locals are assigned below.
    """
    hint = self._parse_hint()

    # This handles MySQL's "Multiple-Table Syntax"
    # https://dev.mysql.com/doc/refman/8.0/en/delete.html
    if self._match(TokenType.FROM, advance=False):
        multi_tables = None
    else:
        multi_tables = self._parse_csv(self._parse_table) or None

    early_returning = self._parse_returning()

    target = self._match(TokenType.FROM) and self._parse_table(joins=True)
    using = self._match(TokenType.USING) and self._parse_csv(
        lambda: self._parse_table(joins=True)
    )
    cluster = self._match(TokenType.ON) and self._parse_on_property()
    where = self._parse_where()
    # Only look for a trailing RETURNING when none preceded the FROM clause.
    returning = early_returning or self._parse_returning()
    order = self._parse_order()
    limit = self._parse_limit()

    return self.expression(
        exp.Delete(
            hint=hint,
            tables=multi_tables,
            this=target,
            using=using,
            cluster=cluster,
            where=where,
            returning=returning,
            order=order,
            limit=limit,
        )
    )
def _parse_cache(self) -> exp.Cache:
    """Parse `[LAZY] [TABLE] <table> [OPTIONS (<key> = <value>)] [AS] <select>`.

    Returns:
        An `exp.Cache` whose `options` is either empty or the single
        `[key, value]` pair read from the OPTIONS clause.
    """
    lazy = self._match_text_seq("LAZY")
    self._match(TokenType.TABLE)
    target = self._parse_table(schema=True)

    cache_options = []
    if self._match_text_seq("OPTIONS"):
        self._match_l_paren()
        option_key = self._parse_string()
        self._match(TokenType.EQ)
        option_value = self._parse_string()
        self._match_r_paren()
        cache_options = [option_key, option_value]

    self._match(TokenType.ALIAS)
    query = self._parse_select(nested=True)

    return self.expression(
        exp.Cache(this=target, lazy=lazy, options=cache_options, expression=query)
    )
def _parse_value(self, values: bool = True) -> exp.Tuple | None:
    """Parse one row of a VALUES clause into an `exp.Tuple`.

    Args:
        values: Not used by this implementation.

    Returns:
        A tuple of the parenthesized expressions, a one-element tuple for a
        bare expression, or None when nothing could be parsed.
    """

    def parse_item() -> exp.Expr | None:
        # A bare DEFAULT is only recognised when the dialect supports it in VALUES.
        if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
            return exp.var(self._prev.text.upper())
        return self._parse_expression()

    if not self._match(TokenType.L_PAREN):
        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        single = self._parse_expression()
        if not single:
            return None
        return self.expression(exp.Tuple(expressions=[single]))

    items = self._parse_csv(parse_item)
    self._match_r_paren()
    return self.expression(exp.Tuple(expressions=items))
self._parse_table(consume_pipe=True) 3798 if table 3799 else self._parse_select(nested=True, parse_set_operation=False) 3800 ) 3801 3802 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3803 # in case a modifier (e.g. join) is following 3804 if table and isinstance(this, exp.Values) and this.alias: 3805 alias = this.args["alias"].pop() 3806 this = exp.Table(this=this, alias=alias) 3807 3808 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3809 3810 return this 3811 3812 def _parse_select( 3813 self, 3814 nested: bool = False, 3815 table: bool = False, 3816 parse_subquery_alias: bool = True, 3817 parse_set_operation: bool = True, 3818 consume_pipe: bool = True, 3819 from_: exp.From | None = None, 3820 ) -> exp.Expr | None: 3821 query = self._parse_select_query( 3822 nested=nested, 3823 table=table, 3824 parse_subquery_alias=parse_subquery_alias, 3825 parse_set_operation=parse_set_operation, 3826 ) 3827 3828 if consume_pipe and self._match(TokenType.PIPE_GT, advance=False): 3829 if not query and from_: 3830 query = exp.select("*").from_(from_) 3831 if isinstance(query, exp.Query): 3832 query = self._parse_pipe_syntax_query(query) 3833 query = query.subquery(copy=False) if query and table else query 3834 3835 return query 3836 3837 def _parse_select_query( 3838 self, 3839 nested: bool = False, 3840 table: bool = False, 3841 parse_subquery_alias: bool = True, 3842 parse_set_operation: bool = True, 3843 ) -> exp.Expr | None: 3844 cte = self._parse_with() 3845 3846 if cte: 3847 this = self._parse_statement() 3848 3849 if not this: 3850 self.raise_error("Failed to parse any statement following CTE") 3851 return cte 3852 3853 while isinstance(this, exp.Subquery) and this.is_wrapper: 3854 this = this.this 3855 3856 assert this is not None 3857 if "with_" in this.arg_types: 3858 if inner_cte := this.args.get("with_"): 3859 cte.set("expressions", cte.expressions + inner_cte.expressions) 3860 if inner_cte.args.get("recursive"): 
3861 cte.set("recursive", True) 3862 this.set("with_", cte) 3863 else: 3864 self.raise_error(f"{this.key} does not support CTE") 3865 this = cte 3866 3867 return this 3868 3869 # duckdb supports leading with FROM x 3870 from_ = ( 3871 self._parse_from(joins=True, consume_pipe=True) 3872 if self._match(TokenType.FROM, advance=False) 3873 else None 3874 ) 3875 3876 if self._match(TokenType.SELECT): 3877 comments = self._prev_comments 3878 3879 hint = self._parse_hint() 3880 3881 if self._next and not self._next.token_type == TokenType.DOT: 3882 all_ = self._match(TokenType.ALL) 3883 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3884 else: 3885 all_, matched_distinct = None, False 3886 3887 kind = ( 3888 self._prev.text.upper() 3889 if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")) 3890 else None 3891 ) 3892 3893 distinct: exp.Expr | None = ( 3894 self.expression( 3895 exp.Distinct( 3896 on=self._parse_value(values=False) if self._match(TokenType.ON) else None 3897 ) 3898 ) 3899 if matched_distinct 3900 else None 3901 ) 3902 3903 operation_modifiers = [] 3904 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3905 operation_modifiers.append(exp.var(self._prev.text.upper())) 3906 3907 limit = self._parse_limit(top=True) 3908 3909 # Some dialects (e.g. Redshift, T-SQL) allow SELECT TOP N DISTINCT ... 
3910 if limit and not matched_distinct and not all_: 3911 matched_distinct = self._match_set(self.DISTINCT_TOKENS) 3912 if matched_distinct: 3913 distinct = self.expression( 3914 exp.Distinct( 3915 on=self._parse_value(values=False) 3916 if self._match(TokenType.ON) 3917 else None 3918 ) 3919 ) 3920 else: 3921 all_ = self._match(TokenType.ALL) 3922 3923 if all_ and distinct: 3924 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3925 3926 projections, exclude = self._parse_projections() 3927 3928 this = self.expression( 3929 exp.Select( 3930 kind=kind, 3931 hint=hint, 3932 distinct=distinct, 3933 expressions=projections, 3934 limit=limit, 3935 exclude=exclude, 3936 operation_modifiers=operation_modifiers or None, 3937 ) 3938 ) 3939 this.comments = comments 3940 3941 into = self._parse_into() 3942 if into: 3943 this.set("into", into) 3944 3945 if not from_: 3946 from_ = self._parse_from() 3947 3948 if from_: 3949 this.set("from_", from_) 3950 3951 this = self._parse_query_modifiers(this) 3952 elif (table or nested) and self._match(TokenType.L_PAREN): 3953 comments = self._prev_comments 3954 this = self._parse_wrapped_select(table=table) 3955 3956 if this: 3957 this.add_comments(comments, prepend=True) 3958 3959 # We return early here so that the UNION isn't attached to the subquery by the 3960 # following call to _parse_set_operations, but instead becomes the parent node 3961 self._match_r_paren() 3962 return self._parse_subquery(this, parse_alias=parse_subquery_alias) 3963 elif self._match(TokenType.VALUES, advance=False): 3964 this = self._parse_derived_table_values() 3965 elif from_: 3966 this = exp.select("*").from_(from_.this, copy=False) 3967 this = self._parse_query_modifiers(this) 3968 elif self._match(TokenType.SUMMARIZE): 3969 table = self._match(TokenType.TABLE) 3970 this = self._parse_select() or self._parse_string() or self._parse_table() 3971 return self.expression(exp.Summarize(this=this, table=table)) 3972 elif 
self._match(TokenType.DESCRIBE): 3973 this = self._parse_describe() 3974 else: 3975 this = None 3976 3977 return self._parse_set_operations(this) if parse_set_operation else this 3978 3979 def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None: 3980 self._match_text_seq("SEARCH") 3981 3982 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3983 3984 if not kind: 3985 return None 3986 3987 self._match_text_seq("FIRST", "BY") 3988 3989 return self.expression( 3990 exp.RecursiveWithSearch( 3991 kind=kind, 3992 this=self._parse_id_var(), 3993 expression=self._match_text_seq("SET") and self._parse_id_var(), 3994 using=self._match_text_seq("USING") and self._parse_id_var(), 3995 ) 3996 ) 3997 3998 def _parse_with(self, skip_with_token: bool = False) -> exp.With | None: 3999 if not skip_with_token and not self._match(TokenType.WITH): 4000 return None 4001 4002 comments = self._prev_comments 4003 recursive = self._match(TokenType.RECURSIVE) 4004 4005 last_comments = None 4006 expressions = [] 4007 while True: 4008 cte = self._parse_cte() 4009 if isinstance(cte, exp.CTE): 4010 expressions.append(cte) 4011 if last_comments: 4012 cte.add_comments(last_comments) 4013 4014 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 4015 break 4016 else: 4017 self._match(TokenType.WITH) 4018 4019 last_comments = self._prev_comments 4020 4021 return self.expression( 4022 exp.With( 4023 expressions=expressions, 4024 recursive=recursive or None, 4025 search=self._parse_recursive_with_search(), 4026 ), 4027 comments=comments, 4028 ) 4029 4030 def _parse_cte(self) -> exp.CTE | None: 4031 index = self._index 4032 4033 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 4034 if not alias or not alias.this: 4035 self.raise_error("Expected CTE to have alias") 4036 4037 key_expressions = ( 4038 self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None 4039 ) 4040 4041 if not self._match(TokenType.ALIAS) 
and not self.OPTIONAL_ALIAS_TOKEN_CTE: 4042 self._retreat(index) 4043 return None 4044 4045 comments = self._prev_comments 4046 4047 if self._match_text_seq("NOT", "MATERIALIZED"): 4048 materialized = False 4049 elif self._match_text_seq("MATERIALIZED"): 4050 materialized = True 4051 else: 4052 materialized = None 4053 4054 cte = self.expression( 4055 exp.CTE( 4056 this=self._parse_wrapped(self._parse_statement), 4057 alias=alias, 4058 materialized=materialized, 4059 key_expressions=key_expressions, 4060 ), 4061 comments=comments, 4062 ) 4063 4064 values = cte.this 4065 if isinstance(values, exp.Values): 4066 if values.alias: 4067 cte.set("this", exp.select("*").from_(values)) 4068 else: 4069 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 4070 4071 return cte 4072 4073 def _parse_table_alias( 4074 self, alias_tokens: t.Collection[TokenType] | None = None 4075 ) -> exp.TableAlias | None: 4076 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 4077 # so this section tries to parse the clause version and if it fails, it treats the token 4078 # as an identifier (alias) 4079 if self._can_parse_limit_or_offset(): 4080 return None 4081 4082 any_token = self._match(TokenType.ALIAS) 4083 alias = ( 4084 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4085 or self._parse_string_as_identifier() 4086 ) 4087 4088 index = self._index 4089 if self._match(TokenType.L_PAREN): 4090 columns = self._parse_csv(self._parse_function_parameter) 4091 self._match_r_paren() if columns else self._retreat(index) 4092 else: 4093 columns = None 4094 4095 if not alias and not columns: 4096 return None 4097 4098 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4099 4100 # We bubble up comments from the Identifier to the TableAlias 4101 if isinstance(alias, exp.Identifier): 4102 table_alias.add_comments(alias.pop_comments()) 4103 4104 return table_alias 4105 4106 
def _parse_subquery( 4107 self, this: exp.Expr | None, parse_alias: bool = True 4108 ) -> exp.Subquery | None: 4109 if not this: 4110 return None 4111 4112 return self.expression( 4113 exp.Subquery( 4114 this=this, 4115 pivots=self._parse_pivots(), 4116 alias=self._parse_table_alias() if parse_alias else None, 4117 sample=self._parse_table_sample(), 4118 ) 4119 ) 4120 4121 def _implicit_unnests_to_explicit(self, this: E) -> E: 4122 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4123 4124 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4125 for i, join in enumerate(this.args.get("joins") or []): 4126 table = join.this 4127 normalized_table = table.copy() 4128 normalized_table.meta["maybe_column"] = True 4129 normalized_table = _norm(normalized_table, dialect=self.dialect) 4130 4131 if isinstance(table, exp.Table) and not join.args.get("on"): 4132 if len(normalized_table.parts) > 1 and normalized_table.parts[0].name in refs: 4133 table_as_column = table.to_column() 4134 unnest = exp.Unnest(expressions=[table_as_column]) 4135 4136 # Table.to_column creates a parent Alias node that we want to convert to 4137 # a TableAlias and attach to the Unnest, so it matches the parser's output 4138 if isinstance(table.args.get("alias"), exp.TableAlias): 4139 table_as_column.replace(table_as_column.this) 4140 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 4141 4142 table.replace(unnest) 4143 4144 refs.add(normalized_table.alias_or_name) 4145 4146 return this 4147 4148 @t.overload 4149 def _parse_query_modifiers(self, this: E) -> E: ... 4150 4151 @t.overload 4152 def _parse_query_modifiers(self, this: None) -> None: ... 

def _parse_query_modifiers(self, this):
    """Attach joins, laterals and query modifier clauses to `this`.

    Only nodes that are instances of `MODIFIABLES` are modified. Each
    modifier is parsed by the entry in `QUERY_MODIFIER_PARSERS` keyed on the
    current token type; a repeated clause is reported as an error.
    """
    if isinstance(this, self.MODIFIABLES):
        for join in self._parse_joins():
            this.append("joins", join)
        # iter(callable, sentinel): keep parsing laterals until None is returned.
        for lateral in iter(self._parse_lateral, None):
            this.append("laterals", lateral)

        while True:
            if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                modifier_token = self._curr
                parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                key, expression = parser(self)

                if expression:
                    if this.args.get(key):
                        self.raise_error(
                            f"Found multiple '{modifier_token.text.upper()}' clauses",
                            token=modifier_token,
                        )

                    this.set(key, expression)
                    if key == "limit":
                        # An offset carried on the limit node is hoisted into its own
                        # Offset node on `this`, along with the limit's `expressions`.
                        offset = expression.args.get("offset")
                        expression.set("offset", None)

                        if offset:
                            offset = exp.Offset(expression=offset)
                            this.set("offset", offset)

                            limit_by_expressions = expression.expressions
                            expression.set("expressions", None)
                            offset.set("expressions", limit_by_expressions)
                    continue
            # No modifier token, or the parser produced nothing: stop.
            break

    if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
        this = self._implicit_unnests_to_explicit(this)

    return this

def _parse_hint_fallback_to_string(self) -> exp.Hint | None:
    """Consume all remaining tokens and return them as one raw-SQL hint."""
    start = self._curr
    while self._curr:
        self._advance()

    end = self._tokens[self._index - 1]
    return exp.Hint(expressions=[self._find_sql(start, end)])

def _parse_hint_function_call(self) -> exp.Expr | None:
    """Parse one function-call style hint; delegates to `_parse_function_call`."""
    return self._parse_function_call()

def _parse_hint_body(self) -> exp.Hint | None:
    """Parse a hint body as structured hints, falling back to raw text.

    The fallback is used when parsing raises `ParseError` or when tokens are
    left over; in both cases the parser rewinds to where it started.
    """
    start_index = self._index
    should_fallback_to_string = False

    hints = []
    try:
        # Repeat CSV parsing until it yields an empty list (the iter sentinel).
        for hint in iter(
            lambda: self._parse_csv(
                lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
            ),
            [],
        ):
            hints.extend(hint)
    except ParseError:
        should_fallback_to_string = True

    if should_fallback_to_string or self._curr:
        self._retreat(start_index)
        return self._parse_hint_fallback_to_string()

    return self.expression(exp.Hint(expressions=hints))

def _parse_hint(self) -> exp.Hint | None:
    """Parse a HINT token by re-parsing its first attached comment as an `exp.Hint`."""
    if self._match(TokenType.HINT) and self._prev_comments:
        return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

    return None

def _parse_into(self) -> exp.Into | None:
    """Parse an INTO clause with optional TEMPORARY, UNLOGGED and TABLE keywords."""
    if not self._match(TokenType.INTO):
        return None

    temp = self._match(TokenType.TEMPORARY)
    unlogged = self._match_text_seq("UNLOGGED")
    self._match(TokenType.TABLE)

    return self.expression(
        exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged)
    )

def _parse_from(
    self,
    joins: bool = False,
    skip_from_token: bool = False,
    consume_pipe: bool = False,
) -> exp.From | None:
    """Parse a FROM clause.

    Returns None when the FROM token is required (`skip_from_token` false)
    but absent. `joins` and `consume_pipe` are forwarded to `_parse_table`.
    """
    if not skip_from_token and not self._match(TokenType.FROM):
        return None

    comments = self._prev_comments
    return self.expression(
        exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)),
        comments=comments,
    )

def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
    """Parse one MEASURES item with an optional FINAL / RUNNING prefix."""
    return self.expression(
        exp.MatchRecognizeMeasure(
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )
    )

def _parse_match_recognize(self) -> exp.MatchRecognize | None:
    """Parse a MATCH_RECOGNIZE clause; returns None when the token is absent.

    The row-mode and AFTER MATCH SKIP options are stored as vars built from
    their keyword text, and PATTERN is stored as the raw SQL between its
    parentheses.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = (
        self._parse_csv(self._parse_match_recognize_measure)
        if self._match_text_seq("MEASURES")
        else None
    )

    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        text = "ALL ROWS PER MATCH"
        if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
            text += " SHOW EMPTY MATCHES"
        elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
            text += " OMIT EMPTY MATCHES"
        elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
            text += " WITH UNMATCHED ROWS"
        rows = exp.var(text)
    else:
        rows = None

    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(text)
    else:
        after = None

    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        # Track nesting depth to find the parenthesis that closes PATTERN.
        paren = 1
        start = self._curr

        while self._curr and paren > 0:
            if self._curr.token_type == TokenType.L_PAREN:
                paren += 1
            if self._curr.token_type == TokenType.R_PAREN:
                paren -= 1

            # `end` trails one token behind, so it excludes the closing parenthesis.
            end = self._prev
            self._advance()

        if paren > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(start, end))
    else:
        pattern = None

    define = (
        self._parse_csv(self._parse_name_as_expression)
        if self._match_text_seq("DEFINE")
        else None
    )

    self._match_r_paren()

    return self.expression(
        exp.MatchRecognize(
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )
    )

def _parse_lateral(self) -> exp.Lateral | None:
    """Parse CROSS APPLY, OUTER APPLY or LATERAL into an `exp.Lateral`.

    Returns None when none of these introducers is matched. `cross_apply` is
    True for CROSS APPLY, False for OUTER APPLY and None for LATERAL.
    """
    cross_apply: bool | None = None
    if self._match_pair(TokenType.CROSS, TokenType.APPLY):
        cross_apply = True
    elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
        cross_apply = False

    if cross_apply is not None:
        this = self._parse_select(table=True)
        view = None
        outer = None
    elif self._match(TokenType.LATERAL):
        this = self._parse_select(table=True)
        view = self._match(TokenType.VIEW)
        outer = self._match(TokenType.OUTER)
    else:
        return None

    if not this:
        # No subquery: fall back to an unnest, function or identifier,
        # optionally followed by a chain of dotted members.
        this = (
            self._parse_unnest()
            or self._parse_function()
            or self._parse_id_var(any_token=False)
        )

        while self._match(TokenType.DOT):
            this = exp.Dot(
                this=this,
                expression=self._parse_function() or self._parse_id_var(any_token=False),
            )

    ordinality: bool | None = None

    if view:
        table = self._parse_id_var(any_token=False)
        columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
        table_alias: exp.TableAlias | None = self.expression(
            exp.TableAlias(this=table, columns=columns)
        )
    elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
        # We move the alias from the lateral's child node to the lateral itself
        table_alias = this.args["alias"].pop()
    else:
        ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
        table_alias = self._parse_table_alias()

    return self.expression(
        exp.Lateral(
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )
    )

def _parse_stream(self) -> exp.Stream | None:
    """Parse STREAM followed by a table; rewinds and returns None otherwise."""
    index = self._index
    if self._match(TokenType.STREAM):
        if this := self._try_parse(self._parse_table):
            return self.expression(exp.Stream(this=this))
    self._retreat(index)
    return None

def _parse_join_parts(
    self,
) -> tuple[Token | None, Token | None, Token | None]:
    """Match the optional join method, side and kind tokens, in that order."""
    return (
        self._prev if self._match_set(self.JOIN_METHODS) else None,
        self._prev if self._match_set(self.JOIN_SIDES) else None,
        self._prev if self._match_set(self.JOIN_KINDS) else None,
    )

def _parse_using_identifiers(self) -> list[exp.Expr]:
    """Parse the column list of a USING clause, unwrapping plain columns."""

    def _parse_column_as_identifier() -> exp.Expr | None:
        this = self._parse_column()
        # A Column is reduced to its `this` child; anything else is returned as is.
        if isinstance(this, exp.Column):
            return this.this
        return this

    return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

def _parse_join(
    self,
    skip_join_token: bool = False,
    parse_bracket: bool = False,
    alias_tokens: t.Collection[TokenType] | None = None,
) -> exp.Join | None:
    """Parse one join: a comma join, a keyword JOIN, or an APPLY.

    Returns None when no join starts at the current position. With
    `skip_join_token` set, the JOIN token is not required.
    """
    if self._match(TokenType.COMMA):
        table = self._try_parse(lambda: self._parse_table(alias_tokens=alias_tokens))
        cross_join = self.expression(exp.Join(this=table)) if table else None

        if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
            cross_join.set("kind", "CROSS")

        return cross_join

    index = self._index
    method, side, kind = self._parse_join_parts()
    directed = self._match_text_seq("DIRECTED")
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
    join_comments = self._prev_comments

    if not skip_join_token and not join:
        # Not a keyword join: give back the prefix tokens consumed above.
        self._retreat(index)
        kind = None
        method = None
        side = None

    # Peek only (advance=False); the APPLY tokens are left in the stream here.
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if not skip_join_token and not join and not outer_apply and not cross_apply:
        return None

    kwargs: dict[str, t.Any] = {
        "this": self._parse_table(parse_bracket=parse_bracket, alias_tokens=alias_tokens)
    }
    if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
        kwargs["expressions"] = self._parse_csv(
            lambda: self._parse_table(parse_bracket=parse_bracket, alias_tokens=alias_tokens)
        )

    if method:
        kwargs["method"] = method.text.upper()
    if side:
        kwargs["side"] = side.text.upper()
    if kind:
        kwargs["kind"] = kind.text.upper()
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_disjunction()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    elif (
        not method
        and not (outer_apply or cross_apply)
        and not isinstance(kwargs["this"], exp.Unnest)
        and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
    ):
        # Try nested joins: further joins are parsed first and kept only if an
        # ON / USING for this join follows them; otherwise rewind.
        index = self._index
        joins: list | None = list(self._parse_joins(alias_tokens=alias_tokens))

        if joins and self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif joins and self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        else:
            joins = None
            self._retreat(index)

        kwargs["this"].set("joins", joins if joins else None)

    kwargs["pivots"] = self._parse_pivots()

    comments = [c for token in (method, side, kind) if token for c in token.comments]
    comments = (join_comments or []) + comments

    if (
        self.ADD_JOIN_ON_TRUE
        and not kwargs.get("on")
        and not kwargs.get("using")
        and not kwargs.get("method")
        and kwargs.get("kind") in (None, "INNER", "OUTER")
    ):
        kwargs["on"] = exp.true()

    if directed:
        kwargs["directed"] = directed

    return self.expression(exp.Join(**kwargs), comments=comments)

def _parse_opclass(self) -> exp.Expr | None:
    """Parse an expression optionally followed by an operator class.

    The expression is wrapped in `exp.Opclass` only when the next token is
    neither in `OPCLASS_FOLLOW_KEYWORDS` nor in `OPTYPE_FOLLOW_TOKENS`.
    """
    this = self._parse_disjunction()

    if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
        return this

    if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
        return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts()))

    return this

def _parse_index_params(self) -> exp.IndexParameters:
    """Parse the optional parameters of an index definition, in a fixed order.

    Order: USING, column list, INCLUDE, partition by, WITH storage
    properties, USING INDEX TABLESPACE, WHERE, ON.
    """
    using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

    if self._match(TokenType.L_PAREN, advance=False):
        columns = self._parse_wrapped_csv(self._parse_with_operator)
    else:
        columns = None

    include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
    partition_by = self._parse_partition_by()
    with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
    tablespace = (
        self._parse_var(any_token=True)
        if self._match_text_seq("USING", "INDEX", "TABLESPACE")
        else None
    )
    where = self._parse_where()

    on = self._parse_field() if self._match(TokenType.ON) else None

    return self.expression(
        exp.IndexParameters(
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )
    )

def _parse_index(
    self, index: exp.Expr | None = None, anonymous: bool = False
) -> exp.Index | None:
    """Parse an index definition.

    When `index` is given or `anonymous` is true, the name is already known
    (or absent) and only the target table is parsed. Otherwise the optional
    UNIQUE / PRIMARY / AMP flags and the INDEX token are parsed first, and
    None is returned if INDEX is missing.
    """
    if index or anonymous:
        unique = None
        primary = None
        amp = None

        self._match(TokenType.ON)
        self._match(TokenType.TABLE)  # hive
        table = self._parse_table_parts(schema=True)
    else:
        unique = self._match(TokenType.UNIQUE)
        primary = self._match_text_seq("PRIMARY")
        amp = self._match_text_seq("AMP")

        if not self._match(TokenType.INDEX):
            return None

        index = self._parse_id_var()
        table = None

    params = self._parse_index_params()

    return self.expression(
        exp.Index(
            this=index, table=table, unique=unique, primary=primary, amp=amp, params=params
        )
    )

def _parse_table_hints(self) -> list[exp.Expr] | None:
    """Parse table hints: a `WITH (...)` group or a run of index hints.

    Returns None when no hint is found.
    """
    hints: list[exp.Expr] = []
    if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
        # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
        hints.append(
            self.expression(
                exp.WithTableHint(
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    )
                )
            )
        )
        self._match_r_paren()
    else:
        # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
        while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
            hint = exp.IndexTableHint(this=self._prev.text.upper())

            self._match_set((TokenType.INDEX, TokenType.KEY))
            if self._match(TokenType.FOR):
                # The token after FOR, whatever it is, becomes the hint target.
                hint.set("target", self._advance_any() and self._prev.text.upper())

            hint.set("expressions", self._parse_wrapped_id_vars())
            hints.append(hint)

    return hints or None

def _parse_table_part(self, schema: bool = False) -> exp.Expr | None:
    """Parse one dotted part of a table name.

    Tries, in order: a function (skipped when `schema` is true), an
    identifier, a string used as an identifier, and a placeholder.
    """
    return (
        (not schema and self._parse_function(optional_parens=False))
        or self._parse_id_var(any_token=False)
        or self._parse_string_as_identifier()
        or self._parse_placeholder()
    )

def _parse_table_parts_fast(self) -> exp.Table | None:
    """Fast path for a dotted table name made only of `IDENTIFIER_TOKENS`.

    Returns None, after rewinding, when the name is followed by a token in
    `TABLE_POSTFIX_TOKENS` or when a dot is not followed by an identifier
    token; also returns None when no identifier token is matched at all.
    """
    index = self._index
    parts: list[exp.Identifier] | None = None
    all_comments: list[str] | None = None

    while self._match_set(self.IDENTIFIER_TOKENS):
        token = self._prev
        comments = self._prev_comments

        has_dot = self._match(TokenType.DOT)
        curr_tt = self._curr.token_type

        if not has_dot:
            if curr_tt in self.TABLE_POSTFIX_TOKENS:
                self._retreat(index)
                return None
        elif curr_tt not in self.IDENTIFIER_TOKENS:
            self._retreat(index)
            return None

        # Lists are created lazily, only once there is something to store.
        if parts is None:
            parts = []

        if comments:
            if all_comments is None:
                all_comments = []
            all_comments.extend(comments)
            self._prev_comments = []

        parts.append(
            self.expression(
                exp.Identifier(
                    this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
                ),
                token,
            )
        )

        if not has_dot:
            break

    if parts is None:
        return None

    n = len(parts)

    # One part is the table, two are db.table, three or more are
    # catalog.db.table with any extra parts nested as Dot expressions.
    if n == 1:
        table: exp.Table = exp.Table(this=parts[0])
    elif n == 2:
        table = exp.Table(this=parts[1], db=parts[0])
    elif n >= 3:
        this: exp.Identifier | exp.Dot = parts[2]
        for i in range(3, n):
            this = exp.Dot(this=this, expression=parts[i])

        table = exp.Table(this=this, db=parts[1], catalog=parts[0])

    # NOTE(review): `parts` is non-empty here, so `table` is always an exp.Table
    # and the `table is None` branch looks unreachable; confirm before removing.
    if table is None:
        self._retreat(index)
    elif all_comments:
        table.add_comments(all_comments)
    return table

def _parse_table_parts(
    self,
    schema: bool = False,
    is_db_reference: bool = False,
    wildcard: bool = False,
    fast: bool = False,
) -> exp.Table | exp.Dot | None:
    """Parse a dotted table reference into an `exp.Table`.

    With `fast` set, only `_parse_table_parts_fast` is used. With
    `is_db_reference` set, the parts are shifted so the last one is the
    database and the table slot is None. With `wildcard` set, a directly
    connected `*` is appended to, or used as, the table identifier.
    """
    if fast:
        return self._parse_table_parts_fast()

    catalog: exp.Expr | str | None = None
    db: exp.Expr | str | None = None
    table: exp.Expr | str | None = self._parse_table_part(schema=schema)

    while self._match(TokenType.DOT):
        if catalog:
            # This allows nesting the table in arbitrarily many dot expressions if needed
            table = self.expression(
                exp.Dot(this=table, expression=self._parse_table_part(schema=schema))
            )
        else:
            catalog = db
            db = table
            # "" used for tsql FROM a..b case
            table = self._parse_table_part(schema=schema) or ""

    if (
        wildcard
        and self._is_connected()
        and (isinstance(table, exp.Identifier) or not table)
        and self._match(TokenType.STAR)
    ):
        if isinstance(table, exp.Identifier):
            table.args["this"] += "*"
        else:
            table = exp.Identifier(this="*")

    if is_db_reference:
        catalog = db
        db = table
        table = None

    if not table and not is_db_reference:
        self.raise_error(f"Expected table name but got {self._curr}")
    if not db and is_db_reference:
        self.raise_error(f"Expected database name but got {self._curr}")

    table = self.expression(exp.Table(this=table, db=db, catalog=catalog))

    # Bubble up comments from identifier parts to the Table
    comments = []
    for part in table.parts:
        if part_comments := part.pop_comments():
            comments.extend(part_comments)
    if comments:
        table.add_comments(comments)

    changes = self._parse_changes()
    if changes:
        table.set("changes", changes)

    at_before = self._parse_historical_data()
    if at_before:
        table.set("when", at_before)

    pivots = self._parse_pivots()
    if pivots:
        table.set("pivots", pivots)

    return table

def _parse_table(
    self,
    schema: bool = False,
    joins: bool = False,
    alias_tokens: t.Collection[TokenType] | None = None,
    parse_bracket: bool = False,
    is_db_reference: bool = False,
    parse_partition: bool = False,
    consume_pipe: bool = False,
) -> exp.Expr | None:
    """Parse a table-like source and everything that may trail it.

    A fast path for plain dotted names is tried first when none of `schema`,
    `is_db_reference`, `consume_pipe` or `joins` is set. Otherwise the source
    may be a stream, lateral, unnest, VALUES, subquery, bracket expression,
    ROWS FROM list or dotted table name, followed by the optional partition,
    version, sample, alias, index, hint, pivot, join and ordinality parts.
    """
    if not schema and not is_db_reference and not consume_pipe and not joins:
        index = self._index
        table = self._parse_table_parts(fast=True)

        if table is not None:
            curr_tt = self._curr.token_type
            next_tt = self._next.token_type

            fast_terminators = self.TABLE_TERMINATORS

            # only return the table if we're sure there are no other operators
            # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT
            if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION:
                return table

            postfix_tokens = self.TABLE_POSTFIX_TOKENS

            if curr_tt not in postfix_tokens and next_tt not in postfix_tokens:
                if alias := self._parse_table_alias(
                    alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                ):
                    table.set("alias", alias)

                if self._curr.token_type in fast_terminators:
                    return table

            # Fast path inconclusive: rewind and use the general path below.
            self._retreat(index)

    if stream := self._parse_stream():
        return stream

    if lateral := self._parse_lateral():
        return lateral

    if unnest := self._parse_unnest():
        return unnest

    if values := self._parse_derived_table_values():
        return values

    if subquery := self._parse_select(table=True, consume_pipe=consume_pipe):
        if not subquery.args.get("pivots"):
            subquery.set("pivots", self._parse_pivots())
        if joins:
            for join in self._parse_joins():
                subquery.append("joins", join)
        return subquery

    bracket = parse_bracket and self._parse_bracket(None)
    bracket = self.expression(exp.Table(this=bracket)) if bracket else None

    rows_from_tables = (
        self._parse_wrapped_csv(self._parse_table)
        if self._match_text_seq("ROWS", "FROM")
        else None
    )
    rows_from = (
        self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None
    )

    only = self._match(TokenType.ONLY)

    this = t.cast(
        exp.Expr,
        bracket
        or rows_from
        or self._parse_bracket(
            self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
        ),
    )

    if only:
        this.set("only", only)

    # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
    self._match(TokenType.STAR)

    parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
    if parse_partition and self._match(TokenType.PARTITION, advance=False):
        this.set("partition", self._parse_partition())

    if schema:
        return self._parse_schema(this=this)

    # Version and sample are parsed before or after the alias, depending on the dialect.
    if self.dialect.ALIAS_POST_VERSION:
        this.set("version", self._parse_version())

    if self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
    if alias:
        this.set("alias", alias)

    if self._match(TokenType.INDEXED_BY):
        this.set("indexed", self._parse_table_parts())
    elif self._match_text_seq("NOT", "INDEXED"):
        this.set("indexed", False)

    if isinstance(this, exp.Table) and self._match_text_seq("AT"):
        return self.expression(
            exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var())
        )

    this.set("hints", self._parse_table_hints())

    if not this.args.get("pivots"):
        this.set("pivots", self._parse_pivots())

    if not self.dialect.ALIAS_POST_TABLESAMPLE:
        this.set("sample", self._parse_table_sample())

    if not self.dialect.ALIAS_POST_VERSION:
        this.set("version", self._parse_version())

    if joins:
        for join in self._parse_joins(alias_tokens=alias_tokens):
            this.append("joins", join)

    if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
        this.set("ordinality", True)
        # Overwrites any alias set earlier with the one after WITH ORDINALITY (possibly None).
        this.set("alias", self._parse_table_alias())

    return this

def _parse_version(self) -> exp.Version | None:
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        this = "TIMESTAMP"
    elif self._match(TokenType.VERSION_SNAPSHOT):
        this = "VERSION"
    else:
        return None

    if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
        kind = self._prev.text.upper()
        start = self._parse_bitwise()
        self._match_texts(("TO", "AND"))
        end = self._parse_bitwise()
        expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end]))
    elif self._match_text_seq("CONTAINED", "IN"):
        kind = "CONTAINED IN"
        expression = self.expression(
            exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise))
def _parse_historical_data(self) -> exp.HistoricalData | None:
    """Parse an optional historical-data clause attached to a table.

    Reference: https://docs.snowflake.com/en/sql-reference/constructs/at-before

    The clause starts with one of ``self.HISTORICAL_DATA_PREFIX``, optionally
    followed by ``(`` and one of ``self.HISTORICAL_DATA_KIND``, and must then
    contain a ``FARROW`` token followed by a bitwise expression.

    Returns:
        An ``exp.HistoricalData`` node, or ``None`` when no clause is present.
        If the prefix matched but no ``FARROW <expr>`` follows, the parser is
        rewound to where it started and ``None`` is returned.
    """
    start = self._index

    if not self._match_texts(self.HISTORICAL_DATA_PREFIX):
        return None

    prefix = self._prev.text.upper()

    # Each step only runs if the previous one succeeded; `kind` ends up holding
    # either the first falsy result or the upper-cased kind keyword.
    kind = self._match(TokenType.L_PAREN)
    if kind:
        kind = self._match_texts(self.HISTORICAL_DATA_KIND)
    if kind:
        kind = self._prev.text.upper()

    expression = self._match(TokenType.FARROW)
    if expression:
        expression = self._parse_bitwise()

    if not expression:
        # The prefix turned out not to introduce a historical-data clause.
        self._retreat(start)
        return None

    self._match_r_paren()
    return self.expression(
        exp.HistoricalData(this=prefix, kind=kind, expression=expression)
    )
def _parse_derived_table_values(self) -> exp.Values | None:
    """Parse a ``VALUES`` list used as a table source.

    Accepted openings are ``( VALUES``, a bare ``VALUES``, or ``FORMAT VALUES``.
    A table alias is accepted either before or after the closing parenthesis
    of the parenthesized form.

    Returns:
        An ``exp.Values`` node, or ``None`` if none of the openings matched.
    """
    is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)

    if not is_derived:
        # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
        bare_values = self._match_text_seq("VALUES") or self._match_text_seq(
            "FORMAT", "VALUES"
        )
        if not bare_values:
            return None

    rows = self._parse_csv(self._parse_value)
    alias = self._parse_table_alias()

    if is_derived:
        self._match_r_paren()

    if not alias:
        # No alias before the closing paren: give it a second chance after it.
        alias = self._parse_table_alias()

    return self.expression(exp.Values(expressions=rows, alias=alias))
self._parse_placeholder() 5056 ) 5057 5058 if self._match_text_seq("BUCKET"): 5059 bucket_numerator = self._parse_number() 5060 self._match_text_seq("OUT", "OF") 5061 bucket_denominator = bucket_denominator = self._parse_number() 5062 self._match(TokenType.ON) 5063 bucket_field = self._parse_field() 5064 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 5065 percent = num 5066 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 5067 size = num 5068 else: 5069 percent = num 5070 5071 if matched_l_paren: 5072 self._match_r_paren() 5073 5074 if self._match(TokenType.L_PAREN): 5075 method = self._parse_var(upper=True) 5076 seed = self._match(TokenType.COMMA) and self._parse_number() 5077 self._match_r_paren() 5078 elif self._match_texts(("SEED", "REPEATABLE")): 5079 seed = self._parse_wrapped(self._parse_number) 5080 5081 if not method and self.DEFAULT_SAMPLING_METHOD: 5082 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 5083 5084 return self.expression( 5085 exp.TableSample( 5086 expressions=expressions, 5087 method=method, 5088 bucket_numerator=bucket_numerator, 5089 bucket_denominator=bucket_denominator, 5090 bucket_field=bucket_field, 5091 percent=percent, 5092 size=size, 5093 seed=seed, 5094 ) 5095 ) 5096 5097 def _parse_pivots(self) -> list[exp.Pivot] | None: 5098 if self._curr.token_type not in (TokenType.PIVOT, TokenType.UNPIVOT): 5099 return None 5100 return list(iter(self._parse_pivot, None)) or None 5101 5102 def _parse_joins( 5103 self, alias_tokens: t.Collection[TokenType] | None = None 5104 ) -> t.Iterator[exp.Join]: 5105 return iter(lambda: self._parse_join(alias_tokens=alias_tokens), None) 5106 5107 def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None: 5108 if not self._match(TokenType.INTO): 5109 return None 5110 5111 return self.expression( 5112 exp.UnpivotColumns( 5113 this=self._match_text_seq("NAME") and self._parse_column(), 5114 expressions=self._match_text_seq("VALUE") and 
def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot:
    """Parse the body of a simplified PIVOT / UNPIVOT statement.

    Reference: https://duckdb.org/docs/sql/statements/pivot

    The pieces are read in this order: the table, an optional ``ON`` list,
    an optional ``INTO`` part (via ``_parse_unpivot_columns``), an optional
    ``USING`` list and an optional group clause.

    Args:
        is_unpivot: Stored as-is in the ``unpivot`` arg of the resulting node.

    Returns:
        The ``exp.Pivot`` node built from the parsed pieces.
    """

    def parse_on_item() -> exp.Expr | None:
        operand = self._parse_bitwise()

        if self._match(TokenType.IN):
            # PIVOT ... ON col IN (row_val1, row_val2)
            result = self._parse_in(operand)
        elif self._match(TokenType.ALIAS, advance=False):
            # UNPIVOT ... ON (col1, col2, col3) AS row_val
            result = self._parse_alias(operand)
        else:
            result = operand

        return result

    def parse_using_item() -> exp.Expr | None:
        return self._parse_alias(self._parse_column())

    source = self._parse_table()
    on_items = self._match(TokenType.ON) and self._parse_csv(parse_on_item)
    into = self._parse_unpivot_columns()
    using_items = self._match(TokenType.USING) and self._parse_csv(parse_using_item)
    group = self._parse_group()

    pivot = exp.Pivot(
        this=source,
        expressions=on_items,
        using=using_items,
        group=group,
        unpivot=is_unpivot,
        into=into,
    )
    return self.expression(pivot)
_parse_pivot_aggregation(self) -> exp.Expr | None: 5180 func = self._parse_function() 5181 if not func: 5182 if self._prev.token_type == TokenType.COMMA: 5183 return None 5184 self.raise_error("Expecting an aggregation function in PIVOT") 5185 5186 return self._parse_alias(func) 5187 5188 def _parse_pivot(self) -> exp.Pivot | None: 5189 index = self._index 5190 include_nulls = None 5191 5192 if self._match(TokenType.PIVOT): 5193 unpivot = False 5194 elif self._match(TokenType.UNPIVOT): 5195 unpivot = True 5196 5197 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 5198 if self._match_text_seq("INCLUDE", "NULLS"): 5199 include_nulls = True 5200 elif self._match_text_seq("EXCLUDE", "NULLS"): 5201 include_nulls = False 5202 else: 5203 return None 5204 5205 expressions = [] 5206 5207 if not self._match(TokenType.L_PAREN): 5208 self._retreat(index) 5209 return None 5210 5211 if unpivot: 5212 expressions = self._parse_csv(self._parse_column) 5213 else: 5214 expressions = self._parse_csv(self._parse_pivot_aggregation) 5215 5216 if not expressions: 5217 self.raise_error("Failed to parse PIVOT's aggregation list") 5218 5219 if not self._match(TokenType.FOR): 5220 self.raise_error("Expecting FOR") 5221 5222 fields = [] 5223 while True: 5224 field = self._try_parse(self._parse_pivot_in) 5225 if not field: 5226 break 5227 fields.append(field) 5228 5229 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 5230 self._parse_bitwise 5231 ) 5232 5233 group = self._parse_group() 5234 5235 self._match_r_paren() 5236 5237 pivot = self.expression( 5238 exp.Pivot( 5239 expressions=expressions, 5240 fields=fields, 5241 unpivot=unpivot, 5242 include_nulls=include_nulls, 5243 default_on_null=default_on_null, 5244 group=group, 5245 ) 5246 ) 5247 5248 if unpivot: 5249 pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions]) 5250 for pivot_field in pivot.fields: 5251 if 
isinstance(pivot_field, exp.In): 5252 pivot_field.set("this", _unpivot_target(pivot_field.this)) 5253 5254 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5255 pivot.set("alias", self._parse_table_alias()) 5256 5257 if not unpivot: 5258 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5259 5260 columns: list[exp.Expr] = [] 5261 all_fields = [] 5262 for pivot_field in pivot.fields: 5263 pivot_field_expressions = pivot_field.expressions 5264 5265 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5266 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5267 continue 5268 5269 all_fields.append( 5270 [ 5271 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5272 for fld in pivot_field_expressions 5273 ] 5274 ) 5275 5276 if all_fields: 5277 if names: 5278 all_fields.append(names) 5279 5280 # Generate all possible combinations of the pivot columns 5281 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 5282 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 5283 for fld_parts_tuple in itertools.product(*all_fields): 5284 fld_parts = list(fld_parts_tuple) 5285 5286 if names and self.PREFIXED_PIVOT_COLUMNS: 5287 # Move the "name" to the front of the list 5288 fld_parts.insert(0, fld_parts.pop(-1)) 5289 5290 columns.append(exp.to_identifier("_".join(fld_parts))) 5291 5292 pivot.set("columns", columns) 5293 pivot.set("identify_pivot_strings", self.IDENTIFY_PIVOT_STRINGS) 5294 pivot.set("prefixed_pivot_columns", self.PREFIXED_PIVOT_COLUMNS) 5295 pivot.set("pivot_column_naming", self.PIVOT_COLUMN_NAMING) 5296 5297 return pivot 5298 5299 def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]: 5300 return [agg.alias for agg in aggregations if agg.alias] 5301 5302 def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None: 5303 if not skip_where_token and not self._match(TokenType.PREWHERE): 5304 return None 5305 5306 comments = self._prev_comments 5307 return self.expression( 5308 exp.PreWhere(this=self._parse_disjunction()), 5309 comments=comments, 5310 ) 5311 5312 def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None: 5313 if not skip_where_token and not self._match(TokenType.WHERE): 5314 return None 5315 5316 comments = self._prev_comments 5317 return self.expression( 5318 exp.Where(this=self._parse_disjunction()), 5319 comments=comments, 5320 ) 5321 5322 def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None: 5323 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 5324 return None 5325 comments = self._prev_comments 5326 5327 elements: dict[str, t.Any] = defaultdict(list) 5328 5329 if self._match(TokenType.ALL): 5330 elements["all"] = True 5331 elif self._match(TokenType.DISTINCT): 5332 elements["all"] = False 5333 5334 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 
5335 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5336 5337 while True: 5338 index = self._index 5339 5340 elements["expressions"].extend( 5341 self._parse_csv( 5342 lambda: ( 5343 None 5344 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 5345 else self._parse_disjunction() 5346 ) 5347 ) 5348 ) 5349 5350 before_with_index = self._index 5351 with_prefix = self._match(TokenType.WITH) 5352 5353 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 5354 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 5355 elements[key].append(cube_or_rollup) 5356 elif grouping_sets := self._parse_grouping_sets(): 5357 elements["grouping_sets"].append(grouping_sets) 5358 elif self._match_text_seq("TOTALS"): 5359 elements["totals"] = True # type: ignore 5360 5361 if before_with_index <= self._index <= before_with_index + 1: 5362 self._retreat(before_with_index) 5363 break 5364 5365 if index == self._index: 5366 break 5367 5368 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5369 5370 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None: 5371 if self._match(TokenType.CUBE): 5372 kind: type[exp.Cube | exp.Rollup] = exp.Cube 5373 elif self._match(TokenType.ROLLUP): 5374 kind = exp.Rollup 5375 else: 5376 return None 5377 5378 return self.expression( 5379 kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) 5380 ) 5381 5382 def _parse_grouping_sets(self) -> exp.GroupingSets | None: 5383 if self._match(TokenType.GROUPING_SETS): 5384 return self.expression( 5385 exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) 5386 ) 5387 return None 5388 5389 def _parse_grouping_set(self) -> exp.Expr | None: 5390 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 5391 5392 def _parse_having(self, skip_having_token: bool = False) -> exp.Having 
| None: 5393 if not skip_having_token and not self._match(TokenType.HAVING): 5394 return None 5395 comments = self._prev_comments 5396 return self.expression( 5397 exp.Having(this=self._parse_disjunction()), 5398 comments=comments, 5399 ) 5400 5401 def _parse_qualify(self) -> exp.Qualify | None: 5402 if not self._match(TokenType.QUALIFY): 5403 return None 5404 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5405 5406 def _parse_connect_with_prior(self) -> exp.Expr | None: 5407 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5408 exp.Prior(this=self._parse_bitwise()) 5409 ) 5410 connect = self._parse_disjunction() 5411 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5412 return connect 5413 5414 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5415 if skip_start_token: 5416 start = None 5417 elif self._match(TokenType.START_WITH): 5418 start = self._parse_disjunction() 5419 else: 5420 return None 5421 5422 self._match(TokenType.CONNECT_BY) 5423 nocycle = self._match_text_seq("NOCYCLE") 5424 connect = self._parse_connect_with_prior() 5425 5426 if not start and self._match(TokenType.START_WITH): 5427 start = self._parse_disjunction() 5428 5429 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5430 5431 def _parse_name_as_expression(self) -> exp.Expr | None: 5432 this = self._parse_id_var(any_token=True) 5433 if self._match(TokenType.ALIAS): 5434 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5435 return this 5436 5437 def _parse_interpolate(self) -> list[exp.Expr] | None: 5438 if self._match_text_seq("INTERPOLATE"): 5439 return self._parse_wrapped_csv(self._parse_name_as_expression) 5440 return None 5441 5442 def _parse_order( 5443 self, this: exp.Expr | None = None, skip_order_token: bool = False 5444 ) -> exp.Expr | None: 5445 siblings = None 5446 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5447 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 5448 return this 5449 5450 siblings = True 5451 5452 comments = self._prev_comments 5453 return self.expression( 5454 exp.Order( 5455 this=this, 5456 expressions=self._parse_csv(self._parse_ordered), 5457 siblings=siblings, 5458 ), 5459 comments=comments, 5460 ) 5461 5462 def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None: 5463 if not self._match(token): 5464 return None 5465 return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered))) 5466 5467 def _parse_ordered( 5468 self, parse_method: t.Callable[[], exp.Expr | None] | None = None 5469 ) -> exp.Ordered | None: 5470 this = parse_method() if parse_method else self._parse_disjunction() 5471 if not this: 5472 return None 5473 5474 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 5475 this = exp.var("ALL") 5476 5477 asc = self._match(TokenType.ASC) 5478 desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None) 5479 5480 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 5481 is_nulls_last = self._match_text_seq("NULLS", "LAST") 5482 5483 nulls_first = is_nulls_first or False 5484 explicitly_null_ordered = is_nulls_first or is_nulls_last 5485 5486 if ( 5487 not explicitly_null_ordered 5488 and ( 5489 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 5490 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 5491 ) 5492 and self.dialect.NULL_ORDERING != "nulls_are_last" 5493 ): 5494 nulls_first = True 5495 5496 if self._match_text_seq("WITH", "FILL"): 5497 with_fill = self.expression( 5498 exp.WithFill( 5499 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 5500 to=self._match_text_seq("TO") and self._parse_bitwise(), 5501 step=self._match_text_seq("STEP") and self._parse_bitwise(), 5502 interpolate=self._parse_interpolate(), 5503 ) 5504 ) 5505 else: 5506 with_fill = None 5507 5508 return self.expression( 5509 exp.Ordered(this=this, desc=desc, 
def _parse_limit_options(self) -> exp.LimitOptions | None:
    """Parse the optional trailing modifiers of a LIMIT / FETCH count.

    Consumes, in order and each optionally: a PERCENT (or MOD) token, a
    ROW / ROWS token, the word ``ONLY`` and the words ``WITH TIES``.

    Returns:
        An ``exp.LimitOptions`` node if a percent marker, a row marker or
        ``WITH TIES`` was seen; otherwise ``None``. A lone ``ONLY`` is
        consumed but does not produce a node.
    """
    percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
    rows = self._match_set((TokenType.ROW, TokenType.ROWS))
    # ONLY is accepted and dropped; it is not recorded on the node.
    self._match_text_seq("ONLY")
    with_ties = self._match_text_seq("WITH", "TIES")

    if percent or rows or with_ties:
        options = exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)
        return self.expression(options)

    return None
def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """Parse an optional OFFSET clause.

    Args:
        this: Expression the offset applies to; stored as the node's ``this``.

    Returns:
        ``this`` unchanged when no OFFSET token is present; otherwise an
        ``exp.Offset`` holding the parsed count and the result of
        ``_parse_limit_by``. A ROW / ROWS token after the count is consumed
        and dropped.
    """
    if self._match(TokenType.OFFSET):
        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))
        limit_by = self._parse_limit_by()

        return self.expression(
            exp.Offset(this=this, expression=count, expressions=limit_by)
        )

    return this
def _can_parse_named_window(self) -> bool:
    """Tell whether the upcoming tokens look like ``WINDOW <id> AS (``.

    `WINDOW` is in ID_VAR_TOKENS so it could be mistakenly consumed as an
    implicit alias. This check is pure lookahead: no token is consumed.

    Returns:
        ``True`` only when the current token is WINDOW, the next one is in
        ``self.ID_VAR_TOKENS``, the one after that is an ALIAS token and the
        third one is a left parenthesis.
    """
    if not self._match(TokenType.WINDOW, advance=False):
        return False

    tokens = self._tokens

    def peek(offset):
        # Token `offset` positions past the current index, or None past the end.
        position = self._index + offset
        return tokens[position] if position < len(tokens) else None

    name, alias_tok, body = peek(1), peek(2), peek(3)

    if name is None or name.token_type not in self.ID_VAR_TOKENS:
        return False
    if alias_tok is None or alias_tok.token_type != TokenType.ALIAS:
        return False
    if body is None:
        return False

    return body.token_type == TokenType.L_PAREN
self._parse_primary() 5668 elif self._match_text_seq("SKIP", "LOCKED"): 5669 wait = False 5670 5671 locks.append( 5672 self.expression( 5673 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5674 ) 5675 ) 5676 5677 return locks 5678 5679 def parse_set_operation( 5680 self, this: exp.Expr | None, consume_pipe: bool = False 5681 ) -> exp.Expr | None: 5682 start = self._index 5683 _, side_token, kind_token = self._parse_join_parts() 5684 5685 side = side_token.text if side_token else None 5686 kind = kind_token.text if kind_token else None 5687 5688 if not self._match_set(self.SET_OPERATIONS): 5689 self._retreat(start) 5690 return None 5691 5692 token_type = self._prev.token_type 5693 5694 if token_type == TokenType.UNION: 5695 operation: type[exp.SetOperation] = exp.Union 5696 elif token_type == TokenType.EXCEPT: 5697 operation = exp.Except 5698 else: 5699 operation = exp.Intersect 5700 5701 comments = self._prev.comments 5702 5703 if self._match(TokenType.DISTINCT): 5704 distinct: bool | None = True 5705 elif self._match(TokenType.ALL): 5706 distinct = False 5707 else: 5708 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5709 if distinct is None: 5710 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5711 5712 by_name = ( 5713 self._match_text_seq("BY", "NAME") 5714 or self._match_text_seq("STRICT", "CORRESPONDING") 5715 or None 5716 ) 5717 if self._match_text_seq("CORRESPONDING"): 5718 by_name = True 5719 if not side and not kind: 5720 kind = "INNER" 5721 5722 on_column_list = None 5723 if by_name and self._match_texts(("ON", "BY")): 5724 on_column_list = self._parse_wrapped_csv(self._parse_column) 5725 5726 expression = self._parse_select( 5727 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5728 ) 5729 5730 return self.expression( 5731 operation( 5732 this=this, 5733 distinct=distinct, 5734 by_name=by_name, 5735 expression=expression, 5736 side=side, 5737 kind=kind, 5738 on=on_column_list, 
5739 ), 5740 comments=comments, 5741 ) 5742 5743 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5744 while this: 5745 setop = self.parse_set_operation(this) 5746 if not setop: 5747 break 5748 this = setop 5749 5750 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5751 expression = this.expression 5752 5753 if expression: 5754 for arg in self.SET_OP_MODIFIERS: 5755 expr = expression.args.get(arg) 5756 if expr: 5757 this.set(arg, expr.pop()) 5758 5759 return this 5760 5761 def _parse_expression(self) -> exp.Expr | None: 5762 return self._parse_alias(self._parse_assignment()) 5763 5764 def _parse_assignment(self) -> exp.Expr | None: 5765 this = self._parse_disjunction() 5766 if not this and self._next.token_type in self.ASSIGNMENT: 5767 # This allows us to parse <non-identifier token> := <expr> 5768 this = exp.column( 5769 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5770 ) 5771 5772 while self._match_set(self.ASSIGNMENT): 5773 if isinstance(this, exp.Column) and len(this.parts) == 1: 5774 this = this.this 5775 5776 comments = self._prev_comments 5777 this = self.expression( 5778 self.ASSIGNMENT[self._prev.token_type]( 5779 this=this, expression=self._parse_assignment() 5780 ), 5781 comments=comments, 5782 ) 5783 5784 return this 5785 5786 def _parse_disjunction(self) -> exp.Expr | None: 5787 this = self._parse_conjunction() 5788 while self._match_set(self.DISJUNCTION): 5789 comments = self._prev_comments 5790 this = self.expression( 5791 self.DISJUNCTION[self._prev.token_type]( 5792 this=this, expression=self._parse_conjunction() 5793 ), 5794 comments=comments, 5795 ) 5796 return this 5797 5798 def _parse_conjunction(self) -> exp.Expr | None: 5799 this = self._parse_equality() 5800 while self._match_set(self.CONJUNCTION): 5801 comments = self._prev_comments 5802 this = self.expression( 5803 self.CONJUNCTION[self._prev.token_type]( 5804 this=this, expression=self._parse_equality() 5805 
), 5806 comments=comments, 5807 ) 5808 return this 5809 5810 def _parse_equality(self) -> exp.Expr | None: 5811 this = self._parse_comparison() 5812 while self._match_set(self.EQUALITY): 5813 comments = self._prev_comments 5814 this = self.expression( 5815 self.EQUALITY[self._prev.token_type]( 5816 this=this, expression=self._parse_comparison() 5817 ), 5818 comments=comments, 5819 ) 5820 return this 5821 5822 def _parse_comparison(self) -> exp.Expr | None: 5823 this = self._parse_range() 5824 while self._match_set(self.COMPARISON): 5825 comments = self._prev_comments 5826 this = self.expression( 5827 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5828 comments=comments, 5829 ) 5830 return this 5831 5832 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5833 this = this or self._parse_bitwise() 5834 negate = self._match(TokenType.NOT) 5835 5836 if self._match_set(self.RANGE_PARSERS): 5837 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5838 if not expression: 5839 return this 5840 5841 this = expression 5842 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5843 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5844 5845 # Postgres supports ISNULL and NOTNULL for conditions. 
5846 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5847 if self._match(TokenType.NOTNULL): 5848 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5849 this = self.expression(exp.Not(this=this)) 5850 5851 if negate: 5852 this = self._negate_range(this) 5853 5854 if self._match(TokenType.IS): 5855 this = self._parse_is(this) 5856 5857 return this 5858 5859 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5860 if not this: 5861 return this 5862 5863 expression = this.this if isinstance(this, exp.Escape) else this 5864 if isinstance(expression, (exp.Like, exp.ILike)): 5865 expression.set("negate", True) 5866 return this 5867 5868 return self.expression(exp.Not(this=this)) 5869 5870 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5871 index = self._index - 1 5872 negate = self._match(TokenType.NOT) 5873 5874 if self._match_text_seq("DISTINCT", "FROM"): 5875 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5876 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5877 5878 if self._match(TokenType.JSON): 5879 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5880 5881 if self._match_text_seq("WITH"): 5882 _with = True 5883 elif self._match_text_seq("WITHOUT"): 5884 _with = False 5885 else: 5886 _with = None 5887 5888 unique = self._match(TokenType.UNIQUE) 5889 self._match_text_seq("KEYS") 5890 expression: exp.Expr | None = self.expression( 5891 exp.JSON(this=kind, with_=_with, unique=unique) 5892 ) 5893 else: 5894 expression = self._parse_null() or self._parse_bitwise() 5895 if not expression: 5896 self._retreat(index) 5897 return None 5898 5899 this = self.expression(exp.Is(this=this, expression=expression)) 5900 this = self.expression(exp.Not(this=this)) if negate else this 5901 return self._parse_column_ops(this) 5902 5903 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5904 unnest = 
def _parse_interval_span(self, this: exp.Expr) -> exp.Interval:
    """Parse the unit (or `unit TO unit` span) of an interval and build the node.

    Args:
        this: The interval's value expression. String and number literals are
            normalized below; anything else is passed through unchanged.

    Returns:
        An `exp.Interval` whose `this` is the (possibly normalized) value and
        whose `unit` is a unit expression, an `exp.IntervalSpan`, or None.
    """
    # handle day-time format interval span with omitted units:
    #   INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`>
    interval_span_units_omitted = None
    if (
        this
        and this.is_string
        and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
        and exp.INTERVAL_DAY_TIME_RE.match(this.name)
    ):
        # Pure look-ahead: the position is restored once we know whether
        # both units of a `unit TO unit` span are present.
        index = self._index

        # Var "TO" Var
        first_unit = self._parse_var(any_token=True, upper=True)
        second_unit = None
        if first_unit and self._match_text_seq("TO"):
            second_unit = self._parse_var(any_token=True, upper=True)

        # Units count as omitted unless both sides of the span were found
        interval_span_units_omitted = not (first_unit and second_unit)

        self._retreat(index)

    if interval_span_units_omitted:
        unit = None
    else:
        # Prefer a function as the unit; otherwise accept a VAR token or any
        # token whose text is one of the dialect's valid interval units.
        unit = self._parse_function()
        if not unit and (
            self._curr.token_type == TokenType.VAR
            or self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS
        ):
            unit = self._parse_var(any_token=True, upper=True)

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        parts = exp.INTERVAL_STRING_RE.findall(this.name)
        if parts and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        # Exactly one match: split the string into its value and unit
        if len(parts) == 1:
            this = exp.Literal.string(parts[0][0])
            unit = self.expression(exp.Var(this=parts[0][1].upper()))

    # `<unit> TO <unit>`: wrap the start unit and the end unit in an IntervalSpan
    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        unit = self.expression(
            exp.IntervalSpan(
                this=unit,
                expression=self._parse_function()
                or self._parse_var(any_token=True, upper=True),
            )
        )

    return self.expression(exp.Interval(this=this, unit=unit))
def _parse_bitwise(self) -> exp.Expr | None:
    """Parse a left-associative chain of bitwise-level binary operators.

    Starting from a term, the loop keeps folding the accumulated expression
    into a new node for as long as one of these follows: a `BITWISE` operator,
    `||` (only when the dialect treats it as string concatenation), `??`
    (built as a COALESCE), `<<` or `>>`.

    Returns:
        The accumulated expression, or whatever `_parse_term` returned when no
        operator follows.
    """
    left = self._parse_term()

    while True:
        if self._match_set(self.BITWISE):
            node_cls = self.BITWISE[self._prev.token_type]
            node = node_cls(this=left, expression=self._parse_term())
        elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
            node = exp.DPipe(
                this=left,
                expression=self._parse_term(),
                safe=not self.dialect.STRICT_STRING_CONCAT,
            )
        elif self._match(TokenType.DQMARK):
            node = exp.Coalesce(this=left, expressions=ensure_list(self._parse_term()))
        elif self._match_pair(TokenType.LT, TokenType.LT):
            node = exp.BitwiseLeftShift(this=left, expression=self._parse_term())
        elif self._match_pair(TokenType.GT, TokenType.GT):
            node = exp.BitwiseRightShift(this=left, expression=self._parse_term())
        else:
            # No further operator at this precedence level
            return left

        left = self.expression(node)
def _parse_type(
    self, parse_interval: bool = True, fallback_to_identifier: bool = False
) -> exp.Expr | None:
    """Parse an atom, an interval, a typed literal / type expression, or a column.

    The alternatives are tried in order, retreating when a data type was
    consumed but turned out not to be usable as one.

    Args:
        parse_interval: Whether `_parse_interval` is attempted.
        fallback_to_identifier: When True, the `_parse_atom` shortcut is
            skipped and the final fallback is `_parse_id_var` instead of
            `_parse_column`.

    Returns:
        The parsed expression, or the result of the final fallback parser.
    """
    # Fast path: anything `_parse_atom` recognizes is returned directly
    if not fallback_to_identifier and (atom := self._parse_atom()) is not None:
        return atom

    if interval := parse_interval and self._parse_interval():
        return self._parse_column_ops(interval)

    # Remember where the type started so it can be un-consumed further down
    index = self._index
    data_type = self._parse_types(check_func=True, allow_identifiers=False)

    # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
    # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
    if isinstance(data_type, exp.Cast):
        # This constructor can contain ops directly after it, for instance struct unnesting:
        # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
        return self._parse_column_ops(data_type)

    if data_type:
        index2 = self._index
        this = self._parse_primary()

        # A literal right after the type: <type> <literal>
        if isinstance(this, exp.Literal):
            literal = this.name
            this = self._parse_column_ops(this)

            # A type-specific literal parser takes precedence over the generic cast
            parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
            if parser:
                return parser(self, this, data_type)

            # Promote TIMESTAMP to TIMESTAMPTZ when the literal text matches TIME_ZONE_RE
            if (
                self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                and data_type.is_type(exp.DType.TIMESTAMP)
                and TIME_ZONE_RE.search(literal)
            ):
                data_type = exp.DType.TIMESTAMPTZ.into_expr()

            return self.expression(exp.Cast(this=this, to=data_type))

        # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
        # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
        #
        # If the index difference here is greater than 1, that means the parser itself must have
        # consumed additional tokens such as the DECIMAL scale and precision in the above example.
        #
        # If it's not greater than 1, then it must be 1, because we've consumed at least the type
        # keyword, meaning that the expressions arg of the DataType must have gotten set by a
        # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
        # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
        #
        # In these cases, we don't really want to return the converted type, but instead retreat
        # and try to parse a Column or Identifier in the section below.
        if data_type.expressions and index2 - index > 1:
            self._retreat(index2)
            return self._parse_column_ops(data_type)

        # Not usable as a type here: rewind to before the type keyword
        self._retreat(index)

    if fallback_to_identifier:
        return self._parse_id_var()

    return self._parse_column()
= self._parse_user_defined_type(identifier) 6241 else: 6242 self._retreat(self._index - 1) 6243 return None 6244 else: 6245 return None 6246 6247 if type_token == TokenType.PSEUDO_TYPE: 6248 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6249 6250 if type_token == TokenType.OBJECT_IDENTIFIER: 6251 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6252 6253 # https://materialize.com/docs/sql/types/map/ 6254 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6255 key_type = self._parse_types( 6256 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6257 ) 6258 if not self._match(TokenType.FARROW): 6259 self._retreat(index) 6260 return None 6261 6262 value_type = self._parse_types( 6263 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6264 ) 6265 if not self._match(TokenType.R_BRACKET): 6266 self._retreat(index) 6267 return None 6268 6269 return exp.DataType( 6270 this=exp.DType.MAP, 6271 expressions=[key_type, value_type], 6272 nested=True, 6273 ) 6274 6275 nested = type_token in self.NESTED_TYPE_TOKENS 6276 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6277 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6278 expressions = None 6279 maybe_func = False 6280 6281 if self._match(TokenType.L_PAREN): 6282 if is_struct: 6283 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6284 elif nested: 6285 expressions = self._parse_csv( 6286 lambda: self._parse_types( 6287 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6288 ) 6289 ) 6290 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6291 this = expressions[0] 6292 this.set("nullable", True) 6293 self._match_r_paren() 6294 return this 6295 elif type_token in self.ENUM_TYPE_TOKENS: 6296 expressions = self._parse_csv(self._parse_equality) 6297 elif type_token == TokenType.JSON: 6298 # ClickHouse JSON type supports arguments: JSON(col 
Type, SKIP col, param=value) 6299 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6300 expressions = self._parse_csv(self._parse_json_type_arg) 6301 elif is_aggregate: 6302 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6303 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6304 ) 6305 if not func_or_ident: 6306 return None 6307 expressions = [func_or_ident] 6308 if self._match(TokenType.COMMA): 6309 expressions.extend( 6310 self._parse_csv( 6311 lambda: self._parse_types( 6312 check_func=check_func, 6313 schema=schema, 6314 allow_identifiers=allow_identifiers, 6315 ) 6316 ) 6317 ) 6318 else: 6319 expressions = self._parse_csv(self._parse_type_size) 6320 6321 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6322 if type_token == TokenType.VECTOR and len(expressions) == 2: 6323 expressions = self._parse_vector_expressions(expressions) 6324 6325 if not self._match(TokenType.R_PAREN): 6326 self._retreat(index) 6327 return None 6328 6329 maybe_func = True 6330 6331 values: list[exp.Expr] | None = None 6332 6333 if nested and self._match(TokenType.LT): 6334 if is_struct: 6335 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6336 else: 6337 expressions = self._parse_csv( 6338 lambda: self._parse_types( 6339 check_func=check_func, 6340 schema=schema, 6341 allow_identifiers=allow_identifiers, 6342 with_collation=True, 6343 ) 6344 ) 6345 6346 if not self._match(TokenType.GT): 6347 self.raise_error("Expecting >") 6348 6349 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6350 values = self._parse_csv(self._parse_disjunction) 6351 if not values and is_struct: 6352 values = None 6353 self._retreat(self._index - 1) 6354 else: 6355 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6356 6357 if type_token in self.TIMESTAMPS: 6358 if self._match_text_seq("WITH", "TIME", "ZONE"): 6359 maybe_func = False 6360 tz_type = exp.DType.TIMETZ if type_token in self.TIMES 
else exp.DType.TIMESTAMPTZ 6361 this = exp.DataType(this=tz_type, expressions=expressions) 6362 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6363 maybe_func = False 6364 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6365 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6366 maybe_func = False 6367 elif type_token == TokenType.INTERVAL: 6368 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6369 unit = self._parse_var(upper=True) 6370 if self._match_text_seq("TO"): 6371 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6372 6373 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6374 else: 6375 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6376 elif type_token == TokenType.VOID: 6377 this = exp.DataType(this=exp.DType.NULL) 6378 6379 if maybe_func and check_func: 6380 index2 = self._index 6381 peek = self._parse_string() 6382 6383 if not peek: 6384 self._retreat(index) 6385 return None 6386 6387 self._retreat(index2) 6388 6389 if not this: 6390 assert type_token is not None 6391 if self._match_text_seq("UNSIGNED"): 6392 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6393 if not unsigned_type_token: 6394 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6395 6396 type_token = unsigned_type_token or type_token 6397 6398 # NULLABLE without parentheses can be a column (Presto/Trino) 6399 if type_token == TokenType.NULLABLE and not expressions: 6400 self._retreat(index) 6401 return None 6402 6403 this = exp.DataType( 6404 this=exp.DType[type_token.name], 6405 expressions=expressions, 6406 nested=nested, 6407 ) 6408 6409 # Empty arrays/structs are allowed 6410 if values is not None: 6411 cls = exp.Struct if is_struct else exp.Array 6412 this = exp.cast(cls(expressions=values), this, copy=False) 6413 6414 elif expressions: 6415 this.set("expressions", expressions) 6416 6417 # 
https://materialize.com/docs/sql/types/list/#type-name 6418 while self._match(TokenType.LIST): 6419 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6420 6421 index = self._index 6422 6423 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6424 matched_array = self._match(TokenType.ARRAY) 6425 6426 while self._curr: 6427 datatype_token = self._prev.token_type 6428 matched_l_bracket = self._match(TokenType.L_BRACKET) 6429 6430 if (not matched_l_bracket and not matched_array) or ( 6431 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6432 ): 6433 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6434 # not to be confused with the fixed size array parsing 6435 break 6436 6437 matched_array = False 6438 values = self._parse_csv(self._parse_disjunction) or None 6439 if ( 6440 values 6441 and not schema 6442 and ( 6443 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6444 or datatype_token == TokenType.ARRAY 6445 or not self._match(TokenType.R_BRACKET, advance=False) 6446 ) 6447 ): 6448 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
def _parse_json_type_arg(self) -> exp.Expr | None:
    """Parse a single argument to ClickHouse's JSON type.

    Three argument shapes are handled: `SKIP col` / `SKIP REGEXP 'pattern'`,
    `name=value` parameters, and `col_name Type` column type hints.

    Returns:
        An `exp.SkipJSONColumn`, `exp.EQ` or `exp.ColumnDef` node, or None when
        the argument does not start with a column.
    """
    # SKIP col or SKIP REGEXP 'pattern'
    if self._match_text_seq("SKIP"):
        is_regexp = self._match(TokenType.RLIKE)
        skipped = self._parse_column()
        if isinstance(skipped, exp.Column):
            skipped = skipped.to_dot()
        skip_node = exp.SkipJSONColumn(regexp=is_regexp, expression=skipped)
        return self.expression(skip_node)

    column = self._parse_column()
    if not isinstance(column, exp.Column):
        return None

    # Parameter: name=value (e.g., max_dynamic_paths=2)
    if len(column.parts) == 1 and self._match(TokenType.EQ):
        param_name = column.name
        param_value = self._parse_primary()
        return self.expression(exp.EQ(this=exp.var(param_name), expression=param_value))

    # Column type hint: col_name Type
    path = column.to_dot()
    type_hint = self._parse_types(check_func=False, allow_identifiers=False)
    return self.expression(exp.ColumnDef(this=path, kind=type_hint))
def _parse_column(self) -> exp.Expr | None:
    """Parse a column reference together with its trailing operators.

    `_parse_column_parts_fast` is tried first. If it returns None, the column
    reference (or, failing that, a bracket expression) is parsed and extended
    with `_parse_column_ops`.

    Returns:
        The parsed expression, or the falsy value produced by the underlying
        parsers when nothing could be parsed.
    """
    result: exp.Expr | None = self._parse_column_parts_fast()

    if result is None:
        reference = self._parse_column_reference()
        if not reference:
            reference = self._parse_bracket(reference)
        result = self._parse_column_ops(reference) if reference else reference

    if not result:
        return result

    if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS:
        result.set("join_mark", self._match(TokenType.JOIN_MARKER))

    if self.COLON_IS_VARIANT_EXTRACT:
        result = self._parse_colon_as_variant_extract(result)

    return result
def _parse_column_reference(self) -> exp.Expr | None:
    """Parse a field and promote a bare identifier to an `exp.Column`.

    When no field is found, a VALUES token is parsed as an identifier, but only
    if `VALUES_FOLLOWED_BY_PAREN` is set and the token after it is not an
    opening parenthesis.

    Returns:
        An `exp.Column` wrapping the identifier, or the parsed field unchanged
        when it is not an `exp.Identifier`.
    """
    node = self._parse_field()

    if not node:
        values_is_a_name = (
            self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        )
        if values_is_a_name:
            node = self._parse_id_var()

    if not isinstance(node, exp.Identifier):
        return node

    # We bubble up comments from the Identifier to the Column
    column = exp.Column(this=node)
    return self.expression(column, comments=node.pop_comments())
def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None:
    """Parse the brackets and column operators that follow an expression.

    Args:
        this: The expression the operators apply to.

    Returns:
        `this`, extended by every bracket and `COLUMN_OPERATORS` token that
        follows it.

    Raises:
        Reports "Expected type" through `raise_error` when a cast operator is
        not followed by a type.
    """
    # Leading bracket expressions directly after `this`
    while self._curr.token_type in self.BRACKETS:
        this = self._parse_bracket(this)

    # Bind the lookup tables once, outside the loop
    column_operators = self.COLUMN_OPERATORS
    cast_column_operators = self.CAST_COLUMN_OPERATORS
    while self._curr:
        op_token = self._curr.token_type

        if op_token not in column_operators:
            break
        # `op` is falsy for operators that have no builder; those are handled
        # by the Column / Dot construction further down.
        op = column_operators[op_token]
        self._advance()

        if op_token in cast_column_operators:
            field = self._parse_dcolon()
            if not field:
                self.raise_error("Expected type")
        elif op and self._curr:
            field = self._parse_column_reference() or self._parse_bitwise()
            # A column operand followed by a dot is extended recursively
            if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                field = self._parse_column_ops(field)
        else:
            field = self._parse_field(any_token=True, anonymous_func=True)

        # Function calls can be qualified, e.g., x.y.FOO()
        # This converts the final AST to a series of Dots leading to the function call
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
        if isinstance(field, (exp.Func, exp.Window)) and this:
            this = this.transform(
                lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
            )

        if op:
            this = op(self, this, field)
        elif isinstance(this, exp.Column) and not this.args.get("catalog"):
            # Shift the existing parts one level up (this -> table, table -> db,
            # db -> catalog) and make the new field the column name.
            this = self.expression(
                exp.Column(
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                ),
                comments=this.comments,
            )
        elif isinstance(field, exp.Window):
            # Move the exp.Dot's to the window's function
            window_func = self.expression(exp.Dot(this=this, expression=field.this))
            field.set("this", window_func)
            this = field
        else:
            this = self.expression(exp.Dot(this=this, expression=field))

        # Comments attached to the field are moved onto the combined node
        if field and field.comments:
            t.cast(exp.Expr, this).add_comments(field.pop_comments())

        this = self._parse_bracket(this)

    return this
def _parse_primary(self) -> exp.Expr | None:
    """Parse a primary expression.

    Handles tokens registered in `PRIMARY_PARSERS`, runs of adjacent string
    literals (folded into one `exp.Concat`), numbers written with a leading dot
    (`.5` becomes the number literal `0.5`) and, failing all of those,
    parenthesized expressions via `_parse_paren`.

    Returns:
        The parsed expression, or whatever `_parse_paren` returns.
    """
    if not self._match_set(self.PRIMARY_PARSERS):
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    kind = self._prev.token_type
    primary = self.PRIMARY_PARSERS[kind](self, self._prev)

    if kind != TokenType.STRING:
        return primary

    # Collect any string literals that directly follow the first one
    pieces = [primary]
    while self._match(TokenType.STRING):
        pieces.append(exp.Literal.string(self._prev.text))

    if len(pieces) == 1:
        return primary

    return self.expression(
        exp.Concat(expressions=pieces, coalesce=self.dialect.CONCAT_COALESCE)
    )
def _parse_function(
    self,
    functions: dict[str, t.Callable] | None = None,
    anonymous: bool = False,
    optional_parens: bool = True,
    any_token: bool = False,
) -> exp.Expr | None:
    """Parse a function call, optionally wrapped in the `{fn <function>}` escape.

    All arguments are forwarded unchanged to `_parse_function_call`.
    """
    # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
    # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
    braced = bool(
        self._match(TokenType.L_BRACE, advance=False)
        and self._next
        and self._next.text.upper() == "FN"
    )
    if braced:
        # Skip both the `{` and the `FN` marker.
        self._advance(2)

    call = self._parse_function_call(
        functions=functions,
        anonymous=anonymous,
        optional_parens=optional_parens,
        any_token=any_token,
    )

    if braced:
        self._match(TokenType.R_BRACE)

    return call
TokenType.L_PAREN: 6945 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS and not after_dot: 6946 self._advance() 6947 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6948 6949 return None 6950 6951 if any_token: 6952 if token_type in self.RESERVED_TOKENS: 6953 return None 6954 elif token_type not in self.FUNC_TOKENS: 6955 return None 6956 6957 self._advance(2) 6958 6959 parser = self.FUNCTION_PARSERS.get(upper) 6960 if parser and not anonymous: 6961 result = parser(self) 6962 else: 6963 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6964 6965 if subquery_predicate: 6966 expr = None 6967 if self._curr.token_type in self.SUBQUERY_TOKENS: 6968 expr = self._parse_select() 6969 self._match_r_paren() 6970 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6971 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6972 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6973 self._advance(-1) 6974 expr = self._parse_bitwise() 6975 6976 if expr: 6977 return self.expression(subquery_predicate(this=expr), comments=comments) 6978 6979 if functions is None: 6980 functions = self.FUNCTIONS 6981 6982 function = functions.get(upper) 6983 known_function = function and not anonymous 6984 6985 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6986 args = self._parse_function_args(alias) 6987 6988 post_func_comments = self._curr.comments if self._curr else None 6989 if known_function and post_func_comments: 6990 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6991 # call we'll construct it as exp.Anonymous, even if it's "known" 6992 if any( 6993 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6994 for comment in post_func_comments 6995 ): 6996 known_function = False 6997 6998 if alias and known_function: 6999 args = self._kv_to_prop_eq(args) 7000 7001 if known_function: 7002 func_builder = t.cast(t.Callable, 
def _kv_to_prop_eq(
    self, expressions: list[exp.Expr], parse_map: bool = False
) -> list[exp.Expr]:
    """Normalize key/value style arguments into `exp.PropertyEQ` nodes.

    Items that are not instances of `KEY_VALUE_DEFINITIONS` are delegated to
    `_to_prop_eq` together with their position. When `parse_map` is true the
    key expression is kept as is; otherwise it is turned into an identifier
    built from the key's name.
    """
    result = []

    for position, item in enumerate(expressions):
        if not isinstance(item, self.KEY_VALUE_DEFINITIONS):
            result.append(self._to_prop_eq(item, position))
            continue

        if isinstance(item, exp.Alias):
            # `value AS key` -> key = value
            item = self.expression(
                exp.PropertyEQ(this=item.args.get("alias"), expression=item.this)
            )

        if not isinstance(item, exp.PropertyEQ):
            key = item.this if parse_map else exp.to_identifier(item.this.name)
            item = self.expression(exp.PropertyEQ(this=key, expression=item.expression))

        key_node = item.this
        if isinstance(key_node, exp.Column):
            # Replace a column key with the node it wraps.
            key_node.replace(key_node.this)

        result.append(item)

    return result
def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
    """Parse a user-defined function name and its optional parameter list.

    Returns the parsed name alone when no `(` follows; otherwise returns an
    `exp.UserDefinedFunction` flagged as `wrapped`.
    """
    name = self._parse_table_parts(schema=True)

    if self._match(TokenType.L_PAREN):
        params = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        udf = exp.UserDefinedFunction(this=name, expressions=params, wrapped=True)
        return self.expression(udf)

    return name
def _parse_session_parameter(self) -> exp.SessionParameter:
    """Parse a session parameter, optionally qualified as `<kind>.<name>`."""
    name = self._parse_id_var() or self._parse_primary()

    if not (name and self._match(TokenType.DOT)):
        return self.expression(exp.SessionParameter(this=name, kind=None))

    # The part before the dot is the kind; the real name follows it.
    scope = name.name
    name = self._parse_var() or self._parse_primary()
    return self.expression(exp.SessionParameter(this=name, kind=scope))
def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """Parse an optional parenthesized list of constraints / field definitions.

    Returns `this` untouched when no `(` follows, or when the parenthesis
    opens a query; in the latter case the parser is rewound to its starting
    position. Otherwise returns an `exp.Schema` wrapping `this`.
    """
    start = self._index

    if not self._match(TokenType.L_PAREN):
        return this

    # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def parse_entry() -> exp.Expr | None:
        # A constraint takes precedence over a plain field definition.
        return self._parse_constraint() or self._parse_field_def()

    entries = self._parse_csv(parse_entry)
    self._match_r_paren()
    return self.expression(exp.Schema(this=this, expressions=entries))
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse the optional arguments following an auto-increment keyword.

    Accepts either a wrapped `(start, increment)` list or the
    `START <expr> [INCREMENT] <expr> [ORDER | NOORDER]` form. When both a
    start and an increment are present a `GeneratedAsIdentityColumnConstraint`
    is returned, otherwise a plain `AutoIncrementColumnConstraint`.
    """
    start = increment = order = None

    if self._match(TokenType.L_PAREN, advance=False):
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        start, increment = seq_get(bounds, 0), seq_get(bounds, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()

        if self._match_text_seq("ORDER"):
            order = True
        elif self._match_text_seq("NOORDER"):
            order = False

    if not (start and increment):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(
        start=start, increment=increment, this=False, order=order
    )
def _parse_not_constraint(self) -> exp.Expr | None:
    """Parse the constraint that follows an already-consumed `NOT` token.

    Recognizes `NULL`, `CASESPECIFIC` and `FOR REPLICATION`. When none of
    them follows, the parser steps back one token and `None` is returned.
    """
    if self._match_text_seq("NULL"):
        node = exp.NotNullColumnConstraint()
    elif self._match_text_seq("CASESPECIFIC"):
        node = exp.CaseSpecificColumnConstraint(not_=True)
    elif self._match_text_seq("FOR", "REPLICATION"):
        node = exp.NotForReplicationColumnConstraint()
    else:
        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    return self.expression(node)
def _parse_unnamed_constraint(
    self, constraints: t.Collection[str] | None = None
) -> exp.Expr | None:
    """Parse a constraint that is not introduced by `CONSTRAINT <name>`.

    Args:
        constraints: The keywords to accept; defaults to every key of
            `CONSTRAINT_PARSERS`.

    Returns:
        The parsed constraint, or a falsy value if nothing was parsed (the
        parser is rewound when the selected constraint parser yields nothing).
    """
    start = self._index

    # A quoted identifier is never treated as a constraint keyword.
    if self._match(TokenType.IDENTIFIER, advance=False):
        return None

    candidates = constraints or self.CONSTRAINT_PARSERS
    if not self._match_texts(candidates):
        return None

    key = self._prev.text.upper()
    parsers = self.CONSTRAINT_PARSERS
    if key not in parsers:
        self.raise_error(f"No parser found for schema constraint {key}.")

    parsed = parsers[key](self)
    if parsed:
        return parsed

    self._retreat(start)
    return parsed
def _parse_key_constraint_options(self) -> list[str]:
    """Collect key constraint options as plain strings.

    `ON <event> <action>` clauses are rendered as `"ON <event> <action>"`;
    any other option is matched against `KEY_CONSTRAINT_OPTIONS` and stored
    by name. Parsing stops at the end of input or at the first token that
    matches neither form.
    """
    collected = []

    while self._curr:
        if not self._match(TokenType.ON):
            option = self._parse_var_from_options(
                self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
            )
            if not option:
                break
            collected.append(option.name)
            continue

        event = self._advance_any() and self._prev.text
        action = None

        # Plain keyword actions are tried first, in this exact order.
        for words in (("NO", "ACTION"), ("CASCADE",), ("RESTRICT",)):
            if self._match_text_seq(*words):
                action = " ".join(words)
                break
        else:
            if self._match_pair(TokenType.SET, TokenType.NULL):
                action = "SET NULL"
            elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                action = "SET DEFAULT"
            else:
                self.raise_error("Invalid key constraint")

        collected.append(f"ON {event} {action}")

    return collected
def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None:
    """Parse the wrapped column pair of a period-for-system-time constraint.

    If the next token is not `TIMESTAMP_SNAPSHOT`, the parser steps back one
    token and `None` is returned.
    """
    if self._match(TokenType.TIMESTAMP_SNAPSHOT):
        columns = self._parse_wrapped_id_vars()
        constraint = exp.PeriodForSystemTimeConstraint(
            this=seq_get(columns, 0), expression=seq_get(columns, 1)
        )
        return self.expression(constraint)

    self._retreat(self._index - 1)
    return None
def _parse_odbc_datetime_literal(self) -> exp.Expr:
    """
    Parse the body of an ODBC datetime escape, e.g. `{d'yyyy-mm-dd'}`.

    After attempting to consume a VAR token, the lowercased text of the previous
    token is looked up in `ODBC_DATETIME_LITERALS` to select the expression class,
    which then wraps the string that follows. For example, `{d'yyyy-mm-dd'}` is
    parsed into the same `Date` expression as `DATE('yyyy-mm-dd')`. An error is
    reported when the closing `}` is missing.

    Reference:
    https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
    """
    self._match(TokenType.VAR)
    marker = self._prev.text.lower()
    literal_type = self.ODBC_DATETIME_LITERALS[marker]
    value = self._parse_string()
    node = self.expression(literal_type(this=value))

    if self._match(TokenType.R_BRACE):
        return node

    self.raise_error("Expected }")
    return node
def _parse_case(self) -> exp.Expr | None:
    """Parse a `CASE [operand] WHEN ... THEN ... [ELSE ...] END` expression."""
    if self._match(TokenType.DOT, advance=False):
        # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
        self._retreat(self._index - 1)
        return None

    comments = self._prev_comments
    operand = self._parse_disjunction()

    branches = []
    while self._match(TokenType.WHEN):
        condition = self._parse_disjunction()
        self._match(TokenType.THEN)
        outcome = self._parse_disjunction()
        branches.append(self.expression(exp.If(this=condition, true=outcome)))

    fallback = self._parse_disjunction() if self._match(TokenType.ELSE) else None

    if not self._match(TokenType.END):
        # `ELSE interval END` is parsed as an interval whose value is `END`;
        # in that case the default is rewritten to a column named "interval".
        swallowed_end = (
            isinstance(fallback, exp.Interval) and fallback.this.sql().upper() == "END"
        )
        if swallowed_end:
            fallback = exp.column("interval")
        else:
            self.raise_error("Expected END after CASE", self._prev)

    return self.expression(
        exp.Case(this=operand, ifs=branches, default=fallback), comments=comments
    )
def _parse_extract(self) -> exp.Extract | exp.Anonymous:
    """Parse the arguments of `EXTRACT(<part> FROM <expr>)` or `EXTRACT(<part>, <expr>)`.

    An error is reported when neither `FROM` nor a comma separates the two
    arguments.
    """
    part = self._parse_function() or self._parse_var_or_string(upper=True)

    # FROM is tried first; a comma is only looked for when FROM is absent.
    if not self._match(TokenType.FROM) and not self._match(TokenType.COMMA):
        self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

    return self.expression(exp.Extract(this=part, expression=self._parse_bitwise()))
def _parse_charset_name(self) -> exp.Expr | None:
    """
    Parse the charset name that follows USING or CHARACTER SET.

    BINARY and IDENTIFIER tokens are passed to `_parse_var` as extra accepted tokens.
    Dialects that need to preserve quoting for specific name shapes override this.
    """
    accepted = {TokenType.BINARY, TokenType.IDENTIFIER}
    return self._parse_var(tokens=accepted)
exp.DataType.from_str(to.name, dialect=self.dialect, udt=True) 7870 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7871 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7872 7873 return self.build_cast( 7874 strict=strict, 7875 this=this, 7876 to=to, 7877 format=fmt, 7878 safe=safe, 7879 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7880 default=default, 7881 ) 7882 7883 def _parse_string_agg(self) -> exp.GroupConcat: 7884 if self._match(TokenType.DISTINCT): 7885 args: list[exp.Expr | None] = [ 7886 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7887 ] 7888 if self._match(TokenType.COMMA): 7889 args.extend(self._parse_csv(self._parse_disjunction)) 7890 else: 7891 args = self._parse_csv(self._parse_disjunction) # type: ignore 7892 7893 if self._match_text_seq("ON", "OVERFLOW"): 7894 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7895 if self._match_text_seq("ERROR"): 7896 on_overflow: exp.Expr | None = exp.var("ERROR") 7897 else: 7898 self._match_text_seq("TRUNCATE") 7899 on_overflow = self.expression( 7900 exp.OverflowTruncateBehavior( 7901 this=self._parse_string(), 7902 with_count=( 7903 self._match_text_seq("WITH", "COUNT") 7904 or not self._match_text_seq("WITHOUT", "COUNT") 7905 ), 7906 ) 7907 ) 7908 else: 7909 on_overflow = None 7910 7911 index = self._index 7912 if not self._match(TokenType.R_PAREN) and args: 7913 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7914 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 7915 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7916 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7917 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7918 7919 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7920 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7921 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7922 if not self._match_text_seq("WITHIN", "GROUP"): 7923 self._retreat(index) 7924 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7925 7926 # The corresponding match_r_paren will be called in parse_function (caller) 7927 self._match_l_paren() 7928 7929 return self.expression( 7930 exp.GroupConcat( 7931 this=self._parse_order(this=seq_get(args, 0)), 7932 separator=seq_get(args, 1), 7933 on_overflow=on_overflow, 7934 ) 7935 ) 7936 7937 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7938 this = self._parse_bitwise() 7939 7940 if self._match(TokenType.USING): 7941 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_charset_name()) 7942 elif self._match(TokenType.COMMA): 7943 to = self._parse_types() 7944 else: 7945 to = None 7946 7947 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7948 7949 def _parse_xml_element(self) -> exp.XMLElement: 7950 if self._match_text_seq("EVALNAME"): 7951 evalname = True 7952 this = self._parse_bitwise() 7953 else: 7954 evalname = None 7955 self._match_text_seq("NAME") 7956 this = self._parse_id_var() 7957 7958 return self.expression( 7959 exp.XMLElement( 7960 this=this, 7961 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7962 evalname=evalname, 7963 ) 7964 ) 7965 7966 def _parse_xml_table(self) -> exp.XMLTable: 7967 
namespaces = None 7968 passing = None 7969 columns = None 7970 7971 if self._match_text_seq("XMLNAMESPACES", "("): 7972 namespaces = self._parse_xml_namespace() 7973 self._match_text_seq(")", ",") 7974 7975 this = self._parse_string() 7976 7977 if self._match_text_seq("PASSING"): 7978 # The BY VALUE keywords are optional and are provided for semantic clarity 7979 self._match_text_seq("BY", "VALUE") 7980 passing = self._parse_csv(self._parse_column) 7981 7982 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7983 7984 if self._match_text_seq("COLUMNS"): 7985 columns = self._parse_csv(self._parse_field_def) 7986 7987 return self.expression( 7988 exp.XMLTable( 7989 this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref 7990 ) 7991 ) 7992 7993 def _parse_xml_namespace(self) -> list[exp.XMLNamespace]: 7994 namespaces = [] 7995 7996 while True: 7997 if self._match(TokenType.DEFAULT): 7998 uri = self._parse_string() 7999 else: 8000 uri = self._parse_alias(self._parse_string()) 8001 namespaces.append(self.expression(exp.XMLNamespace(this=uri))) 8002 if not self._match(TokenType.COMMA): 8003 break 8004 8005 return namespaces 8006 8007 def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None: 8008 args = self._parse_csv(self._parse_disjunction) 8009 8010 if len(args) < 3: 8011 return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1))) 8012 8013 return self.expression(exp.DecodeCase(expressions=args)) 8014 8015 def _parse_json_key_value(self) -> exp.JSONKeyValue | None: 8016 self._match_text_seq("KEY") 8017 key = self._parse_column() 8018 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 8019 self._match_text_seq("VALUE") 8020 value = self._parse_bitwise() 8021 8022 if not key and not value: 8023 return None 8024 return self.expression(exp.JSONKeyValue(this=key, expression=value)) 8025 8026 def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None: 8027 if not this or not 
self._match_text_seq("FORMAT", "JSON"): 8028 return this 8029 8030 return self.expression(exp.FormatJson(this=this)) 8031 8032 def _parse_on_condition(self) -> exp.OnCondition | None: 8033 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 8034 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 8035 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 8036 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 8037 else: 8038 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 8039 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 8040 8041 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 8042 8043 if not empty and not error and not null: 8044 return None 8045 8046 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 8047 8048 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 8049 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 8050 for value in values: 8051 if self._match_text_seq(value, "ON", on): 8052 return f"{value} ON {on}" 8053 8054 index = self._index 8055 if self._match(TokenType.DEFAULT): 8056 default_value = self._parse_bitwise() 8057 if self._match_text_seq("ON", on): 8058 return default_value 8059 8060 self._retreat(index) 8061 8062 return None 8063 8064 @t.overload 8065 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 8066 8067 @t.overload 8068 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 
8069 8070 def _parse_json_object(self, agg=False): 8071 star = self._parse_star() 8072 expressions = ( 8073 [star] 8074 if star 8075 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 8076 ) 8077 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 8078 8079 unique_keys = None 8080 if self._match_text_seq("WITH", "UNIQUE"): 8081 unique_keys = True 8082 elif self._match_text_seq("WITHOUT", "UNIQUE"): 8083 unique_keys = False 8084 8085 self._match_text_seq("KEYS") 8086 8087 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 8088 self._parse_type() 8089 ) 8090 encoding = self._match_text_seq("ENCODING") and self._parse_var() 8091 8092 return self.expression( 8093 (exp.JSONObjectAgg if agg else exp.JSONObject)( 8094 expressions=expressions, 8095 null_handling=null_handling, 8096 unique_keys=unique_keys, 8097 return_type=return_type, 8098 encoding=encoding, 8099 ) 8100 ) 8101 8102 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 8103 def _parse_json_column_def(self) -> exp.JSONColumnDef: 8104 if not self._match_text_seq("NESTED"): 8105 this = self._parse_id_var() 8106 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 8107 kind = self._parse_types(allow_identifiers=False) 8108 nested = None 8109 else: 8110 this = None 8111 ordinality = None 8112 kind = None 8113 nested = True 8114 8115 format_json = self._match_text_seq("FORMAT", "JSON") 8116 path = self._match_text_seq("PATH") and self._parse_string() 8117 nested_schema = nested and self._parse_json_schema() 8118 8119 return self.expression( 8120 exp.JSONColumnDef( 8121 this=this, 8122 kind=kind, 8123 path=path, 8124 nested_schema=nested_schema, 8125 ordinality=ordinality, 8126 format_json=format_json, 8127 ) 8128 ) 8129 8130 def _parse_json_schema(self) -> exp.JSONSchema: 8131 self._match_text_seq("COLUMNS") 8132 return self.expression( 8133 exp.JSONSchema( 8134 
expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 8135 ) 8136 ) 8137 8138 def _parse_json_table(self) -> exp.JSONTable: 8139 this = self._parse_format_json(self._parse_bitwise()) 8140 path = self._match(TokenType.COMMA) and self._parse_string() 8141 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 8142 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 8143 schema = self._parse_json_schema() 8144 8145 return exp.JSONTable( 8146 this=this, 8147 schema=schema, 8148 path=path, 8149 error_handling=error_handling, 8150 empty_handling=empty_handling, 8151 ) 8152 8153 def _parse_match_against(self) -> exp.MatchAgainst: 8154 if self._match_text_seq("TABLE"): 8155 # parse SingleStore MATCH(TABLE ...) syntax 8156 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 8157 expressions = [] 8158 table = self._parse_table() 8159 if table: 8160 expressions = [table] 8161 else: 8162 expressions = self._parse_csv(self._parse_column) 8163 8164 self._match_text_seq(")", "AGAINST", "(") 8165 8166 this = self._parse_string() 8167 8168 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 8169 modifier = "IN NATURAL LANGUAGE MODE" 8170 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8171 modifier = f"{modifier} WITH QUERY EXPANSION" 8172 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 8173 modifier = "IN BOOLEAN MODE" 8174 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8175 modifier = "WITH QUERY EXPANSION" 8176 else: 8177 modifier = None 8178 8179 return self.expression( 8180 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 8181 ) 8182 8183 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 8184 def _parse_open_json(self) -> exp.OpenJSON: 8185 this = self._parse_bitwise() 8186 path = self._match(TokenType.COMMA) and self._parse_string() 8187 8188 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 8189 this = self._parse_field(any_token=True) 8190 kind = self._parse_types() 8191 path = self._parse_string() 8192 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 8193 8194 return self.expression( 8195 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 8196 ) 8197 8198 expressions = None 8199 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 8200 self._match_l_paren() 8201 expressions = self._parse_csv(_parse_open_json_column_def) 8202 8203 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 8204 8205 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 8206 args = self._parse_csv(self._parse_bitwise) 8207 8208 if self._match(TokenType.IN): 8209 return self.expression( 8210 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 8211 ) 8212 8213 if haystack_first: 8214 haystack = seq_get(args, 0) 8215 needle = seq_get(args, 1) 8216 else: 8217 haystack = seq_get(args, 1) 8218 needle = seq_get(args, 0) 8219 8220 return self.expression( 8221 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8222 ) 8223 8224 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8225 args = self._parse_csv(self._parse_table) 8226 return exp.JoinHint(this=func_name.upper(), expressions=args) 8227 8228 def _parse_substring(self) -> exp.Substring: 8229 # Postgres supports the form: substring(string [from int] [for int]) 8230 # (despite being undocumented, the reverse order also works) 8231 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8232 8233 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8234 8235 start, length = None, None 8236 8237 while self._curr: 8238 if self._match(TokenType.FROM): 8239 start = self._parse_bitwise() 8240 elif self._match(TokenType.FOR): 8241 if not start: 8242 start = exp.Literal.number(1) 8243 length = self._parse_bitwise() 8244 else: 8245 break 
8246 8247 if start: 8248 args.append(start) 8249 if length: 8250 args.append(length) 8251 8252 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8253 8254 def _parse_trim(self) -> exp.Trim: 8255 # https://www.w3resource.com/sql/character-functions/trim.php 8256 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8257 8258 position = None 8259 collation = None 8260 expression = None 8261 8262 if self._match_texts(self.TRIM_TYPES): 8263 position = self._prev.text.upper() 8264 8265 this = self._parse_bitwise() 8266 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8267 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8268 expression = self._parse_bitwise() 8269 8270 if invert_order: 8271 this, expression = expression, this 8272 8273 if self._match(TokenType.COLLATE): 8274 collation = self._parse_bitwise() 8275 8276 return self.expression( 8277 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8278 ) 8279 8280 def _parse_window_clause(self) -> list[exp.Expr] | None: 8281 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8282 8283 def _parse_named_window(self) -> exp.Expr | None: 8284 return self._parse_window(self._parse_id_var(), alias=True) 8285 8286 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8287 if self._curr.token_type == TokenType.VAR: 8288 if self._match_text_seq("IGNORE", "NULLS"): 8289 return self.expression(exp.IgnoreNulls(this=this)) 8290 if self._match_text_seq("RESPECT", "NULLS"): 8291 return self.expression(exp.RespectNulls(this=this)) 8292 return this 8293 8294 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8295 if self._match(TokenType.HAVING): 8296 self._match_texts(("MAX", "MIN")) 8297 max = self._prev.text.upper() != "MIN" 8298 return self.expression( 8299 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8300 ) 8301 8302 
return this 8303 8304 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8305 func = this 8306 comments = func.comments if isinstance(func, exp.Expr) else None 8307 8308 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8309 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8310 if self._match_text_seq("WITHIN", "GROUP"): 8311 order = self._parse_wrapped(self._parse_order) 8312 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8313 8314 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8315 self._match(TokenType.WHERE) 8316 this = self.expression( 8317 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8318 ) 8319 self._match_r_paren() 8320 8321 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8322 # Some dialects choose to implement and some do not. 8323 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8324 8325 # There is some code above in _parse_lambda that handles 8326 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8327 8328 # The below changes handle 8329 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8330 8331 # Oracle allows both formats 8332 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8333 # and Snowflake chose to do the same for familiarity 8334 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8335 if isinstance(this, exp.AggFunc): 8336 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8337 8338 if ignore_respect and ignore_respect is not this: 8339 ignore_respect.replace(ignore_respect.this) 8340 this = self.expression(ignore_respect.__class__(this=this)) 8341 8342 this = self._parse_respect_or_ignore_nulls(this) 8343 8344 # bigquery select from window x AS (partition by ...) 
8345 if alias: 8346 over = None 8347 self._match(TokenType.ALIAS) 8348 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8349 return this 8350 else: 8351 over = self._prev.text.upper() 8352 8353 if comments and isinstance(func, exp.Expr): 8354 func.pop_comments() 8355 8356 if not self._match(TokenType.L_PAREN): 8357 return self.expression( 8358 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8359 ) 8360 8361 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8362 8363 first: bool | None = True if self._match(TokenType.FIRST) else None 8364 if self._match_text_seq("LAST"): 8365 first = False 8366 8367 partition, order = self._parse_partition_and_order() 8368 kind = ( 8369 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8370 ) and self._prev.text 8371 8372 if kind: 8373 self._match(TokenType.BETWEEN) 8374 start = self._parse_window_spec() 8375 8376 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8377 exclude = ( 8378 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8379 if self._match_text_seq("EXCLUDE") 8380 else None 8381 ) 8382 8383 spec = self.expression( 8384 exp.WindowSpec( 8385 kind=kind, 8386 start=start["value"], 8387 start_side=start["side"], 8388 end=end.get("value"), 8389 end_side=end.get("side"), 8390 exclude=exclude, 8391 ) 8392 ) 8393 else: 8394 spec = None 8395 8396 self._match_r_paren() 8397 8398 window = self.expression( 8399 exp.Window( 8400 this=this, 8401 partition_by=partition, 8402 order=order, 8403 spec=spec, 8404 alias=window_alias, 8405 over=over, 8406 first=first, 8407 ), 8408 comments=comments, 8409 ) 8410 8411 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
8412 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8413 return self._parse_window(window, alias=alias) 8414 8415 return window 8416 8417 def _parse_partition_and_order( 8418 self, 8419 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8420 return self._parse_partition_by(), self._parse_order() 8421 8422 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8423 self._match(TokenType.BETWEEN) 8424 8425 return { 8426 "value": ( 8427 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8428 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8429 or self._parse_bitwise() 8430 ), 8431 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8432 } 8433 8434 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8435 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8436 # so this section tries to parse the clause version and if it fails, it treats the token 8437 # as an identifier (alias) 8438 if self._can_parse_limit_or_offset(): 8439 return this 8440 8441 # WINDOW is in ID_VAR_TOKENS, so it can be consumed as an implicit alias. Detect the 8442 # named-window clause shape (`WINDOW <ident> AS (...)`) and avoid swallowing it. 
8443 if self._can_parse_named_window(): 8444 return this 8445 8446 any_token = self._match(TokenType.ALIAS) 8447 comments = self._prev_comments 8448 8449 if explicit and not any_token: 8450 return this 8451 8452 if self._match(TokenType.L_PAREN): 8453 aliases = self.expression( 8454 exp.Aliases( 8455 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8456 ), 8457 comments=comments, 8458 ) 8459 self._match_r_paren(aliases) 8460 return aliases 8461 8462 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8463 self.STRING_ALIASES and self._parse_string_as_identifier() 8464 ) 8465 8466 if alias: 8467 comments.extend(alias.pop_comments()) 8468 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8469 column = this.this 8470 8471 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8472 if not this.comments and column and column.comments: 8473 this.comments = column.pop_comments() 8474 8475 return this 8476 8477 def _parse_id_var( 8478 self, 8479 any_token: bool = True, 8480 tokens: t.Collection[TokenType] | None = None, 8481 ) -> exp.Expr | None: 8482 expression = self._parse_identifier() 8483 if not expression and ( 8484 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8485 ): 8486 quoted = self._prev.token_type == TokenType.STRING 8487 expression = self._identifier_expression(quoted=quoted) 8488 8489 return expression 8490 8491 def _parse_string(self) -> exp.Expr | None: 8492 if self._match_set(self.STRING_PARSERS): 8493 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8494 return self._parse_placeholder() 8495 8496 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8497 if not self._match(TokenType.STRING): 8498 return None 8499 output = exp.to_identifier(self._prev.text, quoted=True) 8500 output.update_positions(self._prev) 8501 return output 8502 8503 def _parse_number(self) -> exp.Expr | None: 8504 if 
self._match_set(self.NUMERIC_PARSERS): 8505 return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev) 8506 return self._parse_placeholder() 8507 8508 def _parse_identifier(self) -> exp.Expr | None: 8509 if self._match(TokenType.IDENTIFIER): 8510 return self._identifier_expression(quoted=True) 8511 return self._parse_placeholder() 8512 8513 def _parse_var( 8514 self, 8515 any_token: bool = False, 8516 tokens: t.Collection[TokenType] | None = None, 8517 upper: bool = False, 8518 ) -> exp.Expr | None: 8519 if ( 8520 (any_token and self._advance_any()) 8521 or self._match(TokenType.VAR) 8522 or (self._match_set(tokens) if tokens else False) 8523 ): 8524 return self.expression( 8525 exp.Var(this=self._prev.text.upper() if upper else self._prev.text) 8526 ) 8527 return self._parse_placeholder() 8528 8529 def _advance_any(self, ignore_reserved: bool = False) -> Token | None: 8530 if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS): 8531 self._advance() 8532 return self._prev 8533 return None 8534 8535 def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None: 8536 return self._parse_string() or self._parse_var(any_token=True, upper=upper) 8537 8538 def _parse_primary_or_var(self) -> exp.Expr | None: 8539 return self._parse_primary() or self._parse_var(any_token=True) 8540 8541 def _parse_null(self) -> exp.Expr | None: 8542 if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): 8543 return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev) 8544 return self._parse_placeholder() 8545 8546 def _parse_boolean(self) -> exp.Expr | None: 8547 if self._match(TokenType.TRUE): 8548 return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev) 8549 if self._match(TokenType.FALSE): 8550 return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev) 8551 return self._parse_placeholder() 8552 8553 def _parse_star(self) -> exp.Expr | None: 8554 if self._match(TokenType.STAR): 8555 return 
self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev) 8556 return self._parse_placeholder() 8557 8558 def _parse_parameter(self) -> exp.Parameter: 8559 this = self._parse_identifier() or self._parse_primary_or_var() 8560 return self.expression(exp.Parameter(this=this)) 8561 8562 def _parse_placeholder(self) -> exp.Expr | None: 8563 if self._match_set(self.PLACEHOLDER_PARSERS): 8564 placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self) 8565 if placeholder: 8566 return placeholder 8567 self._advance(-1) 8568 return None 8569 8570 def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None: 8571 if not self._match_texts(keywords): 8572 return None 8573 if self._match(TokenType.L_PAREN, advance=False): 8574 return self._parse_wrapped_csv(self._parse_expression) 8575 8576 expression = self._parse_alias(self._parse_disjunction(), explicit=True) 8577 return [expression] if expression else None 8578 8579 def _parse_csv( 8580 self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA 8581 ) -> list[T]: 8582 parse_result = parse_method() 8583 items = [parse_result] if parse_result is not None else [] 8584 8585 while self._match(sep): 8586 if isinstance(parse_result, exp.Expr): 8587 self._add_comments(parse_result) 8588 parse_result = parse_method() 8589 if parse_result is not None: 8590 items.append(parse_result) 8591 8592 return items 8593 8594 def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]: 8595 return self._parse_wrapped_csv(self._parse_id_var, optional=optional) 8596 8597 def _parse_wrapped_csv( 8598 self, 8599 parse_method: t.Callable[[], T | None], 8600 sep: TokenType = TokenType.COMMA, 8601 optional: bool = False, 8602 ) -> list[T]: 8603 return self._parse_wrapped( 8604 lambda: self._parse_csv(parse_method, sep=sep), optional=optional 8605 ) 8606 8607 def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T: 8608 wrapped = self._match(TokenType.L_PAREN) 8609 if not 
wrapped and not optional: 8610 self.raise_error("Expecting (") 8611 parse_result = parse_method() 8612 if wrapped: 8613 self._match_r_paren() 8614 return parse_result 8615 8616 def _parse_expressions(self) -> list[exp.Expr]: 8617 return self._parse_csv(self._parse_expression) 8618 8619 def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None: 8620 return ( 8621 self._parse_set_operations( 8622 self._parse_alias(self._parse_assignment(), explicit=True) 8623 if alias 8624 else self._parse_assignment() 8625 ) 8626 or self._parse_select() 8627 ) 8628 8629 def _parse_ddl_select(self) -> exp.Expr | None: 8630 return self._parse_query_modifiers( 8631 self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False)) 8632 ) 8633 8634 def _parse_transaction(self) -> exp.Transaction | exp.Command: 8635 this = None 8636 if self._match_texts(self.TRANSACTION_KIND): 8637 this = self._prev.text 8638 8639 self._match_texts(("TRANSACTION", "WORK")) 8640 8641 modes = [] 8642 while True: 8643 mode = [] 8644 while self._match(TokenType.VAR) or self._match(TokenType.NOT): 8645 mode.append(self._prev.text) 8646 8647 if mode: 8648 modes.append(" ".join(mode)) 8649 if not self._match(TokenType.COMMA): 8650 break 8651 8652 return self.expression(exp.Transaction(this=this, modes=modes)) 8653 8654 def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback: 8655 chain = None 8656 savepoint = None 8657 is_rollback = self._prev.token_type == TokenType.ROLLBACK 8658 8659 self._match_texts(("TRANSACTION", "WORK")) 8660 8661 if self._match_text_seq("TO"): 8662 self._match_text_seq("SAVEPOINT") 8663 savepoint = self._parse_id_var() 8664 8665 if self._match(TokenType.AND): 8666 chain = not self._match_text_seq("NO") 8667 self._match_text_seq("CHAIN") 8668 8669 if is_rollback: 8670 return self.expression(exp.Rollback(savepoint=savepoint)) 8671 8672 return self.expression(exp.Commit(chain=chain)) 8673 8674 def _parse_refresh(self) -> exp.Refresh | 
exp.Command: 8675 if self._match(TokenType.TABLE): 8676 kind = "TABLE" 8677 elif self._match_text_seq("MATERIALIZED", "VIEW"): 8678 kind = "MATERIALIZED VIEW" 8679 else: 8680 kind = "" 8681 8682 this = self._parse_string() or self._parse_table() 8683 if not kind and not isinstance(this, exp.Literal): 8684 return self._parse_as_command(self._prev) 8685 8686 return self.expression(exp.Refresh(this=this, kind=kind)) 8687 8688 def _parse_column_def_with_exists(self): 8689 start = self._index 8690 self._match(TokenType.COLUMN) 8691 8692 exists_column = self._parse_exists(not_=True) 8693 expression = self._parse_field_def() 8694 8695 if not isinstance(expression, exp.ColumnDef): 8696 self._retreat(start) 8697 return None 8698 8699 expression.set("exists", exists_column) 8700 8701 return expression 8702 8703 def _parse_add_column(self) -> exp.ColumnDef | None: 8704 if not self._prev.text.upper() == "ADD": 8705 return None 8706 8707 return self._parse_column_def_with_exists() 8708 8709 def _parse_drop_column(self) -> exp.Drop | exp.Command | None: 8710 drop = self._parse_drop() if self._match(TokenType.DROP) else None 8711 if drop and not isinstance(drop, exp.Command): 8712 drop.set("kind", drop.args.get("kind", "COLUMN")) 8713 return drop 8714 8715 def _parse_alter_drop_action(self) -> exp.Expr | None: 8716 return self._parse_drop_column() 8717 8718 # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html 8719 def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition: 8720 return self.expression( 8721 exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists) 8722 ) 8723 8724 def _parse_alter_table_add(self) -> list[exp.Expr]: 8725 def _parse_add_alteration() -> exp.Expr | None: 8726 self._match_text_seq("ADD") 8727 if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False): 8728 return self.expression( 8729 exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint)) 8730 ) 8731 8732 
column_def = self._parse_add_column() 8733 if isinstance(column_def, exp.ColumnDef): 8734 return column_def 8735 8736 exists = self._parse_exists(not_=True) 8737 if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False): 8738 return self.expression( 8739 exp.AddPartition( 8740 exists=exists, 8741 this=self._parse_field(any_token=True), 8742 location=self._match_text_seq("LOCATION", advance=False) 8743 and self._parse_property(), 8744 ) 8745 ) 8746 8747 return None 8748 8749 if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and ( 8750 not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN 8751 or self._match_text_seq("COLUMNS") 8752 ): 8753 schema = self._parse_schema() 8754 8755 return ( 8756 ensure_list(schema) 8757 if schema 8758 else self._parse_csv(self._parse_column_def_with_exists) 8759 ) 8760 8761 return self._parse_csv(_parse_add_alteration) 8762 8763 def _parse_alter_table_alter(self) -> exp.Expr | None: 8764 if self._match_texts(self.ALTER_ALTER_PARSERS): 8765 return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self) 8766 8767 # Many dialects support the ALTER [COLUMN] syntax, so if there is no 8768 # keyword after ALTER we default to parsing this statement 8769 self._match(TokenType.COLUMN) 8770 column = self._parse_field(any_token=True) 8771 8772 if self._match_pair(TokenType.DROP, TokenType.DEFAULT): 8773 return self.expression(exp.AlterColumn(this=column, drop=True)) 8774 if self._match_pair(TokenType.SET, TokenType.DEFAULT): 8775 return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction())) 8776 if self._match(TokenType.COMMENT): 8777 return self.expression(exp.AlterColumn(this=column, comment=self._parse_string())) 8778 if self._match_text_seq("DROP", "NOT", "NULL"): 8779 return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True)) 8780 if self._match_text_seq("SET", "NOT", "NULL"): 8781 return self.expression(exp.AlterColumn(this=column, allow_null=False)) 8782 
8783 if self._match_text_seq("SET", "VISIBLE"): 8784 return self.expression(exp.AlterColumn(this=column, visible="VISIBLE")) 8785 if self._match_text_seq("SET", "INVISIBLE"): 8786 return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE")) 8787 8788 self._match_text_seq("SET", "DATA") 8789 self._match_text_seq("TYPE") 8790 return self.expression( 8791 exp.AlterColumn( 8792 this=column, 8793 dtype=self._parse_types(), 8794 collate=self._match(TokenType.COLLATE) and self._parse_term(), 8795 using=self._match(TokenType.USING) and self._parse_disjunction(), 8796 ) 8797 ) 8798 8799 def _parse_alter_diststyle(self) -> exp.AlterDistStyle: 8800 if self._match_texts(("ALL", "EVEN", "AUTO")): 8801 return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper()))) 8802 8803 self._match_text_seq("KEY", "DISTKEY") 8804 return self.expression(exp.AlterDistStyle(this=self._parse_column())) 8805 8806 def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey: 8807 if compound: 8808 self._match_text_seq("SORTKEY") 8809 8810 if self._match(TokenType.L_PAREN, advance=False): 8811 return self.expression( 8812 exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound) 8813 ) 8814 8815 self._match_texts(("AUTO", "NONE")) 8816 return self.expression( 8817 exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound) 8818 ) 8819 8820 def _parse_alter_table_drop(self) -> list[exp.Expr]: 8821 index = self._index - 1 8822 8823 partition_exists = self._parse_exists() 8824 if self._match(TokenType.PARTITION, advance=False): 8825 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 8826 8827 self._retreat(index) 8828 return self._parse_csv(self._parse_alter_drop_action) 8829 8830 def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None: 8831 if self._match(TokenType.COLUMN) or ( 8832 not self.ALTER_RENAME_REQUIRES_COLUMN and not self._match_text_seq("TO", 
advance=False) 8833 ): 8834 exists = self._parse_exists() 8835 old_column = self._parse_column() 8836 to = self._match_text_seq("TO") 8837 new_column = self._parse_column() 8838 8839 if old_column is None or not to or new_column is None: 8840 return None 8841 8842 return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists)) 8843 8844 self._match_text_seq("TO") 8845 return self.expression(exp.AlterRename(this=self._parse_table(schema=True))) 8846 8847 def _parse_alter_table_set(self) -> exp.AlterSet: 8848 alter_set = self.expression(exp.AlterSet()) 8849 8850 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 8851 "TABLE", "PROPERTIES" 8852 ): 8853 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 8854 elif self._match_text_seq("FILESTREAM_ON", advance=False): 8855 alter_set.set("expressions", [self._parse_assignment()]) 8856 elif self._match_texts(("LOGGED", "UNLOGGED")): 8857 alter_set.set("option", exp.var(self._prev.text.upper())) 8858 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 8859 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 8860 elif self._match_text_seq("LOCATION"): 8861 alter_set.set("location", self._parse_field()) 8862 elif self._match_text_seq("ACCESS", "METHOD"): 8863 alter_set.set("access_method", self._parse_field()) 8864 elif self._match_text_seq("TABLESPACE"): 8865 alter_set.set("tablespace", self._parse_field()) 8866 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 8867 alter_set.set("file_format", [self._parse_field()]) 8868 elif self._match_text_seq("STAGE_FILE_FORMAT"): 8869 alter_set.set("file_format", self._parse_wrapped_options()) 8870 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 8871 alter_set.set("copy_options", self._parse_wrapped_options()) 8872 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 8873 alter_set.set("tag", 
self._parse_csv(self._parse_assignment)) 8874 else: 8875 if self._match_text_seq("SERDE"): 8876 alter_set.set("serde", self._parse_field()) 8877 8878 properties = self._parse_wrapped(self._parse_properties, optional=True) 8879 alter_set.set("expressions", [properties]) 8880 8881 return alter_set 8882 8883 def _parse_alter_session(self) -> exp.AlterSession: 8884 """Parse ALTER SESSION SET/UNSET statements.""" 8885 if self._match(TokenType.SET): 8886 expressions = self._parse_csv(lambda: self._parse_set_item_assignment()) 8887 return self.expression(exp.AlterSession(expressions=expressions, unset=False)) 8888 8889 self._match_text_seq("UNSET") 8890 expressions = self._parse_csv( 8891 lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True))) 8892 ) 8893 return self.expression(exp.AlterSession(expressions=expressions, unset=True)) 8894 8895 def _parse_alter(self) -> exp.Alter | exp.Command: 8896 start = self._prev 8897 8898 iceberg = self._match_text_seq("ICEBERG") 8899 8900 alter_token = self._match_set(self.ALTERABLES) and self._prev 8901 if not alter_token: 8902 return self._parse_as_command(start) 8903 if iceberg and alter_token.token_type != TokenType.TABLE: 8904 return self._parse_as_command(start) 8905 8906 exists = self._parse_exists() 8907 only = self._match_text_seq("ONLY") 8908 8909 if alter_token.token_type == TokenType.SESSION: 8910 this = None 8911 check = None 8912 cluster = None 8913 else: 8914 this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS) 8915 check = self._match_text_seq("WITH", "CHECK") 8916 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 8917 8918 if self._next: 8919 self._advance() 8920 8921 parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None 8922 if parser: 8923 actions = ensure_list(parser(self)) 8924 not_valid = self._match_text_seq("NOT", "VALID") 8925 options = self._parse_csv(self._parse_property) 8926 cascade = 
self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE") 8927 8928 if not self._curr and actions: 8929 return self.expression( 8930 exp.Alter( 8931 this=this, 8932 kind=alter_token.text.upper(), 8933 exists=exists, 8934 actions=actions, 8935 only=only, 8936 options=options, 8937 cluster=cluster, 8938 not_valid=not_valid, 8939 check=check, 8940 cascade=cascade, 8941 iceberg=iceberg, 8942 ) 8943 ) 8944 8945 return self._parse_as_command(start) 8946 8947 def _parse_analyze(self) -> exp.Analyze | exp.Command: 8948 start = self._prev 8949 # https://duckdb.org/docs/sql/statements/analyze 8950 if not self._curr: 8951 return self.expression(exp.Analyze()) 8952 8953 options = [] 8954 while self._match_texts(self.ANALYZE_STYLES): 8955 if self._prev.text.upper() == "BUFFER_USAGE_LIMIT": 8956 options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}") 8957 else: 8958 options.append(self._prev.text.upper()) 8959 8960 this: exp.Expr | None = None 8961 inner_expression: exp.Expr | None = None 8962 8963 kind = self._curr.text.upper() if self._curr else None 8964 8965 if self._match(TokenType.TABLE) or self._match(TokenType.INDEX): 8966 this = self._parse_table_parts() 8967 elif self._match_text_seq("TABLES"): 8968 if self._match_set((TokenType.FROM, TokenType.IN)): 8969 kind = f"{kind} {self._prev.text.upper()}" 8970 this = self._parse_table(schema=True, is_db_reference=True) 8971 elif self._match_text_seq("DATABASE"): 8972 this = self._parse_table(schema=True, is_db_reference=True) 8973 elif self._match_text_seq("CLUSTER"): 8974 this = self._parse_table() 8975 # Try matching inner expr keywords before fallback to parse table. 
8976 elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8977 kind = None 8978 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8979 else: 8980 # Empty kind https://prestodb.io/docs/current/sql/analyze.html 8981 kind = None 8982 this = self._parse_table_parts() 8983 8984 partition = self._try_parse(self._parse_partition) 8985 if not partition and self._match_texts(self.PARTITION_KEYWORDS): 8986 return self._parse_as_command(start) 8987 8988 # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/ 8989 if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq( 8990 "WITH", "ASYNC", "MODE" 8991 ): 8992 mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE" 8993 else: 8994 mode = None 8995 8996 if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS): 8997 inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self) 8998 8999 properties = self._parse_properties() 9000 return self.expression( 9001 exp.Analyze( 9002 kind=kind, 9003 this=this, 9004 mode=mode, 9005 partition=partition, 9006 properties=properties, 9007 expression=inner_expression, 9008 options=options, 9009 ) 9010 ) 9011 9012 # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html 9013 def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics: 9014 this = None 9015 kind = self._prev.text.upper() 9016 option = self._prev.text.upper() if self._match_text_seq("DELTA") else None 9017 expressions = [] 9018 9019 if not self._match_text_seq("STATISTICS"): 9020 self.raise_error("Expecting token STATISTICS") 9021 9022 if self._match_text_seq("NOSCAN"): 9023 this = "NOSCAN" 9024 elif self._match(TokenType.FOR): 9025 if self._match_text_seq("ALL", "COLUMNS"): 9026 this = "FOR ALL COLUMNS" 9027 if self._match_texts("COLUMNS"): 9028 this = "FOR COLUMNS" 9029 expressions = self._parse_csv(self._parse_column_reference) 9030 elif self._match_text_seq("SAMPLE"): 9031 sample = 
self._parse_number() 9032 expressions = [ 9033 self.expression( 9034 exp.AnalyzeSample( 9035 sample=sample, 9036 kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None, 9037 ) 9038 ) 9039 ] 9040 9041 return self.expression( 9042 exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions) 9043 ) 9044 9045 # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html 9046 def _parse_analyze_validate(self) -> exp.AnalyzeValidate: 9047 kind = None 9048 this = None 9049 expression: exp.Expr | None = None 9050 if self._match_text_seq("REF", "UPDATE"): 9051 kind = "REF" 9052 this = "UPDATE" 9053 if self._match_text_seq("SET", "DANGLING", "TO", "NULL"): 9054 this = "UPDATE SET DANGLING TO NULL" 9055 elif self._match_text_seq("STRUCTURE"): 9056 kind = "STRUCTURE" 9057 if self._match_text_seq("CASCADE", "FAST"): 9058 this = "CASCADE FAST" 9059 elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts( 9060 ("ONLINE", "OFFLINE") 9061 ): 9062 this = f"CASCADE COMPLETE {self._prev.text.upper()}" 9063 expression = self._parse_into() 9064 9065 return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression)) 9066 9067 def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None: 9068 this = self._prev.text.upper() 9069 if self._match_text_seq("COLUMNS"): 9070 return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}")) 9071 return None 9072 9073 def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None: 9074 kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None 9075 if self._match_text_seq("STATISTICS"): 9076 return self.expression(exp.AnalyzeDelete(kind=kind)) 9077 return None 9078 9079 def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None: 9080 if self._match_text_seq("CHAINED", "ROWS"): 9081 return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into())) 9082 return None 9083 9084 # 
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    """Parse `<previous keyword> HISTOGRAM ON <columns> ...` into `exp.AnalyzeHistogram`.

    `this` is the upper-cased text of the token that preceded this call. When
    `HISTOGRAM ON` does not follow, the node is returned with empty `expressions`.
    """
    this = self._prev.text.upper()
    expression: exp.Expr | None = None
    expressions = []
    update_options = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        expressions = self._parse_csv(self._parse_column_reference)
        with_expressions = []
        while self._match(TokenType.WITH):
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            if self._match_texts(("SYNC", "ASYNC")):
                # Peek at MODE first so `_prev` still holds SYNC / ASYNC for the f-string.
                if self._match_text_seq("MODE", advance=False):
                    with_expressions.append(f"{self._prev.text.upper()} MODE")
                    self._advance()
            else:
                buckets = self._parse_number()
                if self._match_text_seq("BUCKETS"):
                    with_expressions.append(f"{buckets} BUCKETS")
        if with_expressions:
            expression = self.expression(exp.AnalyzeWith(expressions=with_expressions))

        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            # UPDATE was only peeked at, so `_prev` is still MANUAL / AUTO here.
            update_options = self._prev.text.upper()
            self._advance()
        elif self._match_text_seq("USING", "DATA"):
            expression = self.expression(exp.UsingData(this=self._parse_string()))

    return self.expression(
        exp.AnalyzeHistogram(
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )
    )

def _parse_merge(self) -> exp.Merge:
    """Parse `[INTO] <target> [alias] [USING] <source> [ON ...] [USING ...] <whens> [returning]`."""
    self._match(TokenType.INTO)
    target = self._parse_table()

    if target and self._match(TokenType.ALIAS, advance=False):
        target.set("alias", self._parse_table_alias())

    self._match(TokenType.USING)
    using = self._parse_table()

    return self.expression(
        exp.Merge(
            this=target,
            using=using,
            on=self._match(TokenType.ON) and self._parse_disjunction(),
            using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(),
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
    )

def _parse_when_matched(self) -> exp.Whens:
    """Parse zero or more `WHEN [NOT] MATCHED ... THEN <action>` clauses into `exp.Whens`."""
    whens = []

    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")
        # False for BY TARGET; otherwise the result of matching BY SOURCE.
        source = (
            False
            if self._match_text_seq("BY", "TARGET")
            else self._match_text_seq("BY", "SOURCE")
        )
        condition = self._parse_disjunction() if self._match(TokenType.AND) else None

        self._match(TokenType.THEN)

        if self._match(TokenType.INSERT):
            this = self._parse_star()
            if this:
                then: exp.Expr | None = self.expression(exp.Insert(this=this))
            else:
                then = self.expression(
                    exp.Insert(
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                        where=self._parse_where(),
                    )
                )
        elif self._match(TokenType.UPDATE):
            expressions = self._parse_star()
            if expressions:
                then = self.expression(exp.Update(expressions=expressions))
            else:
                then = self.expression(
                    exp.Update(
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                        where=self._parse_where(),
                    )
                )
        elif self._match(TokenType.DELETE):
            then = self.expression(exp.Var(this=self._prev.text))
        else:
            then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

        whens.append(
            self.expression(
                exp.When(matched=matched, source=source, condition=condition, then=then)
            )
        )
    return self.expression(exp.Whens(expressions=whens))

def _parse_show(self) -> exp.Expr | None:
    """Dispatch to a parser from `SHOW_PARSERS`, or fall back to an `exp.Command`."""
    parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
    if parser:
        return parser(self)
    return self._parse_as_command(self._prev)

def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None:
    """Parse `<left> [delimiter] <right>` into an `exp.SetItem` wrapping an `exp.EQ`.

    Args:
        kind: stored on the resulting `SetItem`. For "GLOBAL" / "SESSION" followed
            by TRANSACTION, parsing is delegated to `_parse_set_transaction`.

    Returns None, with the cursor rewound, if there is no left-hand side or if
    `SET_REQUIRES_ASSIGNMENT_DELIMITER` is set and no delimiter was found.
    """
    index = self._index

    if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
        return self._parse_set_transaction(global_=kind == "GLOBAL")

    left = self._parse_primary() or self._parse_column()
    assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS)

    if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
        self._retreat(index)
        return None

    right = self._parse_statement() or self._parse_id_var()
    # Bare column / identifier values are stored as variables named after them.
    if isinstance(right, (exp.Column, exp.Identifier)):
        right = exp.var(right.name)

    this = self.expression(exp.EQ(this=left, expression=right))
    return self.expression(exp.SetItem(this=this, kind=kind))

def _parse_set_transaction(self, global_: bool = False) -> exp.Expr:
    """Parse `[TRANSACTION] <characteristic>, ...` into a `SetItem` of kind "TRANSACTION"."""
    self._match_text_seq("TRANSACTION")
    characteristics = self._parse_csv(
        lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
    )
    return self.expression(
        exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_)
    )

def _parse_set_item(self) -> exp.Expr | None:
    """Parse one SET item via `SET_PARSERS`, else as a plain assignment."""
    parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
    return parser(self) if parser else self._parse_set_item_assignment(kind=None)

def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a comma-separated list of SET items into `exp.Set`.

    If any tokens are left over afterwards, the cursor is rewound to the entry
    position and the statement is returned as an `exp.Command` instead.
    """
    index = self._index
    set_ = self.expression(
        exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag)
    )

    if self._curr:
        self._retreat(index)
        return self._parse_as_command(self._prev)

    return set_

def _parse_var_from_options(
    self, options: OPTIONS_TYPE, raise_unmatched: bool = True
) -> exp.Var | None:
    """Parse the current token, plus an allowed continuation, into an `exp.Var`.

    Args:
        options: maps an upper-cased leading keyword to its allowed continuations,
            each a single keyword or a sequence of keywords.
        raise_unmatched: whether to call `raise_error` when nothing matches.

    Returns None when there is no current token, or (with the cursor rewound)
    when the keyword is unknown or none of its non-empty continuations matched.
    """
    start = self._curr
    if not start:
        return None

    option = start.text.upper()
    continuations = options.get(option)

    index = self._index
    self._advance()
    for keywords in continuations or []:
        if isinstance(keywords, str):
            keywords = (keywords,)

        if self._match_text_seq(*keywords):
            option = f"{option} {' '.join(keywords)}"
            break
    else:
        # No continuation matched. That is a failure unless the keyword is known
        # and has an empty continuation list.
        if continuations or continuations is None:
            if raise_unmatched:
                self.raise_error(f"Unknown option {option}")

            self._retreat(index)
            return None

    return exp.var(option)

def _parse_as_command(self, start: Token) -> exp.Command:
    """Consume all remaining tokens and return the SQL from `start` as an `exp.Command`.

    The first `len(start.text)` characters become `this`, the rest `expression`.
    """
    while self._curr:
        self._advance()
    text = self._find_sql(start, self._prev)
    size = len(start.text)
    self._warn_unsupported()
    return exp.Command(this=text[:size], expression=text[size:])

def _parse_dict_property(self, this: str) -> exp.DictProperty:
    """Parse `(<kind> [(<key> <value> ...)])` into `exp.DictProperty`.

    Reports an error through `_match_l_paren` / `_match_r_paren` if the outer
    parentheses are missing.
    """
    settings = []

    self._match_l_paren()
    kind = self._parse_id_var()

    if self._match(TokenType.L_PAREN):
        while True:
            key = self._parse_id_var()
            value = self._parse_function() or self._parse_primary_or_var()
            if not key and value is None:
                break
            settings.append(self.expression(exp.DictSubProperty(this=key, value=value)))
        self._match(TokenType.R_PAREN)

    self._match_r_paren()

    return self.expression(
        exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings)
    )

def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse `([MIN <min>] [MAX] <max>)` into `exp.DictRange`.

    Without a MIN keyword, the single value is the maximum and the minimum is 0.
    """
    self._match_l_paren()
    has_min = self._match_text_seq("MIN")
    if has_min:
        min_value = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        max_value = self._parse_var() or self._parse_primary()
    else:
        max_value = self._parse_var() or self._parse_primary()
        min_value = exp.Literal.number(0)
    self._match_r_paren()
    return self.expression(exp.DictRange(this=this, min=min_value, max=max_value))

def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None:
    """Parse `<expr> [, <position>] IN <iterator> [IF <condition>]` into `exp.Comprehension`.

    Returns None if IN is missing, after rewinding to one token before the entry
    position (presumably the keyword that introduced the comprehension -- verify
    against the caller).
    """
    index = self._index
    expression = self._parse_column()
    position = self._match(TokenType.COMMA) and self._parse_column()

    if not self._match(TokenType.IN):
        self._retreat(index - 1)
        return None
    iterator = self._parse_column()
    condition = self._parse_disjunction() if self._match_text_seq("IF") else None
    return self.expression(
        exp.Comprehension(
            this=this,
            expression=expression,
            position=position,
            iterator=iterator,
            condition=condition,
        )
    )

def _parse_heredoc(self) -> exp.Heredoc | None:
    """Parse a heredoc: a HEREDOC_STRING token, or text delimited by `$` / `$tag$`.

    Returns None if the input starts with neither. Reports an error through
    `raise_error` when an opening delimiter has no matching close.
    """
    if self._match(TokenType.HEREDOC_STRING):
        return self.expression(exp.Heredoc(this=self._prev.text))

    if not self._match_text_seq("$"):
        return None

    # `tags` accumulates the tokens of the opening delimiter: ["$", "$"] or ["$", TAG, "$"].
    tags = ["$"]
    tag_text = None

    if self._is_connected():
        self._advance()
        tags.append(self._prev.text.upper())
    else:
        self.raise_error("No closing $ found")

    if tags[-1] != "$":
        if self._is_connected() and self._match_text_seq("$"):
            tag_text = tags[-1]
            tags.append("$")
        else:
            self.raise_error("No closing $ found")

    heredoc_start = self._curr

    # Scan forward until the same token sequence appears again.
    while self._curr:
        if self._match_text_seq(*tags, advance=False):
            this = self._find_sql(heredoc_start, self._prev)
            self._advance(len(tags))
            return self.expression(exp.Heredoc(this=this, tag=tag_text))

        self._advance()

    self.raise_error(f"No closing {''.join(tags)} found")
    return None

def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None:
    """Look up a parser whose space-joined keyword sequence matches the upcoming tokens.

    Tokens are consumed one at a time while their upper-cased text walks `trie`.

    Args:
        parsers: maps a space-joined keyword sequence to its parser callable.
        trie: trie over the keys of `parsers`.

    Returns:
        The matching parser, with the cursor left just past the matched keywords.
        None, with the cursor restored, when the lookup fails or the tokens run
        out before a full key is matched.
    """
    if not self._curr:
        return None

    index = self._index
    this = []
    # Stop when the tokens are exhausted: the trie walk may still be on a prefix,
    # and `self._curr` is then None and must not be dereferenced.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        this.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            subparser = parsers[" ".join(this)]
            return subparser

    self._retreat(index)
    return None

def _match_l_paren(self, expression: exp.Expr | None = None) -> None:
    """Consume a left parenthesis, reporting "Expecting (" through `raise_error` if absent."""
    if not self._match(TokenType.L_PAREN, expression=expression):
        self.raise_error("Expecting (")

def _match_r_paren(self, expression: exp.Expr | None = None) -> None:
    """Consume a right parenthesis, reporting "Expecting )" through `raise_error` if absent."""
    if not self._match(TokenType.R_PAREN, expression=expression):
        self.raise_error("Expecting )")

def _replace_lambda(
    self, node: exp.Expr | None, expressions: list[exp.Expr]
) -> exp.Expr | None:
    """Replace columns in `node` that refer to lambda parameters.

    Args:
        node: the lambda body; returned unchanged if falsy.
        expressions: the lambda parameters. A parameter's `to` arg, when set, is
            applied to each replaced reference as an `exp.Cast`.

    Returns:
        `node` after replacement, or the replacement itself when `node` was the
        matching column.
    """
    if not node:
        return node

    # `False` marks a known parameter without a type, as opposed to `None` (unknown name).
    lambda_types = {e.name: e.args.get("to") or False for e in expressions}

    for column in node.find_all(exp.Column):
        typ = lambda_types.get(column.parts[0].name)
        if typ is not None:
            dot_or_id = column.to_dot() if column.table else column.this

            if typ:
                dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ))

            parent = column.parent

            # If the column sits inside a chain of Dot nodes, replace the outermost Dot.
            while isinstance(parent, exp.Dot):
                if not isinstance(parent.parent, exp.Dot):
                    parent.replace(dot_or_id)
                    break
                parent = parent.parent
            else:
                if column is node:
                    node = dot_or_id
                else:
                    column.replace(dot_or_id)
    return node

def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr:
    """Parse a TRUNCATE statement, or a TRUNCATE(...) function call.

    Falls back to `exp.Command` if tokens remain after the recognized clauses.
    """
    start = self._prev

    # Not to be confused with TRUNCATE(number, decimals) function call
    if self._match(TokenType.L_PAREN):
        self._retreat(self._index - 2)
        return self._parse_function()

    # Clickhouse supports TRUNCATE DATABASE as well
    is_database = self._match(TokenType.DATABASE)

    self._match(TokenType.TABLE)

    exists = self._parse_exists(not_=False)

    expressions = self._parse_csv(
        lambda: self._parse_table(schema=True, is_db_reference=is_database)
    )

    cluster = self._parse_on_property() if self._match(TokenType.ON) else None

    if self._match_text_seq("RESTART", "IDENTITY"):
        identity = "RESTART"
    elif self._match_text_seq("CONTINUE", "IDENTITY"):
        identity = "CONTINUE"
    else:
        identity = None

    if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
        option = self._prev.text
    else:
        option = None

    partition = self._parse_partition()

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.TruncateTable(
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )
    )

def _parse_with_operator(self) -> exp.Expr | None:
    """Parse an ordered opclass expression, wrapped in `exp.WithOperator` if `WITH <op>` follows."""
    this = self._parse_ordered(self._parse_opclass)

    if not self._match(TokenType.WITH):
        return this

    op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS)

    return self.expression(exp.WithOperator(this=this, op=op))

def _parse_wrapped_options(self) -> list[exp.Expr]:
    """Parse `[=] [(] <option> ... )` into a flat list of option expressions.

    Stops at the closing parenthesis or the end of input. An unparseable option
    is reported through `raise_error` and ends the loop.
    """
    self._match(TokenType.EQ)
    self._match(TokenType.L_PAREN)

    opts: list[exp.Expr] = []
    option: exp.Expr | list[exp.Expr] | None
    while self._curr and not self._match(TokenType.R_PAREN):
        if self._match_text_seq("FORMAT_NAME", "="):
            # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
            option = self._parse_format_name()
        else:
            option = self._parse_property()

        if option is None:
            self.raise_error("Unable to parse option")
            break

        opts.extend(ensure_list(option))

    return opts

def _parse_copy_parameters(self) -> list[exp.CopyParameter]:
    """Parse COPY parameters up to, but not including, the closing parenthesis.

    Parameters are separated by commas when `dialect.COPY_PARAMS_ARE_CSV` is set.
    """
    sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

    options = []
    while self._curr and not self._match(TokenType.R_PAREN, advance=False):
        option = self._parse_var(any_token=True)
        prev = self._prev.text.upper()

        # Different dialects might separate options and values by white space, "=" and "AS"
        self._match(TokenType.EQ)
        self._match(TokenType.ALIAS)

        param = self.expression(exp.CopyParameter(this=option))

        if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
            TokenType.L_PAREN, advance=False
        ):
            # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
            param.set("expressions", self._parse_wrapped_options())
        elif prev == "FILE_FORMAT":
            # T-SQL's external file format case
            param.set("expression", self._parse_field())
        elif (
            prev == "FORMAT"
            and self._prev.token_type == TokenType.ALIAS
            and self._match_texts(("AVRO", "JSON"))
        ):
            param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}"))
            param.set("expression", self._parse_field())
        else:
            param.set("expression", self._parse_unquoted_field() or self._parse_bracket())

        options.append(param)

        if sep:
            self._match(sep)

    return options

def _parse_credentials(self) -> exp.Credentials | None:
    """Parse the optional credential clauses of COPY into `exp.Credentials`.

    Recognizes STORAGE_INTEGRATION, CREDENTIALS, ENCRYPTION, IAM_ROLE and REGION,
    in that order. A node is always returned, possibly with no args set.
    """
    expr = self.expression(exp.Credentials())

    if self._match_text_seq("STORAGE_INTEGRATION", "="):
        expr.set("storage", self._parse_field())
    if self._match_text_seq("CREDENTIALS"):
        # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
        creds = (
            self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
        )
        expr.set("credentials", creds)
    if self._match_text_seq("ENCRYPTION"):
        expr.set("encryption", self._parse_wrapped_options())
    if self._match_text_seq("IAM_ROLE"):
        expr.set(
            "iam_role",
            exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(),
        )
    if self._match_text_seq("REGION"):
        expr.set("region", self._parse_field())

    return expr

def _parse_file_location(self) -> exp.Expr | None:
    """Parse one file location of COPY; delegates to `_parse_field`."""
    return self._parse_field()

def _parse_copy(self) -> exp.Copy | exp.Command:
    """Parse a COPY statement into `exp.Copy`.

    `kind` is truthy when FROM was matched or TO was not. Falls back to
    `exp.Command` if tokens remain after the recognized clauses.
    """
    start = self._prev

    self._match(TokenType.INTO)

    this = (
        self._parse_select(nested=True, parse_subquery_alias=False)
        if self._match(TokenType.L_PAREN, advance=False)
        else self._parse_table(schema=True)
    )

    kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

    files = self._parse_csv(self._parse_file_location)
    if self._match(TokenType.EQ, advance=False):
        # Backtrack one token since we've consumed the lhs of a parameter assignment here.
        # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
        # list via `_parse_wrapped(..)` below.
        self._advance(-1)
        files = []

    credentials = self._parse_credentials()

    self._match_text_seq("WITH")

    params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

    # Fallback case
    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params)
    )

def _parse_normalize(self) -> exp.Normalize:
    """Parse `<expr> [, <form>]` into `exp.Normalize`."""
    return self.expression(
        exp.Normalize(
            this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var()
        )
    )

def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor:
    """Parse `<expr> [, <decimals>] [TO <unit>]` into an instance of `expr_type`."""
    args = self._parse_csv(lambda: self._parse_lambda())

    this = seq_get(args, 0)
    decimals = seq_get(args, 1)

    return expr_type(
        this=this,
        decimals=decimals,
        to=self._parse_var() if self._match_text_seq("TO") else None,
    )

def _parse_star_ops(self) -> exp.Expr | None:
    """Parse what follows a `*` token.

    `COLUMNS(` is parsed as a function (an `exp.Columns` result is flagged with
    `unpack`). Otherwise an `exp.Star` is built with optional EXCEPT / EXCLUDE,
    REPLACE and RENAME operations, positioned at the star token.
    """
    star_token = self._prev

    if self._match_text_seq("COLUMNS", "(", advance=False):
        this = self._parse_function()
        if isinstance(this, exp.Columns):
            this.set("unpack", True)
        return this

    return self.expression(
        exp.Star(
            except_=self._parse_star_op("EXCEPT", "EXCLUDE"),
            replace=self._parse_star_op("REPLACE"),
            rename=self._parse_star_op("RENAME"),
        )
    ).update_positions(star_token)

def _parse_grant_privilege(self) -> exp.GrantPrivilege | None:
    """Parse one privilege (consecutive keywords, optional column list) into `exp.GrantPrivilege`."""
    privilege_parts = []

    # Keep consuming consecutive keywords until comma (end of this privilege) or ON
    # (end of privilege list) or L_PAREN (start of column list) are met
    while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
        privilege_parts.append(self._curr.text.upper())
        self._advance()

    this = exp.var(" ".join(privilege_parts))
    expressions = (
        self._parse_wrapped_csv(self._parse_column)
        if self._match(TokenType.L_PAREN, advance=False)
        else None
    )

    return self.expression(exp.GrantPrivilege(this=this, expressions=expressions))

def _parse_grant_principal(self) -> exp.GrantPrincipal | None:
    """Parse `[ROLE | GROUP] <name>` into `exp.GrantPrincipal`; None if no name is found."""
    kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
    principal = self._parse_id_var()

    if not principal:
        return None

    return self.expression(exp.GrantPrincipal(this=principal, kind=kind))

def _parse_grant_revoke_common(
    self,
) -> tuple[list | None, str | None, exp.Expr | None]:
    """Parse the part shared by GRANT and REVOKE: `<privileges> [ON] [<kind>] <securable>`.

    Returns:
        A `(privileges, kind, securable)` tuple. `kind` is None unless the token
        after ON is in `CREATABLES`.
    """
    privileges = self._parse_csv(self._parse_grant_privilege)

    self._match(TokenType.ON)
    kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None

    # Attempt to parse the securable e.g. MySQL allows names
    # such as "foo.*", "*.*" which are not easily parseable yet
    securable = self._try_parse(self._parse_table_parts)

    return privileges, kind, securable

def _parse_grant(self) -> exp.Grant | exp.Command:
    """Parse a GRANT statement into `exp.Grant`.

    Falls back to `exp.Command` if the securable or TO is missing, or if tokens
    remain after the recognized clauses.
    """
    start = self._prev

    privileges, kind, securable = self._parse_grant_revoke_common()

    if not securable or not self._match_text_seq("TO"):
        return self._parse_as_command(start)

    principals = self._parse_csv(self._parse_grant_principal)

    grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Grant(
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )
    )

def _parse_revoke(self) -> exp.Revoke | exp.Command:
    """Parse a REVOKE statement into `exp.Revoke`.

    Falls back to `exp.Command` if the securable or FROM is missing, or if tokens
    remain after the recognized clauses.
    """
    start = self._prev

    grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

    privileges, kind, securable = self._parse_grant_revoke_common()

    if not securable or not self._match_text_seq("FROM"):
        return self._parse_as_command(start)

    principals = self._parse_csv(self._parse_grant_principal)

    cascade = None
    if self._match_texts(("CASCADE", "RESTRICT")):
        cascade = self._prev.text.upper()

    if self._curr:
        return self._parse_as_command(start)

    return self.expression(
        exp.Revoke(
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )
    )

def _parse_overlay(self) -> exp.Overlay:
    """Parse the arguments of OVERLAY into `exp.Overlay`.

    Each argument after the first may be introduced by a comma or by its keyword
    (PLACING, FROM, FOR).
    """

    def _parse_overlay_arg(text: str) -> exp.Expr | None:
        return (
            self._parse_bitwise()
            if self._match(TokenType.COMMA) or self._match_text_seq(text)
            else None
        )

    return self.expression(
        exp.Overlay(
            this=self._parse_bitwise(),
            expression=_parse_overlay_arg("PLACING"),
            from_=_parse_overlay_arg("FROM"),
            for_=_parse_overlay_arg("FOR"),
        )
    )

def _parse_format_name(self) -> exp.Property:
    """Parse the value of FORMAT_NAME (a string or table parts) into an `exp.Property`."""
    # Note: Although not specified in the docs, Snowflake does accept a string/identifier
    # for FILE_FORMAT = <format_name>
    return self.expression(
        exp.Property(
            this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts()
        )
    )

def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc:
    """Parse `[DISTINCT <expr> [,]] <args>` into `expr_type(this, expression, count)`."""
    args: list[exp.Expr] = []

    if self._match(TokenType.DISTINCT):
        args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()])))
        self._match(TokenType.COMMA)

    args.extend(self._parse_function_args())

    return self.expression(
        expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2))
    )

def _identifier_expression(
    self, token: Token | None = None, quoted: bool | None = None
) -> exp.Identifier:
    """Build an `exp.Identifier` from `token`'s text, defaulting to the previous token."""
    token = token or self._prev
    return self.expression(exp.Identifier(this=token.text, quoted=quoted), token)

def _build_pipe_cte(
    self,
    query: exp.Query,
    expressions: list[exp.Expr],
    alias_cte: exp.TableAlias | None = None,
) -> exp.Select:
    """Wrap `query` in a CTE and return a new SELECT of `expressions` from that CTE.

    Args:
        query: the query to turn into a CTE; its existing `with_` arg, if any, is
            moved onto the new SELECT.
        expressions: projections of the new SELECT.
        alias_cte: name for the CTE; when omitted, `__tmp<N>` is generated from
            `self._pipe_cte_counter`.
    """
    new_cte: str | exp.TableAlias | None
    if alias_cte:
        new_cte = alias_cte
    else:
        self._pipe_cte_counter += 1
        new_cte = f"__tmp{self._pipe_cte_counter}"

    with_ = query.args.get("with_")
    ctes = with_.pop() if with_ else None

    new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
    if ctes:
        new_select.set("with_", ctes)

    return new_select.with_(new_cte, as_=query, copy=False)

def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
    """Apply a pipe-syntax SELECT: replace `query`'s projections and wrap it in a CTE.

    Returns `query` unchanged if no select could be parsed.
    """
    select = self._parse_select(consume_pipe=False)
    if not select:
        return query

    return self._build_pipe_cte(
        query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
    )

def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
    """Apply a pipe-syntax LIMIT / OFFSET to `query` in place.

    The new limit is applied when the existing limit (defaulting to the new one)
    is greater than or equal to it. Offsets are added to any existing offset.
    """
    limit = self._parse_limit()
    offset = self._parse_offset()
    if limit:
        curr_limit = query.args.get("limit", limit)
        if curr_limit.expression.to_py() >= limit.expression.to_py():
            query.limit(limit, copy=False)
    if offset:
        curr_offset = query.args.get("offset")
        curr_offset = curr_offset.expression.to_py() if curr_offset else 0
        query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

    return query

def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None:
    """Parse one pipe-syntax aggregate field, with optional alias and ASC / DESC ordering.

    The bare expression is returned, without alias parsing, when `GROUP AND` follows.
    """
    this = self._parse_disjunction()
    if self._match_text_seq("GROUP", "AND", advance=False):
        return this

    this = self._parse_alias(this)

    if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
        return self._parse_ordered(lambda: this)

    return this

def _parse_pipe_syntax_aggregate_group_order_by(
    self, query: exp.Select, group_by_exists: bool = True
) -> exp.Select:
    expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
    aggregates_or_groups, orders = [], []
    for element in expr:
        if isinstance(element, exp.Ordered):
            this = element.this
            if isinstance(this, exp.Alias):
                element.set("this", this.args["alias"])
            orders.append(element)
        else:
            this = element
        aggregates_or_groups.append(this)

    if group_by_exists:
        query.select(
            *aggregates_or_groups, *query.expressions, append=False, copy=False
        ).group_by(
            *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
            copy=False,
        )
    else:
        query.select(*aggregates_or_groups, append=False, copy=False)

    if orders:
        return query.order_by(*orders, append=False, copy=False)

9882 return query 9883 9884 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9885 self._match_text_seq("AGGREGATE") 9886 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9887 9888 if self._match(TokenType.GROUP_BY) or ( 9889 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9890 ): 9891 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9892 9893 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9894 9895 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9896 first_setop = self.parse_set_operation(this=query) 9897 if not first_setop: 9898 return None 9899 9900 def _parse_and_unwrap_query() -> exp.Expr | None: 9901 expr = self._parse_paren() 9902 return expr.assert_is(exp.Subquery).unnest() if expr else None 9903 9904 first_setop.this.pop() 9905 9906 setops = [ 9907 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9908 *self._parse_csv(_parse_and_unwrap_query), 9909 ] 9910 9911 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9912 with_ = query.args.get("with_") 9913 ctes = with_.pop() if with_ else None 9914 9915 if isinstance(first_setop, exp.Union): 9916 query = query.union(*setops, copy=False, **first_setop.args) 9917 elif isinstance(first_setop, exp.Except): 9918 query = query.except_(*setops, copy=False, **first_setop.args) 9919 else: 9920 query = query.intersect(*setops, copy=False, **first_setop.args) 9921 9922 query.set("with_", ctes) 9923 9924 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9925 9926 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9927 join = self._parse_join() 9928 if not join: 9929 return None 9930 9931 if isinstance(query, exp.Select): 9932 return query.join(join, copy=False) 9933 9934 return query 9935 9936 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9937 pivots = self._parse_pivots() 9938 if 
not pivots: 9939 return query 9940 9941 from_ = query.args.get("from_") 9942 if from_: 9943 from_.this.set("pivots", pivots) 9944 9945 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9946 9947 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9948 self._match_text_seq("EXTEND") 9949 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9950 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9951 9952 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9953 sample = self._parse_table_sample() 9954 9955 with_ = query.args.get("with_") 9956 if with_: 9957 with_.expressions[-1].this.set("sample", sample) 9958 else: 9959 query.set("sample", sample) 9960 9961 return query 9962 9963 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9964 if isinstance(query, exp.Subquery): 9965 query = exp.select("*").from_(query, copy=False) 9966 9967 if not query.args.get("from_"): 9968 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9969 9970 while self._match(TokenType.PIPE_GT): 9971 start_index = self._index 9972 start_text = self._curr.text.upper() 9973 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9974 if not parser: 9975 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9976 # keywords, making it tricky to disambiguate them without lookahead. The approach 9977 # here is to try and parse a set operation and if that fails, then try to parse a 9978 # join operator. If that fails as well, then the operator is not supported. 
9979 parsed_query = self._parse_pipe_syntax_set_operator(query) 9980 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9981 if not parsed_query: 9982 self._retreat(start_index) 9983 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9984 break 9985 query = parsed_query 9986 else: 9987 query = parser(self, query) 9988 9989 return query 9990 9991 def _parse_declareitem(self) -> exp.DeclareItem | None: 9992 self._match_texts(("VAR", "VARIABLE")) 9993 9994 vars = self._parse_csv(self._parse_id_var) 9995 if not vars: 9996 return None 9997 9998 self._match(TokenType.ALIAS) 9999 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 10000 default = ( 10001 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 10002 ) and self._parse_bitwise() 10003 10004 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 10005 10006 def _parse_declare(self) -> exp.Declare | exp.Command: 10007 start = self._prev 10008 replace = self._match_text_seq("OR", "REPLACE") 10009 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 10010 10011 if not expressions or self._curr: 10012 return self._parse_as_command(start) 10013 10014 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 10015 10016 def build_cast(self, strict: bool, **kwargs) -> exp.Expr: 10017 exp_class = exp.Cast if strict else exp.TryCast 10018 10019 if exp_class == exp.TryCast: 10020 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 10021 10022 return self.expression(exp_class(**kwargs)) 10023 10024 def _parse_json_value(self) -> exp.JSONValue: 10025 this = self._parse_bitwise() 10026 self._match(TokenType.COMMA) 10027 path = self._parse_bitwise() 10028 10029 returning = self._match(TokenType.RETURNING) and self._parse_type() 10030 10031 return self.expression( 10032 exp.JSONValue( 10033 this=this, 10034 path=self.dialect.to_json_path(path), 10035 
returning=returning, 10036 on_condition=self._parse_on_condition(), 10037 ) 10038 ) 10039 10040 def _parse_group_concat(self) -> exp.Expr | None: 10041 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 10042 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 10043 concat_exprs = [ 10044 self.expression( 10045 exp.Concat( 10046 expressions=node.expressions, 10047 safe=True, 10048 coalesce=self.dialect.CONCAT_COALESCE, 10049 ) 10050 ) 10051 ] 10052 node.set("expressions", concat_exprs) 10053 return node 10054 if len(exprs) == 1: 10055 return exprs[0] 10056 return self.expression( 10057 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 10058 ) 10059 10060 args = self._parse_csv(self._parse_lambda) 10061 10062 if args: 10063 order = args[-1] if isinstance(args[-1], exp.Order) else None 10064 10065 if order: 10066 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 10067 # remove 'expr' from exp.Order and add it back to args 10068 args[-1] = order.this 10069 order.set("this", concat_exprs(order.this, args)) 10070 10071 this = order or concat_exprs(args[0], args) 10072 else: 10073 this = None 10074 10075 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 10076 10077 return self.expression(exp.GroupConcat(this=this, separator=separator)) 10078 10079 def _parse_initcap(self) -> exp.Initcap: 10080 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 10081 10082 # attach dialect's default delimiters 10083 if expr.args.get("expression") is None: 10084 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 10085 10086 return expr 10087 10088 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 10089 while True: 10090 if not self._match(TokenType.L_PAREN): 10091 break 10092 10093 op = "" 10094 while self._curr and not self._match(TokenType.R_PAREN): 10095 op += self._curr.text 10096 
self._advance() 10097 10098 comments = self._prev_comments 10099 this = self.expression( 10100 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 10101 comments=comments, 10102 ) 10103 10104 if not self._match(TokenType.OPERATOR): 10105 break 10106 10107 return this
def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """
    Builds a map node from a flat list of alternating keys and values.

    Args:
        args: Either a single star argument, or the map entries laid out as
            [key1, value1, key2, value2, ...].

    Returns:
        A StarMap wrapping the lone star argument, otherwise a VarMap whose keys and
        values are collected into two separate arrays.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    map_keys: list[ExpOrStr] = []
    map_values: list[ExpOrStr] = []
    for key_index in range(0, len(args), 2):
        # The value sits right after its key; a trailing key without a value
        # fails on the second lookup.
        key, value = args[key_index], args[key_index + 1]
        map_keys.append(key)
        map_values.append(value)

    return exp.VarMap(
        keys=exp.array(*map_keys, copy=False),
        values=exp.array(*map_values, copy=False),
    )
def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """
    Creates a parser callback for a binary range operator.

    Args:
        expr_type: The expression class used to combine the two operands.
        reverse_args: Whether the already-parsed operand and the newly parsed one
            should trade places before the node is built.

    Returns:
        A callable taking (parser, this). It parses the other operand with
        `_parse_bitwise`, builds an `expr_type` node through `parser.expression`
        and hands that node to `_parse_escape`.
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        left, right = this, self._parse_bitwise()
        if reverse_args:
            left, right = right, left

        node = self.expression(expr_type(this=left, expression=right))
        return self._parse_escape(node)

    return _parse_binary_range
def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """
    Builds a logarithm node from one or two arguments.

    Args:
        args: Function arguments; the default order is [base, expression].
        dialect: Supplies LOG_BASE_FIRST and, through its parser class,
            LOG_DEFAULTS_TO_LN.

    Returns:
        With two arguments, a Log node whose operands are swapped when the dialect
        does not put the base first. With a single argument, an Ln node if the
        dialect's parser class sets LOG_DEFAULTS_TO_LN, otherwise a Log node.
    """
    first, second = seq_get(args, 0), seq_get(args, 1)

    if not second:
        # Single-argument form: the dialect decides whether this means LN or LOG
        log_cls = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return log_cls(this=first)

    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)
def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """
    Creates a builder for JSON extraction nodes whose second argument is a JSON path.

    Args:
        expr_type: The expression class the returned builder instantiates.

    Returns:
        A callable taking (args, dialect) that returns an `expr_type` instance with
        its path converted by `dialect.to_json_path`.
    """

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        target = seq_get(args, 0)
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=target, expression=json_path)

        # Only JSONExtract keeps the arguments that follow the path
        has_extra_args = len(args) > 2
        if has_extra_args and expr_type is exp.JSONExtract:
            node.set("expressions", args[2:])

        if expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder
def build_mod(args: BuilderArgs) -> exp.Mod:
    """
    Builds a Mod node from the first two function arguments.

    Args:
        args: Function arguments [dividend, divisor].

    Returns:
        A Mod node whose binary operands are parenthesized,
        e.g. MOD(a + 1, 7) -> (a + 1) % 7
    """

    def _parenthesize(operand):
        # Binary operands are wrapped so they stay grouped next to the % operator
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))

    return exp.Mod(this=dividend, expression=divisor)
def build_array_constructor(
    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
) -> exp.Expr:
    """
    Builds an array-like node and records which constructor syntax produced it.

    Args:
        exp_class: The expression class to instantiate.
        args: The elements of the array.
        bracket_kind: The token type of the bracket that opened the constructor.
        dialect: Supplies HAS_DISTINCT_ARRAY_CONSTRUCTORS.

    Returns:
        An `exp_class` node. For exp.Array in dialects with distinct array
        constructors, `bracket_notation` is set to whether the opening token was
        L_BRACKET.
    """
    node = exp_class(expressions=args)

    if exp_class == exp.Array:
        if dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
            opened_with_bracket = bracket_kind == TokenType.L_BRACKET
            node.set("bracket_notation", opened_with_bracket)

    return node
def build_convert_timezone(
    args: BuilderArgs, default_source_tz: str | None = None
) -> exp.ConvertTimezone | exp.Anonymous:
    """
    Builds a ConvertTimezone node, filling in the source time zone when it is omitted.

    Args:
        args: Function arguments; the two-argument form is [target_tz, timestamp].
        default_source_tz: Source time zone used for the two-argument form, if any.

    Returns:
        For exactly two arguments, a ConvertTimezone whose source_tz is a string
        literal of `default_source_tz` (or None when no default is given). Any other
        argument count is delegated to `ConvertTimezone.from_arg_list`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    target_tz = seq_get(args, 0)
    timestamp = seq_get(args, 1)

    return exp.ConvertTimezone(source_tz=source_tz, target_tz=target_tz, timestamp=timestamp)
def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """
    Builds a one-sided Trim node.

    Args:
        args: Function arguments; the first two are used.
        is_left: Whether the trim position is "LEADING" (otherwise "TRAILING").
        reverse_args: Whether the two arguments trade places when the second one
            is present.

    Returns:
        A Trim node with its position set according to `is_left`.
    """
    target = seq_get(args, 0)
    chars = seq_get(args, 1)

    # Swapping only makes sense when a second argument was actually supplied
    if chars:
        if reverse_args:
            target, chars = chars, target

    position = "LEADING" if is_left else "TRAILING"
    return exp.Trim(this=target, expression=chars, position=position)
def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
    """
    Builds an ArrayAppend node tagged with the dialect's NULL propagation behavior.

    When the input array is NULL, some dialects (Databricks, Spark, Snowflake) return
    NULL, while others (DuckDB, PostgreSQL) produce a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayAppend expression carrying the dialect's null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayAppend(this=array, expression=element, null_propagation=propagates_nulls)
Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayAppend expression with appropriate null_propagation flag
def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Builds an ArrayPrepend node tagged with the dialect's NULL propagation behavior.

    When the input array is NULL, some dialects (Databricks, Spark, Snowflake) return
    NULL, while others (DuckDB, PostgreSQL) produce a new single-element array.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayPrepend expression carrying the dialect's null_propagation flag
    """
    array, element = seq_get(args, 0), seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayPrepend(this=array, expression=element, null_propagation=propagates_nulls)
Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.
Arguments:
- args: Function arguments [array, element]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayPrepend expression with appropriate null_propagation flag
def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
    """
    Builds an ArrayConcat node tagged with the dialect's NULL propagation behavior.

    When any input array is NULL, some dialects (Redshift, Snowflake) return NULL,
    while others (DuckDB, PostgreSQL) skip the NULL arrays and keep concatenating.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayConcat expression carrying the dialect's null_propagation flag
    """
    first_array = seq_get(args, 0)
    remaining_arrays = args[1:]
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayConcat(
        this=first_array,
        expressions=remaining_arrays,
        null_propagation=propagates_nulls,
    )
Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
Arguments:
- args: Function arguments [array1, array2, ...] (variadic)
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayConcat expression with appropriate null_propagation flag
def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove:
    """
    Builds an ArrayRemove node tagged with the dialect's NULL propagation behavior.

    When the value to remove is NULL, some dialects (Snowflake) return NULL, while
    others (DuckDB) may return an empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayRemove expression carrying the dialect's null_propagation flag
    """
    array, value_to_remove = seq_get(args, 0), seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS

    return exp.ArrayRemove(
        this=array,
        expression=value_to_remove,
        null_propagation=propagates_nulls,
    )
Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return an empty array due to NULL comparison semantics.
Arguments:
- args: Function arguments [array, value_to_remove]
- dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
Returns:
ArrayRemove expression with appropriate null_propagation flag
291class Parser: 292 """ 293 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 294 295 Args: 296 error_level: The desired error level. 297 Default: ErrorLevel.IMMEDIATE 298 error_message_context: The amount of context to capture from a query string when displaying 299 the error message (in number of characters). 300 Default: 100 301 max_errors: Maximum number of error messages to include in a raised ParseError. 302 This is only relevant if error_level is ErrorLevel.RAISE. 303 Default: 3 304 max_nodes: Maximum number of AST nodes to prevent memory exhaustion. 305 Set to -1 (default) to disable the check. 306 """ 307 308 __slots__ = ( 309 "error_level", 310 "error_message_context", 311 "max_errors", 312 "max_nodes", 313 "dialect", 314 "sql", 315 "errors", 316 "_tokens", 317 "_index", 318 "_curr", 319 "_next", 320 "_prev", 321 "_prev_comments", 322 "_pipe_cte_counter", 323 "_chunks", 324 "_chunk_index", 325 "_tokens_size", 326 "_node_count", 327 ) 328 329 FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = { 330 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 331 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 332 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 333 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 334 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 335 ), 336 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 337 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 338 ), 339 "ARRAY_APPEND": build_array_append, 340 "ARRAY_CAT": build_array_concat, 341 "ARRAY_CONCAT": build_array_concat, 342 "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args), 343 "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args), 344 "ARRAY_PREPEND": build_array_prepend, 345 "ARRAY_REMOVE": build_array_remove, 346 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), 
expressions=args[1:], big_int=True), 347 "CONCAT": lambda args, dialect: exp.Concat( 348 expressions=args, 349 safe=not dialect.STRICT_STRING_CONCAT, 350 coalesce=dialect.CONCAT_COALESCE, 351 ), 352 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 353 expressions=args, 354 safe=not dialect.STRICT_STRING_CONCAT, 355 coalesce=dialect.CONCAT_WS_COALESCE, 356 ), 357 "CONVERT_TIMEZONE": build_convert_timezone, 358 "DATE_TO_DATE_STR": lambda args: exp.Cast( 359 this=seq_get(args, 0), 360 to=exp.DataType(this=exp.DType.TEXT), 361 ), 362 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 363 start=seq_get(args, 0), 364 end=seq_get(args, 1), 365 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 366 ), 367 "GENERATE_UUID": lambda args, dialect: exp.Uuid( 368 is_string=dialect.UUID_IS_STRING_TYPE or None 369 ), 370 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 371 "GREATEST": lambda args, dialect: exp.Greatest( 372 this=seq_get(args, 0), 373 expressions=args[1:], 374 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 375 ), 376 "LEAST": lambda args, dialect: exp.Least( 377 this=seq_get(args, 0), 378 expressions=args[1:], 379 ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS, 380 ), 381 "HEX": build_hex, 382 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 383 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 384 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 385 "JSON_KEYS": lambda args, dialect: exp.JSONKeys( 386 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 387 ), 388 "LIKE": build_like, 389 "LOG": build_logarithm, 390 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 391 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 392 "LOWER": build_lower, 393 "LPAD": lambda args: build_pad(args), 394 "LEFTPAD": lambda args: 
build_pad(args), 395 "LTRIM": lambda args: build_trim(args), 396 "MOD": build_mod, 397 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 398 "RPAD": lambda args: build_pad(args, is_left=False), 399 "RTRIM": lambda args: build_trim(args, is_left=False), 400 "SCOPE_RESOLUTION": lambda args: ( 401 exp.ScopeResolution(expression=seq_get(args, 0)) 402 if len(args) != 2 403 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)) 404 ), 405 "STRPOS": exp.StrPosition.from_arg_list, 406 "CHARINDEX": lambda args: build_locate_strposition(args), 407 "INSTR": exp.StrPosition.from_arg_list, 408 "LOCATE": lambda args: build_locate_strposition(args), 409 "TIME_TO_TIME_STR": lambda args: exp.Cast( 410 this=seq_get(args, 0), 411 to=exp.DataType(this=exp.DType.TEXT), 412 ), 413 "TO_HEX": build_hex, 414 "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring( 415 this=exp.Cast( 416 this=seq_get(args, 0), 417 to=exp.DataType(this=exp.DType.TEXT), 418 ), 419 start=exp.Literal.number(1), 420 length=exp.Literal.number(10), 421 ), 422 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 423 "UPPER": build_upper, 424 "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None), 425 "UUID_STRING": lambda args, dialect: exp.Uuid( 426 this=seq_get(args, 0), 427 name=seq_get(args, 1), 428 is_string=dialect.UUID_IS_STRING_TYPE or None, 429 ), 430 "VAR_MAP": build_var_map, 431 } 432 433 NO_PAREN_FUNCTIONS: t.ClassVar[dict] = { 434 TokenType.CURRENT_DATE: exp.CurrentDate, 435 TokenType.CURRENT_DATETIME: exp.CurrentDate, 436 TokenType.CURRENT_TIME: exp.CurrentTime, 437 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 438 TokenType.CURRENT_USER: exp.CurrentUser, 439 TokenType.CURRENT_ROLE: exp.CurrentRole, 440 } 441 442 STRUCT_TYPE_TOKENS: t.ClassVar = { 443 TokenType.NESTED, 444 TokenType.OBJECT, 445 TokenType.STRUCT, 446 TokenType.UNION, 447 } 448 449 NESTED_TYPE_TOKENS: t.ClassVar = { 450 TokenType.ARRAY, 451 
TokenType.LIST, 452 TokenType.LOWCARDINALITY, 453 TokenType.MAP, 454 TokenType.NULLABLE, 455 TokenType.RANGE, 456 *STRUCT_TYPE_TOKENS, 457 } 458 459 ENUM_TYPE_TOKENS: t.ClassVar = { 460 TokenType.DYNAMIC, 461 TokenType.ENUM, 462 TokenType.ENUM8, 463 TokenType.ENUM16, 464 } 465 466 AGGREGATE_TYPE_TOKENS: t.ClassVar = { 467 TokenType.AGGREGATEFUNCTION, 468 TokenType.SIMPLEAGGREGATEFUNCTION, 469 } 470 471 TYPE_TOKENS: t.ClassVar = { 472 TokenType.BIT, 473 TokenType.BOOLEAN, 474 TokenType.TINYINT, 475 TokenType.UTINYINT, 476 TokenType.SMALLINT, 477 TokenType.USMALLINT, 478 TokenType.INT, 479 TokenType.UINT, 480 TokenType.BIGINT, 481 TokenType.UBIGINT, 482 TokenType.BIGNUM, 483 TokenType.INT128, 484 TokenType.UINT128, 485 TokenType.INT256, 486 TokenType.UINT256, 487 TokenType.MEDIUMINT, 488 TokenType.UMEDIUMINT, 489 TokenType.FIXEDSTRING, 490 TokenType.FLOAT, 491 TokenType.DOUBLE, 492 TokenType.UDOUBLE, 493 TokenType.CHAR, 494 TokenType.NCHAR, 495 TokenType.VARCHAR, 496 TokenType.NVARCHAR, 497 TokenType.BPCHAR, 498 TokenType.TEXT, 499 TokenType.MEDIUMTEXT, 500 TokenType.LONGTEXT, 501 TokenType.BLOB, 502 TokenType.MEDIUMBLOB, 503 TokenType.LONGBLOB, 504 TokenType.BINARY, 505 TokenType.VARBINARY, 506 TokenType.JSON, 507 TokenType.JSONB, 508 TokenType.INTERVAL, 509 TokenType.TINYBLOB, 510 TokenType.TINYTEXT, 511 TokenType.TIME, 512 TokenType.TIMETZ, 513 TokenType.TIME_NS, 514 TokenType.TIMESTAMP, 515 TokenType.TIMESTAMP_S, 516 TokenType.TIMESTAMP_MS, 517 TokenType.TIMESTAMP_NS, 518 TokenType.TIMESTAMPTZ, 519 TokenType.TIMESTAMPLTZ, 520 TokenType.TIMESTAMPNTZ, 521 TokenType.DATETIME, 522 TokenType.DATETIME2, 523 TokenType.DATETIME64, 524 TokenType.SMALLDATETIME, 525 TokenType.DATE, 526 TokenType.DATE32, 527 TokenType.INT4RANGE, 528 TokenType.INT4MULTIRANGE, 529 TokenType.INT8RANGE, 530 TokenType.INT8MULTIRANGE, 531 TokenType.NUMRANGE, 532 TokenType.NUMMULTIRANGE, 533 TokenType.TSRANGE, 534 TokenType.TSMULTIRANGE, 535 TokenType.TSTZRANGE, 536 TokenType.TSTZMULTIRANGE, 537 
TokenType.DATERANGE, 538 TokenType.DATEMULTIRANGE, 539 TokenType.DECIMAL, 540 TokenType.DECIMAL32, 541 TokenType.DECIMAL64, 542 TokenType.DECIMAL128, 543 TokenType.DECIMAL256, 544 TokenType.DECFLOAT, 545 TokenType.UDECIMAL, 546 TokenType.BIGDECIMAL, 547 TokenType.UUID, 548 TokenType.GEOGRAPHY, 549 TokenType.GEOGRAPHYPOINT, 550 TokenType.GEOMETRY, 551 TokenType.POINT, 552 TokenType.RING, 553 TokenType.LINESTRING, 554 TokenType.MULTILINESTRING, 555 TokenType.POLYGON, 556 TokenType.MULTIPOLYGON, 557 TokenType.HLLSKETCH, 558 TokenType.HSTORE, 559 TokenType.PSEUDO_TYPE, 560 TokenType.SUPER, 561 TokenType.SERIAL, 562 TokenType.SMALLSERIAL, 563 TokenType.BIGSERIAL, 564 TokenType.XML, 565 TokenType.YEAR, 566 TokenType.USERDEFINED, 567 TokenType.MONEY, 568 TokenType.SMALLMONEY, 569 TokenType.ROWVERSION, 570 TokenType.IMAGE, 571 TokenType.VARIANT, 572 TokenType.VECTOR, 573 TokenType.VOID, 574 TokenType.OBJECT, 575 TokenType.OBJECT_IDENTIFIER, 576 TokenType.INET, 577 TokenType.IPADDRESS, 578 TokenType.IPPREFIX, 579 TokenType.IPV4, 580 TokenType.IPV6, 581 TokenType.UNKNOWN, 582 TokenType.NOTHING, 583 TokenType.NULL, 584 TokenType.NAME, 585 TokenType.TDIGEST, 586 TokenType.DYNAMIC, 587 *ENUM_TYPE_TOKENS, 588 *NESTED_TYPE_TOKENS, 589 *AGGREGATE_TYPE_TOKENS, 590 } 591 592 SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = { 593 TokenType.BIGINT: TokenType.UBIGINT, 594 TokenType.INT: TokenType.UINT, 595 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 596 TokenType.SMALLINT: TokenType.USMALLINT, 597 TokenType.TINYINT: TokenType.UTINYINT, 598 TokenType.DECIMAL: TokenType.UDECIMAL, 599 TokenType.DOUBLE: TokenType.UDOUBLE, 600 } 601 602 SUBQUERY_PREDICATES: t.ClassVar = { 603 TokenType.ANY: exp.Any, 604 TokenType.ALL: exp.All, 605 TokenType.EXISTS: exp.Exists, 606 TokenType.SOME: exp.Any, 607 } 608 609 SUBQUERY_TOKENS: t.ClassVar = { 610 TokenType.SELECT, 611 TokenType.WITH, 612 TokenType.FROM, 613 } 614 615 RESERVED_TOKENS: t.ClassVar = { 616 *Tokenizer.SINGLE_TOKENS.values(), 617 
TokenType.SELECT, 618 } - {TokenType.IDENTIFIER} 619 620 DB_CREATABLES: t.ClassVar = { 621 TokenType.DATABASE, 622 TokenType.DICTIONARY, 623 TokenType.FILE_FORMAT, 624 TokenType.MODEL, 625 TokenType.NAMESPACE, 626 TokenType.SCHEMA, 627 TokenType.SEMANTIC_VIEW, 628 TokenType.SEQUENCE, 629 TokenType.SINK, 630 TokenType.SOURCE, 631 TokenType.STAGE, 632 TokenType.STORAGE_INTEGRATION, 633 TokenType.STREAMLIT, 634 TokenType.TABLE, 635 TokenType.TAG, 636 TokenType.VIEW, 637 TokenType.WAREHOUSE, 638 } 639 640 CREATABLES: t.ClassVar = { 641 TokenType.COLUMN, 642 TokenType.CONSTRAINT, 643 TokenType.FOREIGN_KEY, 644 TokenType.FUNCTION, 645 TokenType.INDEX, 646 TokenType.PROCEDURE, 647 TokenType.TRIGGER, 648 TokenType.TYPE, 649 *DB_CREATABLES, 650 } 651 652 TRIGGER_EVENTS: t.ClassVar = { 653 TokenType.INSERT, 654 TokenType.UPDATE, 655 TokenType.DELETE, 656 TokenType.TRUNCATE, 657 } 658 659 ALTERABLES: t.ClassVar = { 660 TokenType.INDEX, 661 TokenType.TABLE, 662 TokenType.VIEW, 663 TokenType.SESSION, 664 } 665 666 # Tokens that can represent identifiers 667 ID_VAR_TOKENS: t.ClassVar[set] = { 668 TokenType.ALL, 669 TokenType.ANALYZE, 670 TokenType.ATTACH, 671 TokenType.VAR, 672 TokenType.ANTI, 673 TokenType.APPLY, 674 TokenType.ASC, 675 TokenType.ASOF, 676 TokenType.AUTO_INCREMENT, 677 TokenType.BEGIN, 678 TokenType.BPCHAR, 679 TokenType.CACHE, 680 TokenType.CASE, 681 TokenType.COLLATE, 682 TokenType.COMMAND, 683 TokenType.COMMENT, 684 TokenType.COMMIT, 685 TokenType.CONSTRAINT, 686 TokenType.COPY, 687 TokenType.CUBE, 688 TokenType.CURRENT_SCHEMA, 689 TokenType.DEFAULT, 690 TokenType.DELETE, 691 TokenType.DESC, 692 TokenType.DESCRIBE, 693 TokenType.DETACH, 694 TokenType.DICTIONARY, 695 TokenType.DIV, 696 TokenType.END, 697 TokenType.EXECUTE, 698 TokenType.EXPORT, 699 TokenType.ESCAPE, 700 TokenType.FALSE, 701 TokenType.FIRST, 702 TokenType.FILE, 703 TokenType.FILTER, 704 TokenType.FINAL, 705 TokenType.FORMAT, 706 TokenType.FULL, 707 TokenType.GET, 708 TokenType.IDENTIFIER, 709 
TokenType.INOUT, 710 TokenType.IS, 711 TokenType.ISNULL, 712 TokenType.INTERVAL, 713 TokenType.KEEP, 714 TokenType.KILL, 715 TokenType.LEFT, 716 TokenType.LIMIT, 717 TokenType.LOAD, 718 TokenType.LOCK, 719 TokenType.MATCH, 720 TokenType.MERGE, 721 TokenType.NATURAL, 722 TokenType.NEXT, 723 TokenType.OFFSET, 724 TokenType.OPERATOR, 725 TokenType.ORDINALITY, 726 TokenType.OVER, 727 TokenType.OVERLAPS, 728 TokenType.OVERWRITE, 729 TokenType.PARTITION, 730 TokenType.PERCENT, 731 TokenType.PIVOT, 732 TokenType.PRAGMA, 733 TokenType.PUT, 734 TokenType.RANGE, 735 TokenType.RECURSIVE, 736 TokenType.REFERENCES, 737 TokenType.REFRESH, 738 TokenType.RENAME, 739 TokenType.REPLACE, 740 TokenType.RIGHT, 741 TokenType.ROLLUP, 742 TokenType.ROW, 743 TokenType.ROWS, 744 TokenType.SEMI, 745 TokenType.SET, 746 TokenType.SETTINGS, 747 TokenType.SHOW, 748 TokenType.STREAM, 749 TokenType.STREAMLIT, 750 TokenType.TEMPORARY, 751 TokenType.TOP, 752 TokenType.TRUE, 753 TokenType.TRUNCATE, 754 TokenType.UNIQUE, 755 TokenType.UNNEST, 756 TokenType.UNPIVOT, 757 TokenType.UPDATE, 758 TokenType.USE, 759 TokenType.VOLATILE, 760 TokenType.WINDOW, 761 TokenType.CURRENT_CATALOG, 762 TokenType.LOCALTIME, 763 TokenType.LOCALTIMESTAMP, 764 TokenType.SESSION_USER, 765 TokenType.STRAIGHT_JOIN, 766 *ALTERABLES, 767 *CREATABLES, 768 *SUBQUERY_PREDICATES, 769 *TYPE_TOKENS, 770 *NO_PAREN_FUNCTIONS, 771 } - {TokenType.UNION} 772 773 TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - { 774 TokenType.ANTI, 775 TokenType.ASOF, 776 TokenType.FULL, 777 TokenType.LEFT, 778 TokenType.LOCK, 779 TokenType.NATURAL, 780 TokenType.RIGHT, 781 TokenType.SEMI, 782 TokenType.WINDOW, 783 } 784 785 ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS 786 787 COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS 788 789 ARRAY_CONSTRUCTORS: t.ClassVar = { 790 "ARRAY": exp.Array, 791 "LIST": exp.List, 792 } 793 794 COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS} 795 796 UPDATE_ALIAS_TOKENS: t.ClassVar = 
TABLE_ALIAS_TOKENS - {TokenType.SET} 797 798 TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"} 799 800 # Tokens that indicate a simple column reference 801 IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER}) 802 803 BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE}) 804 805 # Postfix tokens that prevent the bare column fast path 806 COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 807 { 808 TokenType.L_PAREN, 809 TokenType.L_BRACKET, 810 TokenType.L_BRACE, 811 TokenType.COLON, 812 TokenType.JOIN_MARKER, 813 } 814 ) 815 816 TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset( 817 { 818 TokenType.L_PAREN, 819 TokenType.L_BRACKET, 820 TokenType.L_BRACE, 821 TokenType.PIVOT, 822 TokenType.UNPIVOT, 823 TokenType.TABLE_SAMPLE, 824 } 825 ) 826 827 FUNC_TOKENS: t.ClassVar = { 828 TokenType.COLLATE, 829 TokenType.COMMAND, 830 TokenType.CURRENT_DATE, 831 TokenType.CURRENT_DATETIME, 832 TokenType.CURRENT_SCHEMA, 833 TokenType.CURRENT_TIMESTAMP, 834 TokenType.CURRENT_TIME, 835 TokenType.CURRENT_USER, 836 TokenType.CURRENT_CATALOG, 837 TokenType.FILTER, 838 TokenType.FIRST, 839 TokenType.FORMAT, 840 TokenType.GET, 841 TokenType.GLOB, 842 TokenType.IDENTIFIER, 843 TokenType.INDEX, 844 TokenType.ISNULL, 845 TokenType.ILIKE, 846 TokenType.INSERT, 847 TokenType.LIKE, 848 TokenType.LOCALTIME, 849 TokenType.LOCALTIMESTAMP, 850 TokenType.MERGE, 851 TokenType.NEXT, 852 TokenType.OFFSET, 853 TokenType.PRIMARY_KEY, 854 TokenType.RANGE, 855 TokenType.REPLACE, 856 TokenType.RLIKE, 857 TokenType.ROW, 858 TokenType.SESSION_USER, 859 TokenType.UNNEST, 860 TokenType.VAR, 861 TokenType.LEFT, 862 TokenType.RIGHT, 863 TokenType.SEQUENCE, 864 TokenType.DATE, 865 TokenType.DATETIME, 866 TokenType.TABLE, 867 TokenType.TIMESTAMP, 868 TokenType.TIMESTAMPTZ, 869 TokenType.TRUNCATE, 870 TokenType.UTC_DATE, 871 TokenType.UTC_TIME, 872 TokenType.UTC_TIMESTAMP, 873 TokenType.WINDOW, 874 TokenType.XOR, 875 
*TYPE_TOKENS, 876 *SUBQUERY_PREDICATES, 877 } 878 879 CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 880 TokenType.AND: exp.And, 881 } 882 883 ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 884 TokenType.COLON_EQ: exp.PropertyEQ, 885 } 886 887 DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = { 888 TokenType.OR: exp.Or, 889 } 890 891 EQUALITY: t.ClassVar = { 892 TokenType.EQ: exp.EQ, 893 TokenType.NEQ: exp.NEQ, 894 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 895 } 896 897 COMPARISON: t.ClassVar = { 898 TokenType.GT: exp.GT, 899 TokenType.GTE: exp.GTE, 900 TokenType.LT: exp.LT, 901 TokenType.LTE: exp.LTE, 902 } 903 904 BITWISE: t.ClassVar = { 905 TokenType.AMP: exp.BitwiseAnd, 906 TokenType.CARET: exp.BitwiseXor, 907 TokenType.PIPE: exp.BitwiseOr, 908 } 909 910 TERM: t.ClassVar = { 911 TokenType.DASH: exp.Sub, 912 TokenType.PLUS: exp.Add, 913 TokenType.MOD: exp.Mod, 914 TokenType.COLLATE: exp.Collate, 915 } 916 917 FACTOR: t.ClassVar = { 918 TokenType.DIV: exp.IntDiv, 919 TokenType.LR_ARROW: exp.Distance, 920 TokenType.LLRR_ARROW: exp.DistanceNd, 921 TokenType.SLASH: exp.Div, 922 TokenType.STAR: exp.Mul, 923 } 924 925 EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {} 926 927 TIMES: t.ClassVar = { 928 TokenType.TIME, 929 TokenType.TIMETZ, 930 } 931 932 TIMESTAMPS: t.ClassVar = { 933 TokenType.TIMESTAMP, 934 TokenType.TIMESTAMPNTZ, 935 TokenType.TIMESTAMPTZ, 936 TokenType.TIMESTAMPLTZ, 937 *TIMES, 938 } 939 940 SET_OPERATIONS: t.ClassVar = { 941 TokenType.UNION, 942 TokenType.INTERSECT, 943 TokenType.EXCEPT, 944 } 945 946 JOIN_METHODS: t.ClassVar = { 947 TokenType.ASOF, 948 TokenType.NATURAL, 949 TokenType.POSITIONAL, 950 } 951 952 JOIN_SIDES: t.ClassVar = { 953 TokenType.LEFT, 954 TokenType.RIGHT, 955 TokenType.FULL, 956 } 957 958 JOIN_KINDS: t.ClassVar = { 959 TokenType.ANTI, 960 TokenType.CROSS, 961 TokenType.INNER, 962 TokenType.OUTER, 963 TokenType.SEMI, 964 TokenType.STRAIGHT_JOIN, 965 } 966 967 JOIN_HINTS: 
t.ClassVar[set[str]] = set() 968 969 # Tokens that unambiguously end a table reference on the fast path 970 TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset( 971 { 972 TokenType.COMMA, 973 TokenType.GROUP_BY, 974 TokenType.HAVING, 975 TokenType.JOIN, 976 TokenType.LIMIT, 977 TokenType.ON, 978 TokenType.ORDER_BY, 979 TokenType.R_PAREN, 980 TokenType.SEMICOLON, 981 TokenType.SENTINEL, 982 TokenType.WHERE, 983 *SET_OPERATIONS, 984 *JOIN_KINDS, 985 *JOIN_METHODS, 986 *JOIN_SIDES, 987 } 988 ) 989 990 LAMBDAS: t.ClassVar = { 991 TokenType.ARROW: lambda self, expressions: self.expression( 992 exp.Lambda( 993 this=self._replace_lambda( 994 self._parse_disjunction(), 995 expressions, 996 ), 997 expressions=expressions, 998 ) 999 ), 1000 TokenType.FARROW: lambda self, expressions: self.expression( 1001 exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction()) 1002 ), 1003 } 1004 1005 # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake 1006 TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False 1007 1008 LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN}) 1009 1010 COLUMN_OPERATORS: t.ClassVar = { 1011 TokenType.DOT: None, 1012 TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)), 1013 TokenType.DCOLON: lambda self, this, to: self.build_cast( 1014 strict=self.STRICT_CAST, this=this, to=to 1015 ), 1016 TokenType.ARROW: lambda self, this, path: self.expression( 1017 exp.JSONExtract( 1018 this=this, 1019 expression=self.dialect.to_json_path(path), 1020 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1021 ) 1022 ), 1023 TokenType.DARROW: lambda self, this, path: self.expression( 1024 exp.JSONExtractScalar( 1025 this=this, 1026 expression=self.dialect.to_json_path(path), 1027 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 1028 scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY, 1029 ) 1030 ), 1031 TokenType.HASH_ARROW: 
lambda self, this, path: self.expression( 1032 exp.JSONBExtract(this=this, expression=path) 1033 ), 1034 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 1035 exp.JSONBExtractScalar(this=this, expression=path) 1036 ), 1037 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 1038 exp.JSONBContains(this=this, expression=key) 1039 ), 1040 } 1041 1042 CAST_COLUMN_OPERATORS: t.ClassVar = { 1043 TokenType.DOTCOLON, 1044 TokenType.DCOLON, 1045 } 1046 1047 EXPRESSION_PARSERS: t.ClassVar = { 1048 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1049 exp.Column: lambda self: self._parse_column(), 1050 exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()), 1051 exp.Condition: lambda self: self._parse_disjunction(), 1052 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 1053 exp.Expr: lambda self: self._parse_expression(), 1054 exp.From: lambda self: self._parse_from(joins=True), 1055 exp.GrantPrincipal: lambda self: self._parse_grant_principal(), 1056 exp.GrantPrivilege: lambda self: self._parse_grant_privilege(), 1057 exp.Group: lambda self: self._parse_group(), 1058 exp.Having: lambda self: self._parse_having(), 1059 exp.Hint: lambda self: self._parse_hint_body(), 1060 exp.Identifier: lambda self: self._parse_id_var(), 1061 exp.Join: lambda self: self._parse_join(), 1062 exp.Lambda: lambda self: self._parse_lambda(), 1063 exp.Lateral: lambda self: self._parse_lateral(), 1064 exp.Limit: lambda self: self._parse_limit(), 1065 exp.Offset: lambda self: self._parse_offset(), 1066 exp.Order: lambda self: self._parse_order(), 1067 exp.Ordered: lambda self: self._parse_ordered(), 1068 exp.Properties: lambda self: self._parse_properties(), 1069 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 1070 exp.Qualify: lambda self: self._parse_qualify(), 1071 exp.Returning: lambda self: self._parse_returning(), 1072 exp.Select: lambda self: self._parse_select(), 
1073 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 1074 exp.Table: lambda self: self._parse_table_parts(), 1075 exp.TableAlias: lambda self: self._parse_table_alias(), 1076 exp.Tuple: lambda self: self._parse_value(values=False), 1077 exp.Whens: lambda self: self._parse_when_matched(), 1078 exp.Where: lambda self: self._parse_where(), 1079 exp.Window: lambda self: self._parse_named_window(), 1080 exp.With: lambda self: self._parse_with(), 1081 } 1082 1083 STATEMENT_PARSERS: t.ClassVar = { 1084 TokenType.ALTER: lambda self: self._parse_alter(), 1085 TokenType.ANALYZE: lambda self: self._parse_analyze(), 1086 TokenType.BEGIN: lambda self: self._parse_transaction(), 1087 TokenType.CACHE: lambda self: self._parse_cache(), 1088 TokenType.COMMENT: lambda self: self._parse_comment(), 1089 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 1090 TokenType.COPY: lambda self: self._parse_copy(), 1091 TokenType.CREATE: lambda self: self._parse_create(), 1092 TokenType.DELETE: lambda self: self._parse_delete(), 1093 TokenType.DESC: lambda self: self._parse_describe(), 1094 TokenType.DESCRIBE: lambda self: self._parse_describe(), 1095 TokenType.DROP: lambda self: self._parse_drop(), 1096 TokenType.GRANT: lambda self: self._parse_grant(), 1097 TokenType.REVOKE: lambda self: self._parse_revoke(), 1098 TokenType.INSERT: lambda self: self._parse_insert(), 1099 TokenType.KILL: lambda self: self._parse_kill(), 1100 TokenType.LOAD: lambda self: self._parse_load(), 1101 TokenType.MERGE: lambda self: self._parse_merge(), 1102 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 1103 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())), 1104 TokenType.REFRESH: lambda self: self._parse_refresh(), 1105 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 1106 TokenType.SET: lambda self: self._parse_set(), 1107 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 1108 
TokenType.UNCACHE: lambda self: self._parse_uncache(), 1109 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 1110 TokenType.UPDATE: lambda self: self._parse_update(), 1111 TokenType.USE: lambda self: self._parse_use(), 1112 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 1113 } 1114 1115 UNARY_PARSERS: t.ClassVar = { 1116 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 1117 TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())), 1118 TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())), 1119 TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())), 1120 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())), 1121 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())), 1122 } 1123 1124 STRING_PARSERS: t.ClassVar = { 1125 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 1126 exp.RawString(this=token.text), token 1127 ), 1128 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 1129 exp.National(this=token.text), token 1130 ), 1131 TokenType.RAW_STRING: lambda self, token: self.expression( 1132 exp.RawString(this=token.text), token 1133 ), 1134 TokenType.STRING: lambda self, token: self.expression( 1135 exp.Literal(this=token.text, is_string=True), token 1136 ), 1137 TokenType.UNICODE_STRING: lambda self, token: self.expression( 1138 exp.UnicodeString( 1139 this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string() 1140 ), 1141 token, 1142 ), 1143 } 1144 1145 NUMERIC_PARSERS: t.ClassVar = { 1146 TokenType.BIT_STRING: lambda self, token: self.expression( 1147 exp.BitString(this=token.text), token 1148 ), 1149 TokenType.BYTE_STRING: lambda self, token: self.expression( 1150 exp.ByteString( 1151 this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None 1152 ), 1153 token, 1154 ), 1155 
TokenType.HEX_STRING: lambda self, token: self.expression( 1156 exp.HexString( 1157 this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None 1158 ), 1159 token, 1160 ), 1161 TokenType.NUMBER: lambda self, token: self.expression( 1162 exp.Literal(this=token.text, is_string=False), token 1163 ), 1164 } 1165 1166 PRIMARY_PARSERS: t.ClassVar = { 1167 **STRING_PARSERS, 1168 **NUMERIC_PARSERS, 1169 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 1170 TokenType.NULL: lambda self, _: self.expression(exp.Null()), 1171 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)), 1172 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)), 1173 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 1174 TokenType.STAR: lambda self, _: self._parse_star_ops(), 1175 } 1176 1177 PLACEHOLDER_PARSERS: t.ClassVar = { 1178 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()), 1179 TokenType.PARAMETER: lambda self: self._parse_parameter(), 1180 TokenType.COLON: lambda self: ( 1181 self.expression(exp.Placeholder(this=self._prev.text)) 1182 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 1183 else None 1184 ), 1185 } 1186 1187 RANGE_PARSERS: t.ClassVar = { 1188 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 1189 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 1190 TokenType.GLOB: binary_range_parser(exp.Glob), 1191 TokenType.ILIKE: binary_range_parser(exp.ILike), 1192 TokenType.IN: lambda self, this: self._parse_in(this), 1193 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 1194 TokenType.IS: lambda self, this: self._parse_is(this), 1195 TokenType.LIKE: binary_range_parser(exp.Like), 1196 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 1197 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 1198 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 1199 TokenType.SIMILAR_TO: 
binary_range_parser(exp.SimilarTo), 1200 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 1201 TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys), 1202 TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys), 1203 TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath), 1204 TokenType.ADJACENT: binary_range_parser(exp.Adjacent), 1205 TokenType.OPERATOR: lambda self, this: self._parse_operator(this), 1206 TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft), 1207 TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight), 1208 } 1209 1210 PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = { 1211 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 1212 "AS": lambda self, query: self._build_pipe_cte( 1213 query, [exp.Star()], self._parse_table_alias() 1214 ), 1215 "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False), 1216 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 1217 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 1218 "ORDER BY": lambda self, query: query.order_by( 1219 self._parse_order(), append=False, copy=False 1220 ), 1221 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1222 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 1223 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 1224 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 1225 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 1226 } 1227 1228 PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1229 "ALLOWED_VALUES": lambda self: self.expression( 1230 exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary)) 1231 ), 1232 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 1233 "AUTO": lambda self: self._parse_auto_property(), 1234 "AUTO_INCREMENT": lambda self: 
self._parse_property_assignment(exp.AutoIncrementProperty), 1235 "BACKUP": lambda self: self.expression( 1236 exp.BackupProperty(this=self._parse_var(any_token=True)) 1237 ), 1238 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 1239 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1240 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 1241 "CHECKSUM": lambda self: self._parse_checksum(), 1242 "CLUSTER BY": lambda self: self._parse_cluster_property(), 1243 "CLUSTERED": lambda self: self._parse_clustered_by(), 1244 "COLLATE": lambda self, **kwargs: self._parse_property_assignment( 1245 exp.CollateProperty, **kwargs 1246 ), 1247 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 1248 "CONTAINS": lambda self: self._parse_contains_property(), 1249 "COPY": lambda self: self._parse_copy_property(), 1250 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 1251 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 1252 "DEFINER": lambda self: self._parse_definer(), 1253 "DETERMINISTIC": lambda self: self.expression( 1254 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1255 ), 1256 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 1257 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 1258 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()), 1259 "DISTKEY": lambda self: self._parse_distkey(), 1260 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 1261 "EMPTY": lambda self: self.expression(exp.EmptyProperty()), 1262 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 1263 "ENVIRONMENT": lambda self: self.expression( 1264 exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment)) 1265 ), 1266 "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty), 1267 "EXECUTE": 
lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1268 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()), 1269 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1270 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1271 "FREESPACE": lambda self: self._parse_freespace(), 1272 "GLOBAL": lambda self: self.expression(exp.GlobalProperty()), 1273 "HEAP": lambda self: self.expression(exp.HeapProperty()), 1274 "ICEBERG": lambda self: self.expression(exp.IcebergProperty()), 1275 "IMMUTABLE": lambda self: self.expression( 1276 exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE")) 1277 ), 1278 "INHERITS": lambda self: self.expression( 1279 exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table)) 1280 ), 1281 "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())), 1282 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1283 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1284 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1285 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1286 "LIKE": lambda self: self._parse_create_like(), 1287 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1288 "LOCK": lambda self: self._parse_locking(), 1289 "LOCKING": lambda self: self._parse_locking(), 1290 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1291 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()), 1292 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1293 "MODIFIES": lambda self: self._parse_modifies_property(), 1294 "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)), 1295 "NO": lambda self: self._parse_no_property(), 1296 "ON": lambda self: self._parse_on_property(), 1297 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1298 "OUTPUT": 
lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())), 1299 "PARTITION": lambda self: self._parse_partitioned_of(), 1300 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1301 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1302 "PARTITIONED_BY": lambda self: self._parse_partitioned_by(), 1303 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1304 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1305 "READS": lambda self: self._parse_reads_property(), 1306 "REMOTE": lambda self: self._parse_remote_with_connection(), 1307 "RETURNS": lambda self: self._parse_returns(), 1308 "STRICT": lambda self: self.expression(exp.StrictProperty()), 1309 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()), 1310 "ROW": lambda self: self._parse_row(), 1311 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1312 "SAMPLE": lambda self: self.expression( 1313 exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise()) 1314 ), 1315 "SECURE": lambda self: self.expression(exp.SecureProperty()), 1316 "SECURITY": lambda self: self._parse_sql_security(), 1317 "SQL SECURITY": lambda self: self._parse_sql_security(), 1318 "SET": lambda self: self.expression(exp.SetProperty(multi=False)), 1319 "SETTINGS": lambda self: self._parse_settings_property(), 1320 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1321 "SORTKEY": lambda self: self._parse_sortkey(), 1322 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1323 "STABLE": lambda self: self.expression( 1324 exp.StabilityProperty(this=exp.Literal.string("STABLE")) 1325 ), 1326 "STORED": lambda self: self._parse_stored(), 1327 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1328 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1329 "TEMP": lambda self: self.expression(exp.TemporaryProperty()), 1330 "TEMPORARY": lambda 
self: self.expression(exp.TemporaryProperty()), 1331 "TO": lambda self: self._parse_to_table(), 1332 "TRANSIENT": lambda self: self.expression(exp.TransientProperty()), 1333 "TRANSFORM": lambda self: self.expression( 1334 exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression)) 1335 ), 1336 "TTL": lambda self: self._parse_ttl(), 1337 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1338 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()), 1339 "VOLATILE": lambda self: self._parse_volatile_property(), 1340 "WITH": lambda self: self._parse_with_property(), 1341 } 1342 1343 CONSTRAINT_PARSERS: t.ClassVar = { 1344 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1345 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1346 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)), 1347 "CHARACTER SET": lambda self: self.expression( 1348 exp.CharacterSetColumnConstraint(this=self._parse_var_or_string()) 1349 ), 1350 "CHECK": lambda self: self._parse_check_constraint(), 1351 "COLLATE": lambda self: self.expression( 1352 exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column()) 1353 ), 1354 "COMMENT": lambda self: self.expression( 1355 exp.CommentColumnConstraint(this=self._parse_string()) 1356 ), 1357 "COMPRESS": lambda self: self._parse_compress(), 1358 "CLUSTERED": lambda self: self.expression( 1359 exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1360 ), 1361 "NONCLUSTERED": lambda self: self.expression( 1362 exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered)) 1363 ), 1364 "DEFAULT": lambda self: self.expression( 1365 exp.DefaultColumnConstraint(this=self._parse_bitwise()) 1366 ), 1367 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())), 1368 "EPHEMERAL": lambda self: self.expression( 1369 
exp.EphemeralColumnConstraint(this=self._parse_bitwise()) 1370 ), 1371 "EXCLUDE": lambda self: self.expression( 1372 exp.ExcludeColumnConstraint(this=self._parse_index_params()) 1373 ), 1374 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1375 "FORMAT": lambda self: self.expression( 1376 exp.DateFormatColumnConstraint(this=self._parse_var_or_string()) 1377 ), 1378 "GENERATED": lambda self: self._parse_generated_as_identity(), 1379 "IDENTITY": lambda self: self._parse_auto_increment(), 1380 "INLINE": lambda self: self._parse_inline(), 1381 "LIKE": lambda self: self._parse_create_like(), 1382 "NOT": lambda self: self._parse_not_constraint(), 1383 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)), 1384 "ON": lambda self: ( 1385 ( 1386 self._match(TokenType.UPDATE) 1387 and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function())) 1388 ) 1389 or self.expression(exp.OnProperty(this=self._parse_id_var())) 1390 ), 1391 "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())), 1392 "PERIOD": lambda self: self._parse_period_for_system_time(), 1393 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1394 "REFERENCES": lambda self: self._parse_references(match=False), 1395 "TITLE": lambda self: self.expression( 1396 exp.TitleColumnConstraint(this=self._parse_var_or_string()) 1397 ), 1398 "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])), 1399 "UNIQUE": lambda self: self._parse_unique(), 1400 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()), 1401 "WITH": lambda self: self.expression( 1402 exp.Properties(expressions=self._parse_wrapped_properties()) 1403 ), 1404 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1405 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1406 } 1407 1408 def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None: 1409 if not 
self._match(TokenType.L_PAREN, advance=False): 1410 # Partitioning by bucket or truncate follows the syntax: 1411 # PARTITION BY (BUCKET(..) | TRUNCATE(..)) 1412 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1413 self._retreat(self._index - 1) 1414 return None 1415 1416 klass = ( 1417 exp.PartitionedByBucket 1418 if self._prev.text.upper() == "BUCKET" 1419 else exp.PartitionByTruncate 1420 ) 1421 1422 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1423 this, expression = seq_get(args, 0), seq_get(args, 1) 1424 1425 if isinstance(this, exp.Literal): 1426 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1427 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1428 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1429 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1430 # 1431 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1432 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1433 this, expression = expression, this 1434 1435 return self.expression(klass(this=this, expression=expression)) 1436 1437 ALTER_PARSERS: t.ClassVar = { 1438 "ADD": lambda self: self._parse_alter_table_add(), 1439 "AS": lambda self: self._parse_select(), 1440 "ALTER": lambda self: self._parse_alter_table_alter(), 1441 "CLUSTER BY": lambda self: self._parse_cluster_property(), 1442 "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())), 1443 "DROP": lambda self: self._parse_alter_table_drop(), 1444 "RENAME": lambda self: self._parse_alter_table_rename(), 1445 "SET": lambda self: self._parse_alter_table_set(), 1446 "SWAP": lambda self: self.expression( 1447 
exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True)) 1448 ), 1449 } 1450 1451 ALTER_ALTER_PARSERS: t.ClassVar = { 1452 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1453 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1454 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1455 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1456 } 1457 1458 SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = { 1459 "CHECK", 1460 "EXCLUDE", 1461 "FOREIGN KEY", 1462 "LIKE", 1463 "PERIOD", 1464 "PRIMARY KEY", 1465 "UNIQUE", 1466 "BUCKET", 1467 "TRUNCATE", 1468 } 1469 1470 NO_PAREN_FUNCTION_PARSERS: t.ClassVar = { 1471 "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())), 1472 "CASE": lambda self: self._parse_case(), 1473 "CONNECT_BY_ROOT": lambda self: self.expression( 1474 exp.ConnectByRoot(this=self._parse_column()) 1475 ), 1476 "IF": lambda self: self._parse_if(), 1477 } 1478 1479 INVALID_FUNC_NAME_TOKENS: t.ClassVar = { 1480 TokenType.IDENTIFIER, 1481 TokenType.STRING, 1482 } 1483 1484 FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"} 1485 1486 KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1487 1488 FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = { 1489 **{ 1490 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1491 }, 1492 **{ 1493 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1494 }, 1495 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1496 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1497 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1498 "CHAR": lambda self: self._parse_char(), 1499 "CHR": lambda self: self._parse_char(), 1500 "DECODE": lambda self: self._parse_decode(), 1501 "EXTRACT": lambda self: self._parse_extract(), 1502 "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1503 "GAP_FILL": lambda self: self._parse_gap_fill(), 
1504 "INITCAP": lambda self: self._parse_initcap(), 1505 "JSON_OBJECT": lambda self: self._parse_json_object(), 1506 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1507 "JSON_TABLE": lambda self: self._parse_json_table(), 1508 "MATCH": lambda self: self._parse_match_against(), 1509 "NORMALIZE": lambda self: self._parse_normalize(), 1510 "OPENJSON": lambda self: self._parse_open_json(), 1511 "OVERLAY": lambda self: self._parse_overlay(), 1512 "POSITION": lambda self: self._parse_position(), 1513 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1514 "STRING_AGG": lambda self: self._parse_string_agg(), 1515 "SUBSTRING": lambda self: self._parse_substring(), 1516 "TRIM": lambda self: self._parse_trim(), 1517 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1518 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1519 "XMLELEMENT": lambda self: self._parse_xml_element(), 1520 "XMLTABLE": lambda self: self._parse_xml_table(), 1521 } 1522 1523 QUERY_MODIFIER_PARSERS: t.ClassVar = { 1524 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1525 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1526 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1527 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1528 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1529 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1530 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1531 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1532 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1533 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1534 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1535 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1536 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1537 
TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1538 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1539 TokenType.CLUSTER_BY: lambda self: ( 1540 "cluster", 1541 self._parse_cluster(), 1542 ), 1543 TokenType.DISTRIBUTE_BY: lambda self: ( 1544 "distribute", 1545 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1546 ), 1547 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1548 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1549 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1550 } 1551 QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS) 1552 1553 SET_PARSERS: t.ClassVar = { 1554 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1555 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1556 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1557 "TRANSACTION": lambda self: self._parse_set_transaction(), 1558 } 1559 1560 SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {} 1561 1562 TYPE_LITERAL_PARSERS: t.ClassVar = { 1563 exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)), 1564 } 1565 1566 TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {} 1567 1568 DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1569 1570 PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1571 1572 TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1573 TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = { 1574 "ISOLATION": ( 1575 ("LEVEL", "REPEATABLE", "READ"), 1576 ("LEVEL", "READ", "COMMITTED"), 1577 ("LEVEL", "READ", "UNCOMITTED"), 1578 ("LEVEL", "SERIALIZABLE"), 1579 ), 1580 "READ": ("WRITE", "ONLY"), 1581 } 1582 1583 CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = { 1584 
**dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()), 1585 "DO": ("NOTHING", "UPDATE"), 1586 } 1587 1588 TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = { 1589 "INSTEAD": (("OF",),), 1590 "BEFORE": tuple(), 1591 "AFTER": tuple(), 1592 } 1593 1594 TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = { 1595 "NOT": (("DEFERRABLE",),), 1596 "DEFERRABLE": tuple(), 1597 } 1598 1599 CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = { 1600 "SCALE": ("EXTEND", "NOEXTEND"), 1601 "SHARD": ("EXTEND", "NOEXTEND"), 1602 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1603 **dict.fromkeys( 1604 ( 1605 "SESSION", 1606 "GLOBAL", 1607 "KEEP", 1608 "NOKEEP", 1609 "ORDER", 1610 "NOORDER", 1611 "NOCACHE", 1612 "CYCLE", 1613 "NOCYCLE", 1614 "NOMINVALUE", 1615 "NOMAXVALUE", 1616 "NOSCALE", 1617 "NOSHARD", 1618 ), 1619 tuple(), 1620 ), 1621 } 1622 1623 ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")} 1624 1625 USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1626 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1627 ) 1628 1629 CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1630 1631 SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1632 "TYPE": ("EVOLUTION",), 1633 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1634 } 1635 1636 PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {} 1637 1638 EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys( 1639 ("CALLER", "SELF", "OWNER"), tuple() 1640 ) 1641 1642 KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1643 "NOT": ("ENFORCED",), 1644 "MATCH": ( 1645 "FULL", 1646 "PARTIAL", 1647 "SIMPLE", 1648 ), 1649 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1650 "USING": ( 1651 "BTREE", 1652 "HASH", 1653 ), 1654 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1655 } 1656 1657 WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = { 1658 "NO": ("OTHERS",), 1659 "CURRENT": ("ROW",), 1660 
**dict.fromkeys(("GROUP", "TIES"), tuple()), 1661 } 1662 1663 INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1664 1665 CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"} 1666 HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"} 1667 HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1668 1669 OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"} 1670 1671 OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN} 1672 1673 TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1674 1675 VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1676 1677 WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS} 1678 WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER} 1679 WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"} 1680 1681 JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1682 1683 FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1684 1685 ADD_CONSTRAINT_TOKENS: t.ClassVar = { 1686 TokenType.CONSTRAINT, 1687 TokenType.FOREIGN_KEY, 1688 TokenType.INDEX, 1689 TokenType.KEY, 1690 TokenType.PRIMARY_KEY, 1691 TokenType.UNIQUE, 1692 } 1693 1694 DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT} 1695 1696 UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1697 1698 SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1699 1700 COPY_INTO_VARLEN_OPTIONS: t.ClassVar = { 1701 "FILE_FORMAT", 1702 "COPY_OPTIONS", 1703 "FORMAT_OPTIONS", 1704 "CREDENTIAL", 1705 } 1706 1707 IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1708 1709 ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {} 1710 1711 ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1712 1713 
PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {
    TokenType.ON,
    TokenType.COMMA,
    TokenType.L_PAREN,
}

# Style keywords accepted by the DESCRIBE statement
DESCRIBE_STYLES: t.ClassVar = {
    "ANALYZE",
    "EXTENDED",
    "FORMATTED",
    "HISTORY",
}

SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {
    "=",
    ":=",
    "TO",
}

# Style keywords accepted by the ANALYZE statement
ANALYZE_STYLES: t.ClassVar = {
    "BUFFER_USAGE_LIMIT",
    "FULL",
    "LOCAL",
    "NO_WRITE_TO_BINLOG",
    "SAMPLE",
    "SKIP_LOCKED",
    "VERBOSE",
}

# Keyword -> parser callback; each lambda looks the method up on `self` at call time
ANALYZE_EXPRESSION_PARSERS: t.ClassVar = {
    "ALL": lambda self: self._parse_analyze_columns(),
    "COMPUTE": lambda self: self._parse_analyze_statistics(),
    "DELETE": lambda self: self._parse_analyze_delete(),
    "DROP": lambda self: self._parse_analyze_histogram(),
    "ESTIMATE": lambda self: self._parse_analyze_statistics(),
    "LIST": lambda self: self._parse_analyze_list(),
    "PREDICATE": lambda self: self._parse_analyze_columns(),
    "UPDATE": lambda self: self._parse_analyze_histogram(),
    "VALIDATE": lambda self: self._parse_analyze_validate(),
}

PARTITION_KEYWORDS: t.ClassVar = {
    "PARTITION",
    "SUBPARTITION",
}

AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (
    TokenType.LIMIT,
    TokenType.OFFSET,
)

OPERATION_MODIFIERS: t.ClassVar[set[str]] = set()

RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {
    "BREADTH",
    "DEPTH",
    "CYCLE",
}

SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {
    "DEFINER",
    "INVOKER",
    "NONE",
}

MODIFIABLES: t.ClassVar = (
    exp.Query,
    exp.Table,
    exp.TableFromRows,
    exp.Values,
)

STRICT_CAST: t.ClassVar = True

PREFIXED_PIVOT_COLUMNS: t.ClassVar = False
IDENTIFY_PIVOT_STRINGS: t.ClassVar = False
# Selects when the aggregation's name becomes part of a pivoted column's name:
#   "agg_name_if_aliased"             - only when the aggregation has an explicit alias
#   "agg_name_if_aliased_or_multiple" - when aliased, or whenever several aggregations exist
#   "agg_name_if_multiple"            - only when several aggregations exist
#                                       (a single aggregation yields a value-only name)
PIVOT_COLUMN_NAMING: t.ClassVar[str] = "agg_name_if_aliased"

LOG_DEFAULTS_TO_LN: t.ClassVar = False

# True if the table sample clause is written using CSV syntax
TABLESAMPLE_CSV: t.ClassVar = False

# Sampling method assumed for table sampling when none is given
DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None

# True if assignments in a SET command must use a delimiter (e.g. "=")
SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True

# True if TRIM takes the characters to trim as its first argument
TRIM_PATTERN_FIRST: t.ClassVar = False

# True if string literals may be used as aliases, e.g. `SELECT COUNT(*) 'count'`
STRING_ALIASES: t.ClassVar = False

# True if query modifiers such as LIMIT belong to the UNION node itself
# rather than to its right operand
MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True
SET_OP_MODIFIERS: t.ClassVar = {
    "order",
    "limit",
    "offset",
}

# True if an IF statement not followed by a left parenthesis is parsed as a command
NO_PAREN_IF_COMMANDS: t.ClassVar = True

# True if the -> and ->> operators expect JSON-typed documents (e.g. Postgres)
JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False

# True if the `:` operator extracts a value from a VARIANT column
COLON_IS_VARIANT_EXTRACT: t.ClassVar = False

# True if the VALUES keyword must be followed by '(' to start a VALUES clause.
# When set and no '(' follows, the keyword is treated as an identifier instead
VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True

# True if implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False

# True if interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
INTERVAL_SPANS: t.ClassVar = True

# True if a table reference may be followed by a PARTITION clause
SUPPORTS_PARTITION_SELECTION: t.ClassVar = False

# True if `expr` must be parenthesized in the `name AS expr` schema/column constraint
WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True

# True if the 'AS' keyword may be omitted in a CTE definition
OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True

# True if renaming a column through ALTER requires the COLUMN keyword
ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True

# True if ALTER statements may contain partition specifications
ALTER_TABLE_PARTITIONS: t.ClassVar = False

# True if every join type has the same precedence, i.e. joins "naturally" form a
# left-deep tree. Standard SQL gives joins written with the JOIN keyword higher
# precedence than comma-joins, so JOIN operators are applied before comma operators.
# Some dialects, such as BigQuery, instead give all joins the same precedence.
JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False

# True if TIMESTAMP <literal> can produce a zone-aware timestamp
ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False

# Whether map literals support arbitrary expressions as keys.
# When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
# When False, keys are typically restricted to identifiers.
MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False

# True if JSON_EXTRACT needs a JSON expression as its first argument, e.g. this
# holds for Snowflake but not for BigQuery, which can also process strings
JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False

# Dialects like Databricks support JOINs without join criteria.
# Adding an ON TRUE makes transpilation semantically correct for other dialects
ADD_JOIN_ON_TRUE: t.ClassVar = False

# True if INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
# may omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False

# Tries built from the space-separated keys of SHOW_PARSERS / SET_PARSERS
SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)


def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Store the parser configuration and initialize the per-parse state.

    Args:
        error_level: The error level to use; a falsy value selects ErrorLevel.IMMEDIATE.
        error_message_context: Stored as-is on `error_message_context`.
        max_errors: Stored as-is on `max_errors`.
        max_nodes: Stored as-is on `max_nodes`.
        dialect: Passed through `_resolve_dialect` and stored on `dialect`.
    """
    # Configuration
    self.error_level: ErrorLevel = error_level if error_level else ErrorLevel.IMMEDIATE
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Per-parse state, initialized to the same values that `reset` assigns
    self.sql: str = ""
    self.errors: list[ParseError] = []
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []
    self._pipe_cte_counter: int = 0
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0
    self._node_count: int = 0


def reset(self) -> None:
    """Return all per-parse state to its initial, empty values."""
    self.sql = ""

    # Each container gets its own fresh list so that no state is shared
    self.errors = []
    self._tokens = []
    self._prev_comments = []
    self._chunks = []

    self._tokens_size = 0
    self._index = 0
    self._chunk_index = 0
    self._pipe_cte_counter = 0
    self._node_count = 0

    self._curr = SENTINEL_NONE
    self._next = SENTINEL_NONE
    self._prev = SENTINEL_NONE


def _advance(self, times: i64 = 1) -> None:
    """
    Move the cursor by `times` tokens and refresh the current/next/previous tokens.

    Positions at or past the end of the token list yield SENTINEL_NONE. The previous
    token's comments are exposed through `_prev_comments`.
    """
    position = self._index + times
    self._index = position

    stream = self._tokens
    limit = self._tokens_size

    if position < limit:
        self._curr = stream[position]
    else:
        self._curr = SENTINEL_NONE

    lookahead = position + 1
    if lookahead < limit:
        self._next = stream[lookahead]
    else:
        self._next = SENTINEL_NONE

    if position > 0:
        previous = stream[position - 1]
        self._prev = previous
        self._prev_comments = previous.comments
    else:
        # There is nothing before the first token
        self._prev = SENTINEL_NONE
        self._prev_comments = []


def _advance_chunk(self) -> None:
    """Load the chunk at `_chunk_index` as the active token list and move to its first token."""
    # Start one step before the chunk so the `_advance` call below lands on index 0
    self._index = -1

    chunk = self._chunks[self._chunk_index]
    self._tokens = chunk
    self._tokens_size = i64(len(chunk))
    self._chunk_index += 1

    self._advance()


def _retreat(self, index: i64) -> None:
    """Move the cursor to the absolute position `index`; a no-op if it is already there."""
    offset = index - self._index
    if offset:
        self._advance(offset)


def _add_comments(self, expression: exp.Expr | None) -> None:
    """Attach the pending `_prev_comments` to `expression` and clear them, if both are truthy."""
    if not expression or not self._prev_comments:
        return

    expression.add_comments(self._prev_comments)
    self._prev_comments = []


def _match(
    self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
) -> bool:
    """
    Check whether the current token has the given type.

    On a match, the token is consumed when `advance` is set, and any pending
    comments are attached to `expression`.

    Returns:
        True if the current token matched, False otherwise.
    """
    if self._curr.token_type != token_type:
        return False

    if advance:
        self._advance()

    self._add_comments(expression)
    return True


def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
    """Return whether the current token's type is in `types`, consuming it if `advance` is set."""
    if self._curr.token_type not in types:
        return False

    if advance:
        self._advance()

    return True


def _match_pair(
    self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
) -> bool:
    """
    Return whether the current and next tokens have types `token_type_a` and `token_type_b`.

    Both tokens are consumed on a match when `advance` is set.
    """
    if self._curr.token_type != token_type_a or self._next.token_type != token_type_b:
        return False

    if advance:
        self._advance(2)

    return True
1948 1949 def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool: 1950 if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts: 1951 if advance: 1952 self._advance() 1953 return True 1954 return False 1955 1956 def _match_text_seq(self, *texts: str, advance: bool = True) -> bool: 1957 index = self._index 1958 string_type = TokenType.STRING 1959 for text in texts: 1960 if self._curr.token_type != string_type and self._curr.text.upper() == text: 1961 self._advance() 1962 else: 1963 self._retreat(index) 1964 return False 1965 1966 if not advance: 1967 self._retreat(index) 1968 1969 return True 1970 1971 def _is_connected(self) -> bool: 1972 prev = self._prev 1973 curr = self._curr 1974 return bool(prev and curr and prev.end + 1 == curr.start) 1975 1976 def _find_sql(self, start: Token, end: Token) -> str: 1977 return self.sql[start.start : end.end + 1] 1978 1979 def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None: 1980 token = token or self._curr or self._prev or Token.string("") 1981 formatted_sql, start_context, highlight, end_context = highlight_sql( 1982 sql=self.sql, 1983 positions=[(token.start, token.end)], 1984 context_length=self.error_message_context, 1985 ) 1986 formatted_message = f"{message}. 
Line {token.line}, Col: {token.col}.\n {formatted_sql}" 1987 1988 error = ParseError.new( 1989 formatted_message, 1990 description=message, 1991 line=token.line, 1992 col=token.col, 1993 start_context=start_context, 1994 highlight=highlight, 1995 end_context=end_context, 1996 ) 1997 1998 if self.error_level == ErrorLevel.IMMEDIATE: 1999 raise error 2000 2001 self.errors.append(error) 2002 2003 def validate_expression(self, expression: E, args: list | None = None) -> E: 2004 if self.max_nodes > -1: 2005 self._node_count += 1 2006 if self._node_count > self.max_nodes: 2007 self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded") 2008 if self.error_level != ErrorLevel.IGNORE: 2009 for error_message in expression.error_messages(args): 2010 self.raise_error(error_message) 2011 return expression 2012 2013 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None: 2014 index = self._index 2015 error_level = self.error_level 2016 this: T | None = None 2017 2018 self.error_level = ErrorLevel.IMMEDIATE 2019 try: 2020 this = parse_method() 2021 except ParseError: 2022 this = None 2023 finally: 2024 if not this or retreat: 2025 self._retreat(index) 2026 self.error_level = error_level 2027 2028 return this 2029 2030 def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]: 2031 """ 2032 Parses a list of tokens and returns a list of syntax trees, one tree 2033 per parsed SQL statement. 2034 2035 Args: 2036 raw_tokens: The list of tokens. 2037 sql: The original SQL string. 2038 2039 Returns: 2040 The list of the produced syntax trees. 2041 """ 2042 return self._parse( 2043 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 2044 ) 2045 2046 def parse_into( 2047 self, 2048 expression_types: exp.IntoType, 2049 raw_tokens: list[Token], 2050 sql: str | None = None, 2051 ) -> list[exp.Expr | None]: 2052 """ 2053 Parses a list of tokens into a given Expr type. 
If a collection of Expr 2054 types is given instead, this method will try to parse the token list into each one 2055 of them, stopping at the first for which the parsing succeeds. 2056 2057 Args: 2058 expression_types: The expression type(s) to try and parse the token list into. 2059 raw_tokens: The list of tokens. 2060 sql: The original SQL string, used to produce helpful debug messages. 2061 2062 Returns: 2063 The target Expr. 2064 """ 2065 errors = [] 2066 for expression_type in ensure_list(expression_types): 2067 parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type)) 2068 if not parser: 2069 raise TypeError(f"No parser registered for {expression_type}") 2070 2071 try: 2072 return self._parse(parser, raw_tokens, sql) 2073 except ParseError as e: 2074 e.errors[0]["into_expression"] = expression_type 2075 errors.append(e) 2076 2077 raise ParseError( 2078 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 2079 errors=merge_errors(errors), 2080 ) from errors[-1] 2081 2082 def check_errors(self) -> None: 2083 """Logs or raises any found errors, depending on the chosen error level setting.""" 2084 if self.error_level == ErrorLevel.WARN: 2085 for error in self.errors: 2086 logger.error(str(error)) 2087 elif self.error_level == ErrorLevel.RAISE and self.errors: 2088 raise ParseError( 2089 concat_messages(self.errors, self.max_errors), 2090 errors=merge_errors(self.errors), 2091 ) 2092 2093 def expression( 2094 self, 2095 instance: E, 2096 token: Token | None = None, 2097 comments: list[str] | None = None, 2098 ) -> E: 2099 if token: 2100 instance.update_positions(token) 2101 instance.add_comments(comments) if comments else self._add_comments(instance) 2102 if not instance.is_primitive: 2103 instance = self.validate_expression(instance) 2104 return instance 2105 2106 def _parse_batch_statements( 2107 self, 2108 parse_method: t.Callable[[Parser], exp.Expr | None], 2109 sep_first_statement: bool = True, 2110 ) -> list[exp.Expr | None]: 
2111 expressions = [] 2112 2113 # Chunkification binds if/while statements with the first statement of the body 2114 if sep_first_statement: 2115 self._match(TokenType.BEGIN) 2116 expressions.append(parse_method(self)) 2117 2118 chunks_length = len(self._chunks) 2119 while self._chunk_index < chunks_length: 2120 self._advance_chunk() 2121 2122 if self._match(TokenType.ELSE, advance=False): 2123 return expressions 2124 2125 if expressions and not self._next and self._match(TokenType.END): 2126 expressions.append(exp.EndStatement()) 2127 continue 2128 2129 expressions.append(parse_method(self)) 2130 2131 if self._index < self._tokens_size: 2132 self.raise_error("Invalid expression / Unexpected token") 2133 2134 self.check_errors() 2135 2136 return expressions 2137 2138 def _parse( 2139 self, 2140 parse_method: t.Callable[[Parser], exp.Expr | None], 2141 raw_tokens: list[Token], 2142 sql: str | None = None, 2143 ) -> list[exp.Expr | None]: 2144 self.reset() 2145 self.sql = sql or "" 2146 2147 total = len(raw_tokens) 2148 chunks: list[list[Token]] = [[]] 2149 2150 for i, token in enumerate(raw_tokens): 2151 if token.token_type == TokenType.SEMICOLON: 2152 if token.comments: 2153 chunks.append([token]) 2154 2155 if i < total - 1: 2156 chunks.append([]) 2157 else: 2158 chunks[-1].append(token) 2159 2160 self._chunks = chunks 2161 2162 return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False) 2163 2164 def _warn_unsupported(self) -> None: 2165 if self._tokens_size <= 1: 2166 return 2167 2168 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 2169 # interested in emitting a warning for the one being currently processed. 2170 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 2171 2172 logger.warning( 2173 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 
2174 ) 2175 2176 def _parse_command(self) -> exp.Command: 2177 self._warn_unsupported() 2178 comments = self._prev_comments 2179 return self.expression( 2180 exp.Command(this=self._prev.text.upper(), expression=self._parse_string()), 2181 comments=comments, 2182 ) 2183 2184 def _parse_comment(self, allow_exists: bool = True) -> exp.Expr: 2185 start = self._prev 2186 exists = self._parse_exists() if allow_exists else None 2187 2188 self._match(TokenType.ON) 2189 2190 materialized = self._match_text_seq("MATERIALIZED") 2191 kind = self._match_set(self.CREATABLES) and self._prev 2192 if not kind: 2193 return self._parse_as_command(start) 2194 2195 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2196 this = self._parse_user_defined_function(kind=kind.token_type) 2197 elif kind.token_type == TokenType.TABLE: 2198 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 2199 elif kind.token_type == TokenType.COLUMN: 2200 this = self._parse_column() 2201 else: 2202 this = self._parse_table_parts(schema=True) 2203 2204 self._match(TokenType.IS) 2205 2206 return self.expression( 2207 exp.Comment( 2208 this=this, 2209 kind=kind.text, 2210 expression=self._parse_string(), 2211 exists=exists, 2212 materialized=materialized, 2213 ) 2214 ) 2215 2216 def _parse_to_table( 2217 self, 2218 ) -> exp.ToTableProperty: 2219 table = self._parse_table_parts(schema=True) 2220 return self.expression(exp.ToTableProperty(this=table)) 2221 2222 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 2223 def _parse_ttl(self) -> exp.Expr: 2224 def _parse_ttl_action() -> exp.Expr | None: 2225 this = self._parse_bitwise() 2226 2227 if self._match_text_seq("DELETE"): 2228 return self.expression(exp.MergeTreeTTLAction(this=this, delete=True)) 2229 if self._match_text_seq("RECOMPRESS"): 2230 return self.expression( 2231 exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise()) 2232 ) 2233 if 
self._match_text_seq("TO", "DISK"): 2234 return self.expression( 2235 exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string()) 2236 ) 2237 if self._match_text_seq("TO", "VOLUME"): 2238 return self.expression( 2239 exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string()) 2240 ) 2241 2242 return this 2243 2244 expressions = self._parse_csv(_parse_ttl_action) 2245 where = self._parse_where() 2246 group = self._parse_group() 2247 2248 aggregates = None 2249 if group and self._match(TokenType.SET): 2250 aggregates = self._parse_csv(self._parse_set_item) 2251 2252 return self.expression( 2253 exp.MergeTreeTTL( 2254 expressions=expressions, where=where, group=group, aggregates=aggregates 2255 ) 2256 ) 2257 2258 def _parse_condition(self) -> exp.Expr | None: 2259 return self._parse_wrapped(parse_method=self._parse_expression, optional=True) 2260 2261 def _parse_block(self) -> exp.Block: 2262 return self.expression( 2263 exp.Block( 2264 expressions=self._parse_batch_statements( 2265 parse_method=lambda self: self._parse_statement() 2266 ) 2267 ) 2268 ) 2269 2270 def _parse_whileblock(self) -> exp.WhileBlock: 2271 return self.expression( 2272 exp.WhileBlock(this=self._parse_condition(), body=self._parse_block()) 2273 ) 2274 2275 def _parse_statement(self) -> exp.Expr | None: 2276 if not self._curr: 2277 return None 2278 2279 if self._match_set(self.STATEMENT_PARSERS): 2280 comments = self._prev_comments 2281 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 2282 stmt.add_comments(comments, prepend=True) 2283 return stmt 2284 2285 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 2286 return self._parse_command() 2287 2288 if self._match_text_seq("WHILE"): 2289 return self._parse_whileblock() 2290 2291 expression = self._parse_expression() 2292 expression = self._parse_set_operations(expression) if expression else self._parse_select() 2293 2294 if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False): 2295 
expression = self._parse_pipe_syntax_query(expression) 2296 2297 return self._parse_query_modifiers(expression) 2298 2299 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 2300 start = self._prev 2301 temporary = self._match(TokenType.TEMPORARY) 2302 materialized = self._match_text_seq("MATERIALIZED") 2303 iceberg = self._match_text_seq("ICEBERG") 2304 2305 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 2306 if not kind or (iceberg and kind and kind != "TABLE"): 2307 return self._parse_as_command(start) 2308 2309 concurrently = self._match_text_seq("CONCURRENTLY") 2310 if_exists = exists or self._parse_exists() 2311 2312 if kind == "COLUMN": 2313 this = self._parse_column() 2314 else: 2315 this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA") 2316 2317 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 2318 2319 if self._match(TokenType.L_PAREN, advance=False): 2320 expressions = self._parse_wrapped_csv(self._parse_types) 2321 else: 2322 expressions = None 2323 2324 cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper() 2325 2326 return self.expression( 2327 exp.Drop( 2328 exists=if_exists, 2329 this=this, 2330 expressions=expressions, 2331 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 2332 temporary=temporary, 2333 materialized=materialized, 2334 cascade=cascade_or_restrict == "CASCADE", 2335 restrict=cascade_or_restrict == "RESTRICT", 2336 constraints=self._match_text_seq("CONSTRAINTS"), 2337 purge=self._match_text_seq("PURGE"), 2338 cluster=cluster, 2339 concurrently=concurrently, 2340 sync=self._match_text_seq("SYNC"), 2341 iceberg=iceberg, 2342 ) 2343 ) 2344 2345 def _parse_exists(self, not_: bool = False) -> bool | None: 2346 return ( 2347 self._match_text_seq("IF") 2348 and (not not_ or self._match(TokenType.NOT)) 2349 and self._match(TokenType.EXISTS) 2350 ) 2351 2352 def _parse_create(self) -> exp.Create | 
exp.Command: 2353 # Note: this can't be None because we've matched a statement parser 2354 start = self._prev 2355 2356 replace = ( 2357 start.token_type == TokenType.REPLACE 2358 or self._match_pair(TokenType.OR, TokenType.REPLACE) 2359 or self._match_pair(TokenType.OR, TokenType.ALTER) 2360 ) 2361 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 2362 2363 unique = self._match(TokenType.UNIQUE) 2364 2365 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 2366 clustered = True 2367 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 2368 "COLUMNSTORE" 2369 ): 2370 clustered = False 2371 else: 2372 clustered = None 2373 2374 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2375 self._advance() 2376 2377 properties = None 2378 create_token = self._match_set(self.CREATABLES) and self._prev 2379 2380 if not create_token: 2381 # exp.Properties.Location.POST_CREATE 2382 properties = self._parse_properties() 2383 create_token = self._match_set(self.CREATABLES) and self._prev 2384 2385 if not properties or not create_token: 2386 return self._parse_as_command(start) 2387 2388 create_token_type = t.cast(Token, create_token).token_type 2389 2390 concurrently = self._match_text_seq("CONCURRENTLY") 2391 exists = self._parse_exists(not_=True) 2392 this = None 2393 expression: exp.Expr | None = None 2394 indexes = None 2395 no_schema_binding = None 2396 begin = None 2397 clone = None 2398 2399 def extend_props(temp_props: exp.Properties | None) -> None: 2400 nonlocal properties 2401 if properties and temp_props: 2402 properties.expressions.extend(temp_props.expressions) 2403 elif temp_props: 2404 properties = temp_props 2405 2406 if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2407 this = self._parse_user_defined_function(kind=create_token_type) 2408 2409 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2410 extend_props(self._parse_properties()) 2411 2412 
expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None 2413 2414 if ( 2415 not expression 2416 and create_token_type == TokenType.FUNCTION 2417 and isinstance(this, exp.UserDefinedFunction) 2418 and this.args.get("wrapped") 2419 ): 2420 pre_table_index = self._index 2421 is_table = self._match(TokenType.TABLE) 2422 2423 expression = self._parse_expression() 2424 overload_mode = bool( 2425 expression 2426 and self._curr.token_type == TokenType.COMMA 2427 and self._next.token_type == TokenType.L_PAREN 2428 ) 2429 if not overload_mode: 2430 self._retreat(pre_table_index) 2431 is_table = False 2432 expression = None 2433 else: 2434 is_table = False 2435 overload_mode = False 2436 2437 extend_props(self._parse_function_properties()) 2438 2439 if not expression: 2440 if self._match(TokenType.COMMAND): 2441 expression = self._parse_as_command(self._prev) 2442 else: 2443 begin = self._match(TokenType.BEGIN) 2444 return_ = self._match_text_seq("RETURN") 2445 2446 if self._match(TokenType.STRING, advance=False): 2447 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2448 # # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2449 expression = self._parse_string() 2450 extend_props(self._parse_properties()) 2451 else: 2452 expression = ( 2453 self._parse_user_defined_function_expression() 2454 if create_token_type == TokenType.FUNCTION 2455 else self._parse_block() 2456 ) 2457 2458 if return_: 2459 expression = self.expression(exp.Return(this=expression)) 2460 2461 if overload_mode and expression: 2462 expression = self._parse_macro_overloads( 2463 t.cast(exp.UserDefinedFunction, this), expression, is_table 2464 ) 2465 elif create_token_type == TokenType.INDEX: 2466 # Postgres allows anonymous indexes, eg. 
CREATE INDEX IF NOT EXISTS ON t(c) 2467 if not self._match(TokenType.ON): 2468 index = self._parse_id_var() 2469 anonymous = False 2470 else: 2471 index = None 2472 anonymous = True 2473 2474 this = self._parse_index(index=index, anonymous=anonymous) 2475 elif ( 2476 create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER) 2477 ) or create_token_type == TokenType.TRIGGER: 2478 if is_constraint := (create_token_type == TokenType.CONSTRAINT): 2479 create_token = self._prev 2480 2481 trigger_name = self._parse_id_var() 2482 if not trigger_name: 2483 return self._parse_as_command(start) 2484 2485 timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False) 2486 timing = timing_var.this if timing_var else None 2487 if not timing: 2488 return self._parse_as_command(start) 2489 2490 events = self._parse_trigger_events() 2491 if not self._match(TokenType.ON): 2492 self.raise_error("Expected ON in trigger definition") 2493 2494 table = self._parse_table_parts() 2495 referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None 2496 deferrable, initially = self._parse_trigger_deferrable() 2497 referencing = self._parse_trigger_referencing() 2498 for_each = self._parse_trigger_for_each() 2499 when = self._match_text_seq("WHEN") and self._parse_wrapped( 2500 self._parse_disjunction, optional=True 2501 ) 2502 execute = self._parse_trigger_execute() 2503 2504 if execute is None: 2505 return self._parse_as_command(start) 2506 2507 trigger_props = self.expression( 2508 exp.TriggerProperties( 2509 table=table, 2510 timing=timing, 2511 events=events, 2512 execute=execute, 2513 constraint=is_constraint, 2514 referenced_table=referenced_table, 2515 deferrable=deferrable, 2516 initially=initially, 2517 referencing=referencing, 2518 for_each=for_each, 2519 when=when, 2520 ) 2521 ) 2522 2523 this = trigger_name 2524 extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else [])) 2525 elif 
create_token_type == TokenType.TYPE: 2526 this = self._parse_table_parts(schema=True) 2527 if not this or not self._match(TokenType.ALIAS): 2528 return self._parse_as_command(start) 2529 2530 if self._match(TokenType.ENUM): 2531 expression = exp.DataType( 2532 this=exp.DType.ENUM, 2533 expressions=self._parse_wrapped_csv(self._parse_string), 2534 ) 2535 elif self._match(TokenType.L_PAREN, advance=False): 2536 expression = self._parse_schema() 2537 else: 2538 return self._parse_as_command(start) 2539 elif create_token_type in self.DB_CREATABLES: 2540 table_parts = self._parse_table_parts( 2541 schema=True, is_db_reference=create_token_type == TokenType.SCHEMA 2542 ) 2543 2544 # exp.Properties.Location.POST_NAME 2545 self._match(TokenType.COMMA) 2546 extend_props(self._parse_properties(before=True)) 2547 2548 this = self._parse_schema(this=table_parts) 2549 2550 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2551 extend_props(self._parse_properties()) 2552 2553 has_alias = self._match(TokenType.ALIAS) 2554 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2555 # exp.Properties.Location.POST_ALIAS 2556 extend_props(self._parse_properties()) 2557 2558 if create_token_type == TokenType.SEQUENCE: 2559 expression = self._parse_types() 2560 props = self._parse_properties() 2561 if props: 2562 sequence_props = exp.SequenceProperties() 2563 options = [] 2564 for prop in props: 2565 if isinstance(prop, exp.SequenceProperties): 2566 for arg, value in prop.args.items(): 2567 if arg == "options": 2568 options.extend(value) 2569 else: 2570 sequence_props.set(arg, value) 2571 prop.pop() 2572 2573 if options: 2574 sequence_props.set("options", options) 2575 2576 props.append("expressions", sequence_props) 2577 extend_props(props) 2578 else: 2579 expression = self._parse_ddl_select() 2580 2581 # Some dialects also support using a table as an alias instead of a SELECT. 2582 # Here we fallback to this as an alternative. 
2583 if not expression and has_alias: 2584 expression = self._try_parse(self._parse_table_parts) 2585 2586 if create_token_type == TokenType.TABLE: 2587 # exp.Properties.Location.POST_EXPRESSION 2588 extend_props(self._parse_properties()) 2589 2590 indexes = [] 2591 while True: 2592 index = self._parse_index() 2593 2594 # exp.Properties.Location.POST_INDEX 2595 extend_props(self._parse_properties()) 2596 if not index: 2597 break 2598 else: 2599 self._match(TokenType.COMMA) 2600 indexes.append(index) 2601 elif create_token_type == TokenType.VIEW: 2602 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2603 no_schema_binding = True 2604 elif create_token_type in (TokenType.SINK, TokenType.SOURCE): 2605 extend_props(self._parse_properties()) 2606 2607 shallow = self._match_text_seq("SHALLOW") 2608 2609 if self._match_texts(self.CLONE_KEYWORDS): 2610 copy = self._prev.text.lower() == "copy" 2611 clone = self.expression( 2612 exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy) 2613 ) 2614 2615 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2616 return self._parse_as_command(start) 2617 2618 create_kind_text = create_token.text.upper() 2619 return self.expression( 2620 exp.Create( 2621 this=this, 2622 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2623 replace=replace, 2624 refresh=refresh, 2625 unique=unique, 2626 expression=expression, 2627 exists=exists, 2628 properties=properties, 2629 indexes=indexes, 2630 no_schema_binding=no_schema_binding, 2631 begin=begin, 2632 clone=clone, 2633 concurrently=concurrently, 2634 clustered=clustered, 2635 ) 2636 ) 2637 2638 def _parse_sequence_properties(self) -> exp.SequenceProperties | None: 2639 seq = exp.SequenceProperties() 2640 2641 options = [] 2642 index = self._index 2643 2644 while self._curr: 2645 self._match(TokenType.COMMA) 2646 if self._match_text_seq("INCREMENT"): 2647 self._match_text_seq("BY") 2648 
def _parse_trigger_events(self) -> list[exp.TriggerEvent]:
    """Parse the OR-separated list of events of a trigger definition.

    Each event is a token from `self.TRIGGER_EVENTS`; an UPDATE event may be
    followed by `OF <column, ...>`. An error is reported through
    `raise_error` when no event token is found.
    """
    parsed = []
    more = True

    while more:
        # Keeps the falsy result of `_match_set` as-is when nothing matched.
        kind = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper()
        if not kind:
            self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")

        # Only UPDATE accepts a column list, introduced by OF.
        cols = None
        if kind == "UPDATE" and self._match_text_seq("OF"):
            cols = self._parse_csv(self._parse_column)

        parsed.append(self.expression(exp.TriggerEvent(this=kind, columns=cols)))

        # Another event follows only after an OR token.
        more = bool(self._match(TokenType.OR))

    return parsed
def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None:
    """Parse `REFERENCING {OLD | NEW} TABLE [AS] alias ...` of a trigger.

    Returns None when the REFERENCING keyword is absent. OLD and NEW clauses
    may appear in any order; a repeated clause or an empty REFERENCING is
    reported through `raise_error`.
    """
    if not self._match_text_seq("REFERENCING"):
        return None

    old_ref = None
    new_ref = None

    while True:
        # OLD is always attempted first on every iteration.
        ref = self._parse_trigger_referencing_clause("OLD")
        if ref:
            if old_ref is not None:
                self.raise_error("Duplicate OLD clause in REFERENCING")
            old_ref = ref
            continue

        ref = self._parse_trigger_referencing_clause("NEW")
        if not ref:
            # Neither clause produced an alias: the list is finished.
            break

        if new_ref is not None:
            self.raise_error("Duplicate NEW clause in REFERENCING")
        new_ref = ref

    if old_ref is None and new_ref is None:
        self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE")

    return self.expression(exp.TriggerReferencing(old=old_ref, new=new_ref))
def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
    """Parse a single property at the current position.

    Attempts, in order: a keyword registered in `self.PROPERTY_PARSERS`, the
    same preceded by DEFAULT, `COMPOUND SORTKEY`, `PARAMETER STYLE PANDAS`,
    sequence properties, and finally a generic `key = value` pair.

    Returns:
        The parsed property (or list of properties, depending on the
        dispatched parser), or None when no `key = value` pair is found, in
        which case the position saved in `index` is restored.
    """
    # The upper-cased text of the matched token selects the dedicated parser.
    if self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self)

    # NOTE(review): if DEFAULT matches but no property keyword follows, the
    # DEFAULT token appears to stay consumed, because `index` below is only
    # captured after this check - confirm this is intended.
    if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
        return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)

    if self._match_text_seq("COMPOUND", "SORTKEY"):
        return self._parse_sortkey(compound=True)

    if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
        return self.expression(exp.ParameterStyleProperty(this="PANDAS"))

    # Restore point for the speculative parses below.
    index = self._index

    seq_props = self._parse_sequence_properties()
    if seq_props:
        return seq_props

    # Not a sequence property: rewind and try a generic `key = value` pair.
    self._retreat(index)
    key = self._parse_column()

    if not self._match(TokenType.EQ):
        self._retreat(index)
        return None

    # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
    if isinstance(key, exp.Column):
        key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)

    value = self._parse_bitwise() or self._parse_var(any_token=True)

    # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
    if isinstance(value, exp.Column):
        value = exp.var(value.name)

    return self.expression(exp.Property(this=key, value=value))
def _parse_system_versioning_property(
    self, with_: bool = False
) -> exp.WithSystemVersioningProperty:
    """Parse `[=] {OFF | [ON] [( option [, ...] )]}` for SYSTEM_VERSIONING.

    Recognised options are `HISTORY_TABLE = <table>`,
    `DATA_CONSISTENCY_CHECK = <token>` and
    `HISTORY_RETENTION_PERIOD = <period>`.

    Args:
        with_: Stored on the resulting property as `with_`.

    Returns:
        The property, with `on` set to False only when OFF was matched.
    """
    self._match(TokenType.EQ)
    prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_))

    if self._match_text_seq("OFF"):
        prop.set("on", False)
        return prop

    self._match(TokenType.ON)
    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("HISTORY_TABLE", "="):
                prop.set("this", self._parse_table_parts())
            elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
                prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
            elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Nothing was consumed (unknown option): stop instead of
                # looping forever on the same token, and leave it for the
                # caller to deal with.
                break

    return prop

def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
    """Parse `[=] [ON | OFF] [( option [, ...] )]` for DATA_DELETION.

    Recognised options are `FILTER_COLUMN = <column>` and
    `RETENTION_PERIOD = <period>`. `on` is True unless OFF is matched.
    """
    self._match(TokenType.EQ)
    on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
    prop = self.expression(exp.DataDeletionProperty(on=on))

    if self._match(TokenType.L_PAREN):
        while self._curr and not self._match(TokenType.R_PAREN):
            index = self._index

            if self._match_text_seq("FILTER_COLUMN", "="):
                prop.set("filter_column", self._parse_column())
            elif self._match_text_seq("RETENTION_PERIOD", "="):
                prop.set("retention_period", self._parse_retention_period())

            self._match(TokenType.COMMA)

            if self._index == index:
                # Nothing was consumed (unknown option): stop instead of
                # looping forever on the same token.
                break

    return prop
def _parse_procedure_option(self) -> exp.Expr | None:
    """Parse one procedure option.

    `EXECUTE AS <x>` yields an `exp.ExecuteAsProperty`, where `<x>` is one of
    `self.EXECUTE_AS_OPTIONS` or, failing that, a string. Anything else is
    delegated to the `self.PROCEDURE_OPTIONS` lookup.
    """
    if not self._match_text_seq("EXECUTE", "AS"):
        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)

    principal = self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False)
    if not principal:
        # Not a known keyword option: fall back to a string.
        principal = self._parse_string()

    return self.expression(exp.ExecuteAsProperty(this=principal))
def _parse_checksum(self) -> exp.ChecksumProperty:
    """Parse `[=] [ON | OFF] [DEFAULT]` into an `exp.ChecksumProperty`.

    `on` is True for ON, False for OFF and None when neither is present.
    """
    self._match(TokenType.EQ)

    if self._match(TokenType.ON):
        state = True
    elif self._match_text_seq("OFF"):
        state = False
    else:
        state = None

    # DEFAULT is looked for only after the ON/OFF switch has been handled.
    is_default = self._match(TokenType.DEFAULT)
    return self.expression(exp.ChecksumProperty(on=state, default=is_default))
def _parse_datablocksize(
    self,
    default: bool | None = None,
    minimum: bool | None = None,
    maximum: bool | None = None,
) -> exp.DataBlocksizeProperty:
    """Parse `[=] <number> [BYTES | KBYTES | KILOBYTES]`.

    Args:
        default: Passed through to the resulting property.
        minimum: Passed through to the resulting property.
        maximum: Passed through to the resulting property.

    Returns:
        An `exp.DataBlocksizeProperty`; `units` is the matched unit token's
        text as written, or None when no unit follows the number.
    """
    self._match(TokenType.EQ)
    block_size = self._parse_number()

    unit_text = (
        self._prev.text if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")) else None
    )

    node = exp.DataBlocksizeProperty(
        size=block_size, units=unit_text, default=default, minimum=minimum, maximum=maximum
    )
    return self.expression(node)
def _parse_locking(self) -> exp.LockingProperty:
    """Parse the body of a LOCKING property.

    Reads, each part optional and in this order: the locked object kind
    (TABLE, VIEW, ROW or DATABASE) with its name for all kinds except ROW,
    `FOR` or `IN`, the lock type, and a trailing `OVERRIDE`.
    """
    # Object kind: token-typed keywords first, then the plain-text DATABASE.
    kind = None
    for token_type, label in (
        (TokenType.TABLE, "TABLE"),
        (TokenType.VIEW, "VIEW"),
        (TokenType.ROW, "ROW"),
    ):
        if self._match(token_type):
            kind = label
            break
    else:
        if self._match_text_seq("DATABASE"):
            kind = "DATABASE"

    # ROW locks (and a missing kind) carry no object name.
    this = self._parse_table_parts() if kind in ("DATABASE", "TABLE", "VIEW") else None

    for_or_in = None
    if self._match(TokenType.FOR):
        for_or_in = "FOR"
    elif self._match(TokenType.IN):
        for_or_in = "IN"

    # EXCL is accepted as a spelling of EXCLUSIVE; the other lock types map
    # to themselves.
    lock_type = None
    if self._match_text_seq("ACCESS"):
        lock_type = "ACCESS"
    elif self._match_texts(("EXCL", "EXCLUSIVE")):
        lock_type = "EXCLUSIVE"
    else:
        for word in ("SHARE", "READ", "WRITE", "CHECKSUM"):
            if self._match_text_seq(word):
                lock_type = word
                break

    override = self._match_text_seq("OVERRIDE")

    return self.expression(
        exp.LockingProperty(
            this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override
        )
    )
# https://www.postgresql.org/docs/current/sql-createtable.html
def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None:
    """Parse `OF <parent> {DEFAULT | FOR VALUES <bound spec>}`.

    Returns:
        None, after stepping back one token, when OF does not follow;
        otherwise the parsed `exp.PartitionedOfProperty`.
    """
    if not self._match_text_seq("OF"):
        self._retreat(self._index - 1)
        return None

    this = self._parse_table(schema=True)

    # Pre-initialised so the name is always bound, mirroring how
    # `_parse_partition_bound_spec` prepares its results before its own
    # `raise_error` branch.
    # NOTE(review): assumes `raise_error` can return instead of raising
    # (error-level dependent) - confirm.
    expression: exp.Var | exp.PartitionBoundSpec | None = None
    if self._match(TokenType.DEFAULT):
        expression = exp.var("DEFAULT")
    elif self._match_text_seq("FOR", "VALUES"):
        expression = self._parse_partition_bound_spec()
    else:
        self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")

    return self.expression(exp.PartitionedOfProperty(this=this, expression=expression))
def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
    """Parse the optional `AND [NO] STATISTICS` suffix of WITH [NO] DATA.

    Args:
        no: Passed through to the resulting property.

    Returns:
        An `exp.WithDataProperty` whose `statistics` is True, False or None
        (suffix absent).
    """
    statistics = None
    if self._match_text_seq("AND", "STATISTICS"):
        statistics = True
    elif self._match_text_seq("AND", "NO", "STATISTICS"):
        statistics = False

    node = exp.WithDataProperty(no=no, statistics=statistics)
    return self.expression(node)
def _parse_returns(self) -> exp.ReturnsProperty:
    """Parse what follows RETURNS in a function definition.

    Handles `TABLE <col type, ...>` (angle brackets), `TABLE` followed by a
    schema, `NULL ON NULL INPUT`, and a plain type.
    """
    result_type: exp.Expr | None
    returns_null = None
    returns_table = self._match(TokenType.TABLE)

    if not returns_table:
        if self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
            returns_null = True
            result_type = None
        else:
            result_type = self._parse_types()
    elif self._match(TokenType.LT):
        # TABLE<...>: the columns are parsed before the Schema node is built.
        columns = self._parse_csv(self._parse_struct_types)
        result_type = self.expression(exp.Schema(this="TABLE", expressions=columns))
        if not self._match(TokenType.GT):
            self.raise_error("Expecting >")
    else:
        result_type = self._parse_schema(exp.var("TABLE"))

    return self.expression(
        exp.ReturnsProperty(this=result_type, is_table=returns_table, null=returns_null)
    )
def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
    """Parse the remainder of an INSERT statement.

    Returns:
        An `exp.MultitableInserts` when FIRST or ALL follows the leading
        modifiers, otherwise an `exp.Insert`.
    """
    comments: list[str] = []
    # Leading modifiers are consumed in this fixed order.
    hint = self._parse_hint()
    overwrite = self._match(TokenType.OVERWRITE)
    ignore = self._match(TokenType.IGNORE)
    local = self._match_text_seq("LOCAL")
    alternative = None
    is_function = None

    if self._match_text_seq("DIRECTORY"):
        # The target is a directory rather than a table.
        this: exp.Expr | None = self.expression(
            exp.Directory(
                this=self._parse_var_or_string(),
                local=local,
                row_format=self._parse_row_format(match_row=True),
            )
        )
    else:
        if self._match_set((TokenType.FIRST, TokenType.ALL)):
            # Multi-table insert: hand over entirely to the dedicated parser.
            comments += ensure_list(self._prev_comments)
            return self._parse_multitable_inserts(comments)

        if self._match(TokenType.OR):
            # `OR <alternative>`; stays falsy when the word after OR is not
            # in INSERT_ALTERNATIVES.
            alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text

        self._match(TokenType.INTO)
        comments += ensure_list(self._prev_comments)
        self._match(TokenType.TABLE)
        is_function = self._match(TokenType.FUNCTION)

        this = self._parse_function() if is_function else self._parse_insert_table()

    returning = self._parse_returning()  # TSQL allows RETURNING before source

    # Keyword arguments are evaluated top to bottom and most of them consume
    # tokens, so their order below is the clause order that gets parsed.
    return self.expression(
        exp.Insert(
            hint=hint,
            is_function=is_function,
            this=this,
            stored=self._match_text_seq("STORED") and self._parse_stored(),
            by_name=self._match_text_seq("BY", "NAME"),
            exists=self._parse_exists(),
            where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
            and self._parse_disjunction(),
            partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
            settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
            default=self._match_text_seq("DEFAULT", "VALUES"),
            expression=self._parse_derived_table_values() or self._parse_ddl_select(),
            conflict=self._parse_on_conflict(),
            # A trailing RETURNING is only looked for when none was found
            # before the source.
            returning=returning or self._parse_returning(),
            overwrite=overwrite,
            alternative=alternative,
            ignore=ignore,
            source=self._match(TokenType.TABLE) and self._parse_table(),
        ),
        comments=comments,
    )
def _parse_load(self) -> exp.LoadData | exp.Command:
    """Parse ``LOAD DATA ...`` into an ``exp.LoadData`` node.

    Any LOAD statement that is not followed by ``DATA`` is not understood
    structurally and is returned as a generic command built from the
    previously consumed token.
    """
    if not self._match_text_seq("DATA"):
        return self._parse_as_command(self._prev)

    local = self._match_text_seq("LOCAL")
    self._match_text_seq("INPATH")
    inpath = self._parse_string()
    overwrite = self._match(TokenType.OVERWRITE)

    # TEMPORARY and TABLE are only looked for when INTO is present
    temp: bool | None = None
    if self._match(TokenType.INTO):
        temp = self._match(TokenType.TEMPORARY)
        self._match(TokenType.TABLE)

    # The remaining clauses consume tokens and are therefore read in order
    target = self._parse_table(schema=True)
    files = self._match_text_seq("FROM", "FILES") and exp.Properties(
        expressions=self._parse_wrapped_properties()
    )
    partition = self._parse_partition()
    input_format = self._match_text_seq("INPUTFORMAT") and self._parse_string()
    serde = self._match_text_seq("SERDE") and self._parse_string()

    load = exp.LoadData(
        this=target,
        local=local,
        overwrite=overwrite,
        temp=temp,
        inpath=inpath,
        files=files,
        partition=partition,
        input_format=input_format,
        serde=serde,
    )
    return self.expression(load)
self._curr: 3688 if self._match(TokenType.SET): 3689 kwargs["expressions"] = self._parse_csv(self._parse_equality) 3690 elif self._match(TokenType.RETURNING, advance=False): 3691 kwargs["returning"] = self._parse_returning() 3692 elif self._match(TokenType.FROM, advance=False): 3693 from_ = self._parse_from(joins=True) 3694 table = from_.this if from_ else None 3695 if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False): 3696 table.set("joins", list(self._parse_joins()) or None) 3697 3698 kwargs["from_"] = from_ 3699 elif self._match(TokenType.WHERE, advance=False): 3700 kwargs["where"] = self._parse_where() 3701 elif self._match(TokenType.ORDER_BY, advance=False): 3702 kwargs["order"] = self._parse_order() 3703 elif self._match(TokenType.LIMIT, advance=False): 3704 kwargs["limit"] = self._parse_limit() 3705 else: 3706 break 3707 3708 return self.expression(exp.Update(**kwargs)) 3709 3710 def _parse_use(self) -> exp.Use: 3711 return self.expression( 3712 exp.Use( 3713 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3714 this=self._parse_table(schema=False), 3715 ) 3716 ) 3717 3718 def _parse_uncache(self) -> exp.Uncache: 3719 if not self._match(TokenType.TABLE): 3720 self.raise_error("Expecting TABLE after UNCACHE") 3721 3722 return self.expression( 3723 exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True)) 3724 ) 3725 3726 def _parse_cache(self) -> exp.Cache: 3727 lazy = self._match_text_seq("LAZY") 3728 self._match(TokenType.TABLE) 3729 table = self._parse_table(schema=True) 3730 3731 options = [] 3732 if self._match_text_seq("OPTIONS"): 3733 self._match_l_paren() 3734 k = self._parse_string() 3735 self._match(TokenType.EQ) 3736 v = self._parse_string() 3737 options = [k, v] 3738 self._match_r_paren() 3739 3740 self._match(TokenType.ALIAS) 3741 return self.expression( 3742 exp.Cache( 3743 this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True) 3744 ) 3745 ) 3746 
3747 def _parse_partition(self) -> exp.Partition | None: 3748 if not self._match_texts(self.PARTITION_KEYWORDS): 3749 return None 3750 3751 return self.expression( 3752 exp.Partition( 3753 subpartition=self._prev.text.upper() == "SUBPARTITION", 3754 expressions=self._parse_wrapped_csv(self._parse_disjunction), 3755 ) 3756 ) 3757 3758 def _parse_value(self, values: bool = True) -> exp.Tuple | None: 3759 def _parse_value_expression() -> exp.Expr | None: 3760 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3761 return exp.var(self._prev.text.upper()) 3762 return self._parse_expression() 3763 3764 if self._match(TokenType.L_PAREN): 3765 expressions = self._parse_csv(_parse_value_expression) 3766 self._match_r_paren() 3767 return self.expression(exp.Tuple(expressions=expressions)) 3768 3769 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3770 expression = self._parse_expression() 3771 if expression: 3772 return self.expression(exp.Tuple(expressions=[expression])) 3773 return None 3774 3775 def _parse_projections( 3776 self, 3777 ) -> tuple[list[exp.Expr], list[exp.Expr] | None]: 3778 return self._parse_expressions(), None 3779 3780 def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None: 3781 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3782 this: exp.Expr | None = self._parse_simplified_pivot( 3783 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3784 ) 3785 elif self._match(TokenType.FROM): 3786 from_ = self._parse_from(joins=True, skip_from_token=True, consume_pipe=True) 3787 # Support parentheses for duckdb FROM-first syntax 3788 select = self._parse_select(from_=from_) 3789 if select: 3790 if not select.args.get("from_"): 3791 select.set("from_", from_) 3792 this = select 3793 else: 3794 this = exp.select("*").from_(t.cast(exp.From, from_)) 3795 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3796 else: 3797 this = ( 3798 
def _parse_select_query(
    self,
    nested: bool = False,
    table: bool = False,
    parse_subquery_alias: bool = True,
    parse_set_operation: bool = True,
) -> exp.Expr | None:
    """Parse a query: a CTE-prefixed statement, SELECT, a parenthesised query,
    VALUES, a leading FROM, SUMMARIZE or DESCRIBE.

    Args:
        nested: Allow a parenthesised query at this position.
        table: Allow a parenthesised query and forward the flag to
            ``_parse_wrapped_select``.
        parse_subquery_alias: Whether an alias is parsed after a
            parenthesised query.
        parse_set_operation: Whether set operations following the query are
            attached before returning.

    Returns:
        The parsed expression, or None when no query starts at the cursor.
    """

    def parse_distinct() -> exp.Expr:
        # DISTINCT has already been matched; only the optional ON (...) remains
        on = self._parse_value(values=False) if self._match(TokenType.ON) else None
        return self.expression(exp.Distinct(on=on))

    with_ = self._parse_with()
    if with_:
        statement = self._parse_statement()
        if not statement:
            self.raise_error("Failed to parse any statement following CTE")
            return with_

        # Unwrap pure wrapper subqueries so the CTE lands on the real statement
        while isinstance(statement, exp.Subquery) and statement.is_wrapper:
            statement = statement.this

        assert statement is not None
        if "with_" not in statement.arg_types:
            self.raise_error(f"{statement.key} does not support CTE")
            return with_

        inner_with = statement.args.get("with_")
        if inner_with:
            # Merge the statement's own CTEs behind the ones parsed here
            with_.set("expressions", with_.expressions + inner_with.expressions)
            if inner_with.args.get("recursive"):
                with_.set("recursive", True)
        statement.set("with_", with_)
        return statement

    # duckdb supports leading with FROM x
    from_ = None
    if self._match(TokenType.FROM, advance=False):
        from_ = self._parse_from(joins=True, consume_pipe=True)

    if self._match(TokenType.SELECT):
        comments = self._prev_comments
        hint = self._parse_hint()

        # ALL / DISTINCT are not treated as modifiers when the next token is a dot
        if self._next and self._next.token_type != TokenType.DOT:
            all_ = self._match(TokenType.ALL)
            matched_distinct = self._match_set(self.DISTINCT_TOKENS)
        else:
            all_, matched_distinct = None, False

        kind = None
        if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE")):
            kind = self._prev.text.upper()

        distinct: exp.Expr | None = parse_distinct() if matched_distinct else None

        operation_modifiers = []
        while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
            operation_modifiers.append(exp.var(self._prev.text.upper()))

        limit = self._parse_limit(top=True)

        # Some dialects (e.g. Redshift, T-SQL) allow SELECT TOP N DISTINCT ...
        if limit and not matched_distinct and not all_:
            matched_distinct = self._match_set(self.DISTINCT_TOKENS)
            if matched_distinct:
                distinct = parse_distinct()
            else:
                all_ = self._match(TokenType.ALL)

        if all_ and distinct:
            self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

        projections, exclude = self._parse_projections()

        select = exp.Select(
            kind=kind,
            hint=hint,
            distinct=distinct,
            expressions=projections,
            limit=limit,
            exclude=exclude,
            operation_modifiers=operation_modifiers or None,
        )
        this = self.expression(select)
        this.comments = comments

        into = self._parse_into()
        if into:
            this.set("into", into)

        # A leading FROM takes precedence over one that follows the projections
        from_ = from_ or self._parse_from()
        if from_:
            this.set("from_", from_)

        this = self._parse_query_modifiers(this)
    elif (table or nested) and self._match(TokenType.L_PAREN):
        comments = self._prev_comments
        this = self._parse_wrapped_select(table=table)
        if this:
            this.add_comments(comments, prepend=True)

        # We return early here so that the UNION isn't attached to the subquery by the
        # following call to _parse_set_operations, but instead becomes the parent node
        self._match_r_paren()
        return self._parse_subquery(this, parse_alias=parse_subquery_alias)
    elif self._match(TokenType.VALUES, advance=False):
        this = self._parse_derived_table_values()
    elif from_:
        # A bare leading FROM is expanded to SELECT * FROM ...
        this = self._parse_query_modifiers(exp.select("*").from_(from_.this, copy=False))
    elif self._match(TokenType.SUMMARIZE):
        is_table = self._match(TokenType.TABLE)
        summarized = self._parse_select() or self._parse_string() or self._parse_table()
        return self.expression(exp.Summarize(this=summarized, table=is_table))
    elif self._match(TokenType.DESCRIBE):
        this = self._parse_describe()
    else:
        this = None

    if parse_set_operation:
        return self._parse_set_operations(this)
    return this
and not self.OPTIONAL_ALIAS_TOKEN_CTE: 4043 self._retreat(index) 4044 return None 4045 4046 comments = self._prev_comments 4047 4048 if self._match_text_seq("NOT", "MATERIALIZED"): 4049 materialized = False 4050 elif self._match_text_seq("MATERIALIZED"): 4051 materialized = True 4052 else: 4053 materialized = None 4054 4055 cte = self.expression( 4056 exp.CTE( 4057 this=self._parse_wrapped(self._parse_statement), 4058 alias=alias, 4059 materialized=materialized, 4060 key_expressions=key_expressions, 4061 ), 4062 comments=comments, 4063 ) 4064 4065 values = cte.this 4066 if isinstance(values, exp.Values): 4067 if values.alias: 4068 cte.set("this", exp.select("*").from_(values)) 4069 else: 4070 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 4071 4072 return cte 4073 4074 def _parse_table_alias( 4075 self, alias_tokens: t.Collection[TokenType] | None = None 4076 ) -> exp.TableAlias | None: 4077 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 4078 # so this section tries to parse the clause version and if it fails, it treats the token 4079 # as an identifier (alias) 4080 if self._can_parse_limit_or_offset(): 4081 return None 4082 4083 any_token = self._match(TokenType.ALIAS) 4084 alias = ( 4085 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4086 or self._parse_string_as_identifier() 4087 ) 4088 4089 index = self._index 4090 if self._match(TokenType.L_PAREN): 4091 columns = self._parse_csv(self._parse_function_parameter) 4092 self._match_r_paren() if columns else self._retreat(index) 4093 else: 4094 columns = None 4095 4096 if not alias and not columns: 4097 return None 4098 4099 table_alias = self.expression(exp.TableAlias(this=alias, columns=columns)) 4100 4101 # We bubble up comments from the Identifier to the TableAlias 4102 if isinstance(alias, exp.Identifier): 4103 table_alias.add_comments(alias.pop_comments()) 4104 4105 return table_alias 4106 4107 
def _parse_subquery( 4108 self, this: exp.Expr | None, parse_alias: bool = True 4109 ) -> exp.Subquery | None: 4110 if not this: 4111 return None 4112 4113 return self.expression( 4114 exp.Subquery( 4115 this=this, 4116 pivots=self._parse_pivots(), 4117 alias=self._parse_table_alias() if parse_alias else None, 4118 sample=self._parse_table_sample(), 4119 ) 4120 ) 4121 4122 def _implicit_unnests_to_explicit(self, this: E) -> E: 4123 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 4124 4125 refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name} 4126 for i, join in enumerate(this.args.get("joins") or []): 4127 table = join.this 4128 normalized_table = table.copy() 4129 normalized_table.meta["maybe_column"] = True 4130 normalized_table = _norm(normalized_table, dialect=self.dialect) 4131 4132 if isinstance(table, exp.Table) and not join.args.get("on"): 4133 if len(normalized_table.parts) > 1 and normalized_table.parts[0].name in refs: 4134 table_as_column = table.to_column() 4135 unnest = exp.Unnest(expressions=[table_as_column]) 4136 4137 # Table.to_column creates a parent Alias node that we want to convert to 4138 # a TableAlias and attach to the Unnest, so it matches the parser's output 4139 if isinstance(table.args.get("alias"), exp.TableAlias): 4140 table_as_column.replace(table_as_column.this) 4141 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 4142 4143 table.replace(unnest) 4144 4145 refs.add(normalized_table.alias_or_name) 4146 4147 return this 4148 4149 @t.overload 4150 def _parse_query_modifiers(self, this: E) -> E: ... 4151 4152 @t.overload 4153 def _parse_query_modifiers(self, this: None) -> None: ... 
4154 4155 def _parse_query_modifiers(self, this): 4156 if isinstance(this, self.MODIFIABLES): 4157 for join in self._parse_joins(): 4158 this.append("joins", join) 4159 for lateral in iter(self._parse_lateral, None): 4160 this.append("laterals", lateral) 4161 4162 while True: 4163 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 4164 modifier_token = self._curr 4165 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 4166 key, expression = parser(self) 4167 4168 if expression: 4169 if this.args.get(key): 4170 self.raise_error( 4171 f"Found multiple '{modifier_token.text.upper()}' clauses", 4172 token=modifier_token, 4173 ) 4174 4175 this.set(key, expression) 4176 if key == "limit": 4177 offset = expression.args.get("offset") 4178 expression.set("offset", None) 4179 4180 if offset: 4181 offset = exp.Offset(expression=offset) 4182 this.set("offset", offset) 4183 4184 limit_by_expressions = expression.expressions 4185 expression.set("expressions", None) 4186 offset.set("expressions", limit_by_expressions) 4187 continue 4188 break 4189 4190 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"): 4191 this = self._implicit_unnests_to_explicit(this) 4192 4193 return this 4194 4195 def _parse_hint_fallback_to_string(self) -> exp.Hint | None: 4196 start = self._curr 4197 while self._curr: 4198 self._advance() 4199 4200 end = self._tokens[self._index - 1] 4201 return exp.Hint(expressions=[self._find_sql(start, end)]) 4202 4203 def _parse_hint_function_call(self) -> exp.Expr | None: 4204 return self._parse_function_call() 4205 4206 def _parse_hint_body(self) -> exp.Hint | None: 4207 start_index = self._index 4208 should_fallback_to_string = False 4209 4210 hints = [] 4211 try: 4212 for hint in iter( 4213 lambda: self._parse_csv( 4214 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 4215 ), 4216 [], 4217 ): 4218 hints.extend(hint) 4219 except ParseError: 4220 should_fallback_to_string = True 4221 4222 if 
def _parse_match_recognize(self) -> exp.MatchRecognize | None:
    """Parse a ``MATCH_RECOGNIZE (...)`` clause if one starts at the cursor.

    The sub-clauses are read in a fixed order: partition, order, MEASURES,
    the rows-per-match option, AFTER MATCH SKIP, PATTERN and DEFINE, followed
    by an optional table alias after the closing parenthesis. The PATTERN
    body is not parsed structurally; its SQL text is captured verbatim.

    Returns:
        The ``exp.MatchRecognize`` node, or None when the current token is not
        ``MATCH_RECOGNIZE``.
    """
    if not self._match(TokenType.MATCH_RECOGNIZE):
        return None

    self._match_l_paren()

    partition = self._parse_partition_by()
    order = self._parse_order()

    measures = None
    if self._match_text_seq("MEASURES"):
        measures = self._parse_csv(self._parse_match_recognize_measure)

    rows = None
    if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
        rows = exp.var("ONE ROW PER MATCH")
    elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
        rows_text = "ALL ROWS PER MATCH"
        # At most one of these options is appended, tried in this order
        row_options = (
            (("SHOW", "EMPTY", "MATCHES"), " SHOW EMPTY MATCHES"),
            (("OMIT", "EMPTY", "MATCHES"), " OMIT EMPTY MATCHES"),
            (("WITH", "UNMATCHED", "ROWS"), " WITH UNMATCHED ROWS"),
        )
        for words, suffix in row_options:
            if self._match_text_seq(*words):
                rows_text += suffix
                break
        rows = exp.var(rows_text)

    after = None
    if self._match_text_seq("AFTER", "MATCH", "SKIP"):
        skip_text = "AFTER MATCH SKIP"
        if self._match_text_seq("PAST", "LAST", "ROW"):
            skip_text += " PAST LAST ROW"
        elif self._match_text_seq("TO", "NEXT", "ROW"):
            skip_text += " TO NEXT ROW"
        elif self._match_text_seq("TO", "FIRST"):
            # The token after FIRST is appended as-is
            skip_text += f" TO FIRST {self._advance_any().text}"  # type: ignore
        elif self._match_text_seq("TO", "LAST"):
            skip_text += f" TO LAST {self._advance_any().text}"  # type: ignore
        after = exp.var(skip_text)

    pattern = None
    if self._match_text_seq("PATTERN"):
        self._match_l_paren()

        if not self._curr:
            self.raise_error("Expecting )", self._curr)

        depth = 1
        first = self._curr

        # Scan to the parenthesis that closes PATTERN, tracking nesting depth
        while self._curr and depth > 0:
            current_type = self._curr.token_type
            if current_type == TokenType.L_PAREN:
                depth += 1
            elif current_type == TokenType.R_PAREN:
                depth -= 1

            # Taken before advancing, so the closing parenthesis is excluded
            last = self._prev
            self._advance()

        if depth > 0:
            self.raise_error("Expecting )", self._curr)

        pattern = exp.var(self._find_sql(first, last))

    define = None
    if self._match_text_seq("DEFINE"):
        define = self._parse_csv(self._parse_name_as_expression)

    self._match_r_paren()

    match_recognize = exp.MatchRecognize(
        partition_by=partition,
        order=order,
        measures=measures,
        rows=rows,
        after=after,
        pattern=pattern,
        define=define,
        alias=self._parse_table_alias(),
    )
    return self.expression(match_recognize)
_parse_lateral(self) -> exp.Lateral | None: 4359 cross_apply: bool | None = None 4360 if self._match_pair(TokenType.CROSS, TokenType.APPLY): 4361 cross_apply = True 4362 elif self._match_pair(TokenType.OUTER, TokenType.APPLY): 4363 cross_apply = False 4364 4365 if cross_apply is not None: 4366 this = self._parse_select(table=True) 4367 view = None 4368 outer = None 4369 elif self._match(TokenType.LATERAL): 4370 this = self._parse_select(table=True) 4371 view = self._match(TokenType.VIEW) 4372 outer = self._match(TokenType.OUTER) 4373 else: 4374 return None 4375 4376 if not this: 4377 this = ( 4378 self._parse_unnest() 4379 or self._parse_function() 4380 or self._parse_id_var(any_token=False) 4381 ) 4382 4383 while self._match(TokenType.DOT): 4384 this = exp.Dot( 4385 this=this, 4386 expression=self._parse_function() or self._parse_id_var(any_token=False), 4387 ) 4388 4389 ordinality: bool | None = None 4390 4391 if view: 4392 table = self._parse_id_var(any_token=False) 4393 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 4394 table_alias: exp.TableAlias | None = self.expression( 4395 exp.TableAlias(this=table, columns=columns) 4396 ) 4397 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 4398 # We move the alias from the lateral's child node to the lateral itself 4399 table_alias = this.args["alias"].pop() 4400 else: 4401 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4402 table_alias = self._parse_table_alias() 4403 4404 return self.expression( 4405 exp.Lateral( 4406 this=this, 4407 view=view, 4408 outer=outer, 4409 alias=table_alias, 4410 cross_apply=cross_apply, 4411 ordinality=ordinality, 4412 ) 4413 ) 4414 4415 def _parse_stream(self) -> exp.Stream | None: 4416 index = self._index 4417 if self._match(TokenType.STREAM): 4418 if this := self._try_parse(self._parse_table): 4419 return self.expression(exp.Stream(this=this)) 4420 self._retreat(index) 4421 return None 4422 4423 def 
def _parse_join(
    self,
    skip_join_token: bool = False,
    parse_bracket: bool = False,
    alias_tokens: t.Collection[TokenType] | None = None,
) -> exp.Join | None:
    """Parse a single join, including comma joins and APPLY forms.

    Args:
        skip_join_token: Parse a join target even when no JOIN keyword (or
            APPLY pair) is found at the cursor.
        parse_bracket: Forwarded to ``_parse_table`` for the join target.
        alias_tokens: Forwarded to ``_parse_table`` and ``_parse_joins``.

    Returns:
        The ``exp.Join`` node, or None when no join starts at the cursor (or
        when the table after a comma cannot be parsed).
    """
    if self._match(TokenType.COMMA):
        comma_table = self._try_parse(lambda: self._parse_table(alias_tokens=alias_tokens))
        if not comma_table:
            return None

        comma_join = self.expression(exp.Join(this=comma_table))
        if comma_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
            comma_join.set("kind", "CROSS")
        return comma_join

    start = self._index
    method, side, kind = self._parse_join_parts()
    directed = self._match_text_seq("DIRECTED")
    hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
    join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
    join_comments = self._prev_comments

    join_required = not skip_join_token
    if join_required and not join:
        # The modifiers did not introduce a join: give them back
        self._retreat(start)
        method = side = kind = None

    # Peek only; the APPLY tokens are left for the table parser
    outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
    cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

    if join_required and not (join or outer_apply or cross_apply):
        return None

    def parse_target() -> exp.Expr | None:
        return self._parse_table(parse_bracket=parse_bracket, alias_tokens=alias_tokens)

    kwargs: dict[str, t.Any] = {"this": parse_target()}

    if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
        kwargs["expressions"] = self._parse_csv(parse_target)

    if method:
        kwargs["method"] = method.text.upper()
    if side:
        kwargs["side"] = side.text.upper()
    if kind:
        kwargs["kind"] = kind.text.upper()
    if hint:
        kwargs["hint"] = hint

    if self._match(TokenType.MATCH_CONDITION):
        kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

    if self._match(TokenType.ON):
        kwargs["on"] = self._parse_disjunction()
    elif self._match(TokenType.USING):
        kwargs["using"] = self._parse_using_identifiers()
    else:
        is_apply = outer_apply or cross_apply
        is_cross_or_array = kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)
        may_nest = not (
            method or is_apply or isinstance(kwargs["this"], exp.Unnest) or is_cross_or_array
        )

        if may_nest:
            # Try nested joins; they only count if an ON / USING for this join follows
            checkpoint = self._index
            nested: list | None = list(self._parse_joins(alias_tokens=alias_tokens))

            if nested and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif nested and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                nested = None
                self._retreat(checkpoint)

            kwargs["this"].set("joins", nested or None)

    kwargs["pivots"] = self._parse_pivots()

    comments = list(join_comments or [])
    for part in (method, side, kind):
        if part:
            comments.extend(part.comments)

    if (
        self.ADD_JOIN_ON_TRUE
        and not (kwargs.get("on") or kwargs.get("using") or kwargs.get("method"))
        and kwargs.get("kind") in (None, "INNER", "OUTER")
    ):
        kwargs["on"] = exp.true()

    if directed:
        kwargs["directed"] = directed

    return self.expression(exp.Join(**kwargs), comments=comments)
this = self._parse_disjunction() 4539 4540 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 4541 return this 4542 4543 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 4544 return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts())) 4545 4546 return this 4547 4548 def _parse_index_params(self) -> exp.IndexParameters: 4549 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 4550 4551 if self._match(TokenType.L_PAREN, advance=False): 4552 columns = self._parse_wrapped_csv(self._parse_with_operator) 4553 else: 4554 columns = None 4555 4556 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 4557 partition_by = self._parse_partition_by() 4558 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 4559 tablespace = ( 4560 self._parse_var(any_token=True) 4561 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 4562 else None 4563 ) 4564 where = self._parse_where() 4565 4566 on = self._parse_field() if self._match(TokenType.ON) else None 4567 4568 return self.expression( 4569 exp.IndexParameters( 4570 using=using, 4571 columns=columns, 4572 include=include, 4573 partition_by=partition_by, 4574 where=where, 4575 with_storage=with_storage, 4576 tablespace=tablespace, 4577 on=on, 4578 ) 4579 ) 4580 4581 def _parse_index( 4582 self, index: exp.Expr | None = None, anonymous: bool = False 4583 ) -> exp.Index | None: 4584 if index or anonymous: 4585 unique = None 4586 primary = None 4587 amp = None 4588 4589 self._match(TokenType.ON) 4590 self._match(TokenType.TABLE) # hive 4591 table = self._parse_table_parts(schema=True) 4592 else: 4593 unique = self._match(TokenType.UNIQUE) 4594 primary = self._match_text_seq("PRIMARY") 4595 amp = self._match_text_seq("AMP") 4596 4597 if not self._match(TokenType.INDEX): 4598 return None 4599 4600 index = self._parse_id_var() 4601 table = None 4602 4603 params = self._parse_index_params() 4604 
4605 return self.expression( 4606 exp.Index( 4607 this=index, table=table, unique=unique, primary=primary, amp=amp, params=params 4608 ) 4609 ) 4610 4611 def _parse_table_hints(self) -> list[exp.Expr] | None: 4612 hints: list[exp.Expr] = [] 4613 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 4614 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 4615 hints.append( 4616 self.expression( 4617 exp.WithTableHint( 4618 expressions=self._parse_csv( 4619 lambda: self._parse_function() or self._parse_var(any_token=True) 4620 ) 4621 ) 4622 ) 4623 ) 4624 self._match_r_paren() 4625 else: 4626 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 4627 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 4628 hint = exp.IndexTableHint(this=self._prev.text.upper()) 4629 4630 self._match_set((TokenType.INDEX, TokenType.KEY)) 4631 if self._match(TokenType.FOR): 4632 hint.set("target", self._advance_any() and self._prev.text.upper()) 4633 4634 hint.set("expressions", self._parse_wrapped_id_vars()) 4635 hints.append(hint) 4636 4637 return hints or None 4638 4639 def _parse_table_part(self, schema: bool = False) -> exp.Expr | None: 4640 return ( 4641 (not schema and self._parse_function(optional_parens=False)) 4642 or self._parse_id_var(any_token=False) 4643 or self._parse_string_as_identifier() 4644 or self._parse_placeholder() 4645 ) 4646 4647 def _parse_table_parts_fast(self) -> exp.Table | None: 4648 index = self._index 4649 parts: list[exp.Identifier] | None = None 4650 all_comments: list[str] | None = None 4651 4652 while self._match_set(self.IDENTIFIER_TOKENS): 4653 token = self._prev 4654 comments = self._prev_comments 4655 4656 has_dot = self._match(TokenType.DOT) 4657 curr_tt = self._curr.token_type 4658 4659 if not has_dot: 4660 if curr_tt in self.TABLE_POSTFIX_TOKENS: 4661 self._retreat(index) 4662 return None 4663 elif curr_tt not in self.IDENTIFIER_TOKENS: 4664 self._retreat(index) 4665 return None 4666 
4667 if parts is None: 4668 parts = [] 4669 4670 if comments: 4671 if all_comments is None: 4672 all_comments = [] 4673 all_comments.extend(comments) 4674 self._prev_comments = [] 4675 4676 parts.append( 4677 self.expression( 4678 exp.Identifier( 4679 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 4680 ), 4681 token, 4682 ) 4683 ) 4684 4685 if not has_dot: 4686 break 4687 4688 if parts is None: 4689 return None 4690 4691 n = len(parts) 4692 4693 if n == 1: 4694 table: exp.Table = exp.Table(this=parts[0]) 4695 elif n == 2: 4696 table = exp.Table(this=parts[1], db=parts[0]) 4697 elif n >= 3: 4698 this: exp.Identifier | exp.Dot = parts[2] 4699 for i in range(3, n): 4700 this = exp.Dot(this=this, expression=parts[i]) 4701 4702 table = exp.Table(this=this, db=parts[1], catalog=parts[0]) 4703 4704 if table is None: 4705 self._retreat(index) 4706 elif all_comments: 4707 table.add_comments(all_comments) 4708 return table 4709 4710 def _parse_table_parts( 4711 self, 4712 schema: bool = False, 4713 is_db_reference: bool = False, 4714 wildcard: bool = False, 4715 fast: bool = False, 4716 ) -> exp.Table | exp.Dot | None: 4717 if fast: 4718 return self._parse_table_parts_fast() 4719 4720 catalog: exp.Expr | str | None = None 4721 db: exp.Expr | str | None = None 4722 table: exp.Expr | str | None = self._parse_table_part(schema=schema) 4723 4724 while self._match(TokenType.DOT): 4725 if catalog: 4726 # This allows nesting the table in arbitrarily many dot expressions if needed 4727 table = self.expression( 4728 exp.Dot(this=table, expression=self._parse_table_part(schema=schema)) 4729 ) 4730 else: 4731 catalog = db 4732 db = table 4733 # "" used for tsql FROM a..b case 4734 table = self._parse_table_part(schema=schema) or "" 4735 4736 if ( 4737 wildcard 4738 and self._is_connected() 4739 and (isinstance(table, exp.Identifier) or not table) 4740 and self._match(TokenType.STAR) 4741 ): 4742 if isinstance(table, exp.Identifier): 4743 table.args["this"] += "*" 4744 
else: 4745 table = exp.Identifier(this="*") 4746 4747 if is_db_reference: 4748 catalog = db 4749 db = table 4750 table = None 4751 4752 if not table and not is_db_reference: 4753 self.raise_error(f"Expected table name but got {self._curr}") 4754 if not db and is_db_reference: 4755 self.raise_error(f"Expected database name but got {self._curr}") 4756 4757 table = self.expression(exp.Table(this=table, db=db, catalog=catalog)) 4758 4759 # Bubble up comments from identifier parts to the Table 4760 comments = [] 4761 for part in table.parts: 4762 if part_comments := part.pop_comments(): 4763 comments.extend(part_comments) 4764 if comments: 4765 table.add_comments(comments) 4766 4767 changes = self._parse_changes() 4768 if changes: 4769 table.set("changes", changes) 4770 4771 at_before = self._parse_historical_data() 4772 if at_before: 4773 table.set("when", at_before) 4774 4775 pivots = self._parse_pivots() 4776 if pivots: 4777 table.set("pivots", pivots) 4778 4779 return table 4780 4781 def _parse_table( 4782 self, 4783 schema: bool = False, 4784 joins: bool = False, 4785 alias_tokens: t.Collection[TokenType] | None = None, 4786 parse_bracket: bool = False, 4787 is_db_reference: bool = False, 4788 parse_partition: bool = False, 4789 consume_pipe: bool = False, 4790 ) -> exp.Expr | None: 4791 if not schema and not is_db_reference and not consume_pipe and not joins: 4792 index = self._index 4793 table = self._parse_table_parts(fast=True) 4794 4795 if table is not None: 4796 curr_tt = self._curr.token_type 4797 next_tt = self._next.token_type 4798 4799 fast_terminators = self.TABLE_TERMINATORS 4800 4801 # only return the table if we're sure there are no other operators 4802 # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT 4803 if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION: 4804 return table 4805 4806 postfix_tokens = self.TABLE_POSTFIX_TOKENS 4807 4808 if curr_tt not in postfix_tokens and next_tt not in 
postfix_tokens: 4809 if alias := self._parse_table_alias( 4810 alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS 4811 ): 4812 table.set("alias", alias) 4813 4814 if self._curr.token_type in fast_terminators: 4815 return table 4816 4817 self._retreat(index) 4818 4819 if stream := self._parse_stream(): 4820 return stream 4821 4822 if lateral := self._parse_lateral(): 4823 return lateral 4824 4825 if unnest := self._parse_unnest(): 4826 return unnest 4827 4828 if values := self._parse_derived_table_values(): 4829 return values 4830 4831 if subquery := self._parse_select(table=True, consume_pipe=consume_pipe): 4832 if not subquery.args.get("pivots"): 4833 subquery.set("pivots", self._parse_pivots()) 4834 if joins: 4835 for join in self._parse_joins(): 4836 subquery.append("joins", join) 4837 return subquery 4838 4839 bracket = parse_bracket and self._parse_bracket(None) 4840 bracket = self.expression(exp.Table(this=bracket)) if bracket else None 4841 4842 rows_from_tables = ( 4843 self._parse_wrapped_csv(self._parse_table) 4844 if self._match_text_seq("ROWS", "FROM") 4845 else None 4846 ) 4847 rows_from = ( 4848 self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None 4849 ) 4850 4851 only = self._match(TokenType.ONLY) 4852 4853 this = t.cast( 4854 exp.Expr, 4855 bracket 4856 or rows_from 4857 or self._parse_bracket( 4858 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4859 ), 4860 ) 4861 4862 if only: 4863 this.set("only", only) 4864 4865 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4866 self._match(TokenType.STAR) 4867 4868 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4869 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4870 this.set("partition", self._parse_partition()) 4871 4872 if schema: 4873 return self._parse_schema(this=this) 4874 4875 if self.dialect.ALIAS_POST_VERSION: 4876 this.set("version", 
self._parse_version()) 4877 4878 if self.dialect.ALIAS_POST_TABLESAMPLE: 4879 this.set("sample", self._parse_table_sample()) 4880 4881 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4882 if alias: 4883 this.set("alias", alias) 4884 4885 if self._match(TokenType.INDEXED_BY): 4886 this.set("indexed", self._parse_table_parts()) 4887 elif self._match_text_seq("NOT", "INDEXED"): 4888 this.set("indexed", False) 4889 4890 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4891 return self.expression( 4892 exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var()) 4893 ) 4894 4895 this.set("hints", self._parse_table_hints()) 4896 4897 if not this.args.get("pivots"): 4898 this.set("pivots", self._parse_pivots()) 4899 4900 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4901 this.set("sample", self._parse_table_sample()) 4902 4903 if not self.dialect.ALIAS_POST_VERSION: 4904 this.set("version", self._parse_version()) 4905 4906 if joins: 4907 for join in self._parse_joins(alias_tokens=alias_tokens): 4908 this.append("joins", join) 4909 4910 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4911 this.set("ordinality", True) 4912 this.set("alias", self._parse_table_alias()) 4913 4914 return this 4915 4916 def _parse_version(self) -> exp.Version | None: 4917 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4918 this = "TIMESTAMP" 4919 elif self._match(TokenType.VERSION_SNAPSHOT): 4920 this = "VERSION" 4921 else: 4922 return None 4923 4924 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4925 kind = self._prev.text.upper() 4926 start = self._parse_bitwise() 4927 self._match_texts(("TO", "AND")) 4928 end = self._parse_bitwise() 4929 expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end])) 4930 elif self._match_text_seq("CONTAINED", "IN"): 4931 kind = "CONTAINED IN" 4932 expression = self.expression( 4933 exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise)) 4934 
) 4935 elif self._match(TokenType.ALL): 4936 kind = "ALL" 4937 expression = None 4938 else: 4939 self._match_text_seq("AS", "OF") 4940 kind = "AS OF" 4941 expression = self._parse_type() 4942 4943 return self.expression(exp.Version(this=this, expression=expression, kind=kind)) 4944 4945 def _parse_historical_data(self) -> exp.HistoricalData | None: 4946 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4947 index = self._index 4948 historical_data = None 4949 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4950 this = self._prev.text.upper() 4951 kind = ( 4952 self._match(TokenType.L_PAREN) 4953 and self._match_texts(self.HISTORICAL_DATA_KIND) 4954 and self._prev.text.upper() 4955 ) 4956 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4957 4958 if expression: 4959 self._match_r_paren() 4960 historical_data = self.expression( 4961 exp.HistoricalData(this=this, kind=kind, expression=expression) 4962 ) 4963 else: 4964 self._retreat(index) 4965 4966 return historical_data 4967 4968 def _parse_changes(self) -> exp.Changes | None: 4969 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4970 return None 4971 4972 information = self._parse_var(any_token=True) 4973 self._match_r_paren() 4974 4975 return self.expression( 4976 exp.Changes( 4977 information=information, 4978 at_before=self._parse_historical_data(), 4979 end=self._parse_historical_data(), 4980 ) 4981 ) 4982 4983 def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None: 4984 if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False): 4985 return None 4986 4987 self._advance() 4988 4989 expressions = self._parse_wrapped_csv(self._parse_equality) 4990 offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4991 4992 alias = self._parse_table_alias() if with_alias else None 4993 4994 if alias: 4995 if self.dialect.UNNEST_COLUMN_ONLY: 4996 if alias.args.get("columns"): 4997 self.raise_error("Unexpected 
extra column alias in unnest.") 4998 4999 alias.set("columns", [alias.this]) 5000 alias.set("this", None) 5001 5002 columns = alias.args.get("columns") or [] 5003 if offset and len(expressions) < len(columns): 5004 offset = columns.pop() 5005 5006 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 5007 self._match(TokenType.ALIAS) 5008 offset = self._parse_id_var( 5009 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 5010 ) or exp.to_identifier("offset") 5011 5012 return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset)) 5013 5014 def _parse_derived_table_values(self) -> exp.Values | None: 5015 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 5016 if not is_derived and not ( 5017 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 5018 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 5019 ): 5020 return None 5021 5022 expressions = self._parse_csv(self._parse_value) 5023 alias = self._parse_table_alias() 5024 5025 if is_derived: 5026 self._match_r_paren() 5027 5028 return self.expression( 5029 exp.Values(expressions=expressions, alias=alias or self._parse_table_alias()) 5030 ) 5031 5032 def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None: 5033 if not self._match(TokenType.TABLE_SAMPLE) and not ( 5034 as_modifier and self._match_text_seq("USING", "SAMPLE") 5035 ): 5036 return None 5037 5038 bucket_numerator = None 5039 bucket_denominator = None 5040 bucket_field = None 5041 percent = None 5042 size = None 5043 seed = None 5044 5045 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 5046 matched_l_paren = self._match(TokenType.L_PAREN) 5047 5048 if self.TABLESAMPLE_CSV: 5049 num = None 5050 expressions = self._parse_csv(self._parse_primary) 5051 else: 5052 expressions = None 5053 num = ( 5054 self._parse_factor() 5055 if self._match(TokenType.NUMBER, advance=False) 5056 else self._parse_primary() or 
self._parse_placeholder() 5057 ) 5058 5059 if self._match_text_seq("BUCKET"): 5060 bucket_numerator = self._parse_number() 5061 self._match_text_seq("OUT", "OF") 5062 bucket_denominator = bucket_denominator = self._parse_number() 5063 self._match(TokenType.ON) 5064 bucket_field = self._parse_field() 5065 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 5066 percent = num 5067 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 5068 size = num 5069 else: 5070 percent = num 5071 5072 if matched_l_paren: 5073 self._match_r_paren() 5074 5075 if self._match(TokenType.L_PAREN): 5076 method = self._parse_var(upper=True) 5077 seed = self._match(TokenType.COMMA) and self._parse_number() 5078 self._match_r_paren() 5079 elif self._match_texts(("SEED", "REPEATABLE")): 5080 seed = self._parse_wrapped(self._parse_number) 5081 5082 if not method and self.DEFAULT_SAMPLING_METHOD: 5083 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 5084 5085 return self.expression( 5086 exp.TableSample( 5087 expressions=expressions, 5088 method=method, 5089 bucket_numerator=bucket_numerator, 5090 bucket_denominator=bucket_denominator, 5091 bucket_field=bucket_field, 5092 percent=percent, 5093 size=size, 5094 seed=seed, 5095 ) 5096 ) 5097 5098 def _parse_pivots(self) -> list[exp.Pivot] | None: 5099 if self._curr.token_type not in (TokenType.PIVOT, TokenType.UNPIVOT): 5100 return None 5101 return list(iter(self._parse_pivot, None)) or None 5102 5103 def _parse_joins( 5104 self, alias_tokens: t.Collection[TokenType] | None = None 5105 ) -> t.Iterator[exp.Join]: 5106 return iter(lambda: self._parse_join(alias_tokens=alias_tokens), None) 5107 5108 def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None: 5109 if not self._match(TokenType.INTO): 5110 return None 5111 5112 return self.expression( 5113 exp.UnpivotColumns( 5114 this=self._match_text_seq("NAME") and self._parse_column(), 5115 expressions=self._match_text_seq("VALUE") and 
self._parse_csv(self._parse_column), 5116 ) 5117 ) 5118 5119 # https://duckdb.org/docs/sql/statements/pivot 5120 def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot: 5121 def _parse_on() -> exp.Expr | None: 5122 this = self._parse_bitwise() 5123 5124 if self._match(TokenType.IN): 5125 # PIVOT ... ON col IN (row_val1, row_val2) 5126 return self._parse_in(this) 5127 if self._match(TokenType.ALIAS, advance=False): 5128 # UNPIVOT ... ON (col1, col2, col3) AS row_val 5129 return self._parse_alias(this) 5130 5131 return this 5132 5133 this = self._parse_table() 5134 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 5135 into = self._parse_unpivot_columns() 5136 using = self._match(TokenType.USING) and self._parse_csv( 5137 lambda: self._parse_alias(self._parse_column()) 5138 ) 5139 group = self._parse_group() 5140 5141 return self.expression( 5142 exp.Pivot( 5143 this=this, 5144 expressions=expressions, 5145 using=using, 5146 group=group, 5147 unpivot=is_unpivot, 5148 into=into, 5149 ) 5150 ) 5151 5152 def _parse_pivot_in(self) -> exp.In: 5153 def _parse_aliased_expression() -> exp.Expr | None: 5154 this = self._parse_select_or_expression() 5155 5156 self._match(TokenType.ALIAS) 5157 alias = self._parse_bitwise() 5158 if alias: 5159 if isinstance(alias, exp.Column) and not alias.db: 5160 alias = alias.this 5161 return self.expression(exp.PivotAlias(this=this, alias=alias)) 5162 5163 return this 5164 5165 value = self._parse_column() 5166 5167 if not self._match(TokenType.IN): 5168 self.raise_error("Expecting IN") 5169 5170 if self._match(TokenType.L_PAREN): 5171 if self._match(TokenType.ANY): 5172 exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order())) 5173 else: 5174 exprs = self._parse_csv(_parse_aliased_expression) 5175 self._match_r_paren() 5176 return self.expression(exp.In(this=value, expressions=exprs)) 5177 5178 return self.expression(exp.In(this=value, field=self._parse_id_var())) 5179 5180 def 
_parse_pivot_aggregation(self) -> exp.Expr | None: 5181 func = self._parse_function() 5182 if not func: 5183 if self._prev.token_type == TokenType.COMMA: 5184 return None 5185 self.raise_error("Expecting an aggregation function in PIVOT") 5186 5187 return self._parse_alias(func) 5188 5189 def _parse_pivot(self) -> exp.Pivot | None: 5190 index = self._index 5191 include_nulls = None 5192 5193 if self._match(TokenType.PIVOT): 5194 unpivot = False 5195 elif self._match(TokenType.UNPIVOT): 5196 unpivot = True 5197 5198 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 5199 if self._match_text_seq("INCLUDE", "NULLS"): 5200 include_nulls = True 5201 elif self._match_text_seq("EXCLUDE", "NULLS"): 5202 include_nulls = False 5203 else: 5204 return None 5205 5206 expressions = [] 5207 5208 if not self._match(TokenType.L_PAREN): 5209 self._retreat(index) 5210 return None 5211 5212 if unpivot: 5213 expressions = self._parse_csv(self._parse_column) 5214 else: 5215 expressions = self._parse_csv(self._parse_pivot_aggregation) 5216 5217 if not expressions: 5218 self.raise_error("Failed to parse PIVOT's aggregation list") 5219 5220 if not self._match(TokenType.FOR): 5221 self.raise_error("Expecting FOR") 5222 5223 fields = [] 5224 while True: 5225 field = self._try_parse(self._parse_pivot_in) 5226 if not field: 5227 break 5228 fields.append(field) 5229 5230 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 5231 self._parse_bitwise 5232 ) 5233 5234 group = self._parse_group() 5235 5236 self._match_r_paren() 5237 5238 pivot = self.expression( 5239 exp.Pivot( 5240 expressions=expressions, 5241 fields=fields, 5242 unpivot=unpivot, 5243 include_nulls=include_nulls, 5244 default_on_null=default_on_null, 5245 group=group, 5246 ) 5247 ) 5248 5249 if unpivot: 5250 pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions]) 5251 for pivot_field in pivot.fields: 5252 if 
isinstance(pivot_field, exp.In): 5253 pivot_field.set("this", _unpivot_target(pivot_field.this)) 5254 5255 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 5256 pivot.set("alias", self._parse_table_alias()) 5257 5258 if not unpivot: 5259 names = self._pivot_column_names(t.cast(list[exp.Expr], expressions)) 5260 5261 columns: list[exp.Expr] = [] 5262 all_fields = [] 5263 for pivot_field in pivot.fields: 5264 pivot_field_expressions = pivot_field.expressions 5265 5266 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 5267 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 5268 continue 5269 5270 all_fields.append( 5271 [ 5272 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 5273 for fld in pivot_field_expressions 5274 ] 5275 ) 5276 5277 if all_fields: 5278 if names: 5279 all_fields.append(names) 5280 5281 # Generate all possible combinations of the pivot columns 5282 # e.g PIVOT(sum(...) 
as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 5283 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']] 5284 for fld_parts_tuple in itertools.product(*all_fields): 5285 fld_parts = list(fld_parts_tuple) 5286 5287 if names and self.PREFIXED_PIVOT_COLUMNS: 5288 # Move the "name" to the front of the list 5289 fld_parts.insert(0, fld_parts.pop(-1)) 5290 5291 columns.append(exp.to_identifier("_".join(fld_parts))) 5292 5293 pivot.set("columns", columns) 5294 pivot.set("identify_pivot_strings", self.IDENTIFY_PIVOT_STRINGS) 5295 pivot.set("prefixed_pivot_columns", self.PREFIXED_PIVOT_COLUMNS) 5296 pivot.set("pivot_column_naming", self.PIVOT_COLUMN_NAMING) 5297 5298 return pivot 5299 5300 def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]: 5301 return [agg.alias for agg in aggregations if agg.alias] 5302 5303 def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None: 5304 if not skip_where_token and not self._match(TokenType.PREWHERE): 5305 return None 5306 5307 comments = self._prev_comments 5308 return self.expression( 5309 exp.PreWhere(this=self._parse_disjunction()), 5310 comments=comments, 5311 ) 5312 5313 def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None: 5314 if not skip_where_token and not self._match(TokenType.WHERE): 5315 return None 5316 5317 comments = self._prev_comments 5318 return self.expression( 5319 exp.Where(this=self._parse_disjunction()), 5320 comments=comments, 5321 ) 5322 5323 def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None: 5324 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 5325 return None 5326 comments = self._prev_comments 5327 5328 elements: dict[str, t.Any] = defaultdict(list) 5329 5330 if self._match(TokenType.ALL): 5331 elements["all"] = True 5332 elif self._match(TokenType.DISTINCT): 5333 elements["all"] = False 5334 5335 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 
5336 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5337 5338 while True: 5339 index = self._index 5340 5341 elements["expressions"].extend( 5342 self._parse_csv( 5343 lambda: ( 5344 None 5345 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 5346 else self._parse_disjunction() 5347 ) 5348 ) 5349 ) 5350 5351 before_with_index = self._index 5352 with_prefix = self._match(TokenType.WITH) 5353 5354 if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix): 5355 key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube" 5356 elements[key].append(cube_or_rollup) 5357 elif grouping_sets := self._parse_grouping_sets(): 5358 elements["grouping_sets"].append(grouping_sets) 5359 elif self._match_text_seq("TOTALS"): 5360 elements["totals"] = True # type: ignore 5361 5362 if before_with_index <= self._index <= before_with_index + 1: 5363 self._retreat(before_with_index) 5364 break 5365 5366 if index == self._index: 5367 break 5368 5369 return self.expression(exp.Group(**elements), comments=comments) # type: ignore 5370 5371 def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None: 5372 if self._match(TokenType.CUBE): 5373 kind: type[exp.Cube | exp.Rollup] = exp.Cube 5374 elif self._match(TokenType.ROLLUP): 5375 kind = exp.Rollup 5376 else: 5377 return None 5378 5379 return self.expression( 5380 kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise)) 5381 ) 5382 5383 def _parse_grouping_sets(self) -> exp.GroupingSets | None: 5384 if self._match(TokenType.GROUPING_SETS): 5385 return self.expression( 5386 exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set)) 5387 ) 5388 return None 5389 5390 def _parse_grouping_set(self) -> exp.Expr | None: 5391 return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise() 5392 5393 def _parse_having(self, skip_having_token: bool = False) -> exp.Having 
| None: 5394 if not skip_having_token and not self._match(TokenType.HAVING): 5395 return None 5396 comments = self._prev_comments 5397 return self.expression( 5398 exp.Having(this=self._parse_disjunction()), 5399 comments=comments, 5400 ) 5401 5402 def _parse_qualify(self) -> exp.Qualify | None: 5403 if not self._match(TokenType.QUALIFY): 5404 return None 5405 return self.expression(exp.Qualify(this=self._parse_disjunction())) 5406 5407 def _parse_connect_with_prior(self) -> exp.Expr | None: 5408 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 5409 exp.Prior(this=self._parse_bitwise()) 5410 ) 5411 connect = self._parse_disjunction() 5412 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 5413 return connect 5414 5415 def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None: 5416 if skip_start_token: 5417 start = None 5418 elif self._match(TokenType.START_WITH): 5419 start = self._parse_disjunction() 5420 else: 5421 return None 5422 5423 self._match(TokenType.CONNECT_BY) 5424 nocycle = self._match_text_seq("NOCYCLE") 5425 connect = self._parse_connect_with_prior() 5426 5427 if not start and self._match(TokenType.START_WITH): 5428 start = self._parse_disjunction() 5429 5430 return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle)) 5431 5432 def _parse_name_as_expression(self) -> exp.Expr | None: 5433 this = self._parse_id_var(any_token=True) 5434 if self._match(TokenType.ALIAS): 5435 this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction())) 5436 return this 5437 5438 def _parse_interpolate(self) -> list[exp.Expr] | None: 5439 if self._match_text_seq("INTERPOLATE"): 5440 return self._parse_wrapped_csv(self._parse_name_as_expression) 5441 return None 5442 5443 def _parse_order( 5444 self, this: exp.Expr | None = None, skip_order_token: bool = False 5445 ) -> exp.Expr | None: 5446 siblings = None 5447 if not skip_order_token and not self._match(TokenType.ORDER_BY): 5448 if not 
self._match(TokenType.ORDER_SIBLINGS_BY): 5449 return this 5450 5451 siblings = True 5452 5453 comments = self._prev_comments 5454 return self.expression( 5455 exp.Order( 5456 this=this, 5457 expressions=self._parse_csv(self._parse_ordered), 5458 siblings=siblings, 5459 ), 5460 comments=comments, 5461 ) 5462 5463 def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None: 5464 if not self._match(token): 5465 return None 5466 return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered))) 5467 5468 def _parse_ordered( 5469 self, parse_method: t.Callable[[], exp.Expr | None] | None = None 5470 ) -> exp.Ordered | None: 5471 this = parse_method() if parse_method else self._parse_disjunction() 5472 if not this: 5473 return None 5474 5475 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 5476 this = exp.var("ALL") 5477 5478 asc = self._match(TokenType.ASC) 5479 desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None) 5480 5481 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 5482 is_nulls_last = self._match_text_seq("NULLS", "LAST") 5483 5484 nulls_first = is_nulls_first or False 5485 explicitly_null_ordered = is_nulls_first or is_nulls_last 5486 5487 if ( 5488 not explicitly_null_ordered 5489 and ( 5490 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 5491 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 5492 ) 5493 and self.dialect.NULL_ORDERING != "nulls_are_last" 5494 ): 5495 nulls_first = True 5496 5497 if self._match_text_seq("WITH", "FILL"): 5498 with_fill = self.expression( 5499 exp.WithFill( 5500 from_=self._match(TokenType.FROM) and self._parse_bitwise(), 5501 to=self._match_text_seq("TO") and self._parse_bitwise(), 5502 step=self._match_text_seq("STEP") and self._parse_bitwise(), 5503 interpolate=self._parse_interpolate(), 5504 ) 5505 ) 5506 else: 5507 with_fill = None 5508 5509 return self.expression( 5510 exp.Ordered(this=this, desc=desc, 
nulls_first=nulls_first, with_fill=with_fill) 5511 ) 5512 5513 def _parse_limit_options(self) -> exp.LimitOptions | None: 5514 percent = self._match_set((TokenType.PERCENT, TokenType.MOD)) 5515 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 5516 self._match_text_seq("ONLY") 5517 with_ties = self._match_text_seq("WITH", "TIES") 5518 5519 if not (percent or rows or with_ties): 5520 return None 5521 5522 return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties)) 5523 5524 def _parse_limit( 5525 self, 5526 this: exp.Expr | None = None, 5527 top: bool = False, 5528 skip_limit_token: bool = False, 5529 ) -> exp.Expr | None: 5530 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 5531 comments = self._prev_comments 5532 if top: 5533 limit_paren = self._match(TokenType.L_PAREN) 5534 expression = ( 5535 self._parse_term() or self._parse_select() 5536 if limit_paren 5537 else self._parse_number() 5538 ) 5539 5540 if limit_paren: 5541 self._match_r_paren() 5542 5543 else: 5544 if self.dialect.SUPPORTS_LIMIT_ALL and self._match(TokenType.ALL): 5545 return this 5546 5547 # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since 5548 # we try to build an exp.Mod expr. 
For that matter, we backtrack and instead 5549 # consume the factor plus parse the percentage separately 5550 index = self._index 5551 expression = self._try_parse(self._parse_term) 5552 if isinstance(expression, exp.Mod): 5553 self._retreat(index) 5554 expression = self._parse_factor() 5555 elif not expression: 5556 expression = self._parse_factor() 5557 limit_options = self._parse_limit_options() 5558 5559 if self._match(TokenType.COMMA): 5560 offset = expression 5561 expression = self._parse_term() 5562 else: 5563 offset = None 5564 5565 limit_exp = self.expression( 5566 exp.Limit( 5567 this=this, 5568 expression=expression, 5569 offset=offset, 5570 limit_options=limit_options, 5571 expressions=self._parse_limit_by(), 5572 ), 5573 comments=comments, 5574 ) 5575 5576 return limit_exp 5577 5578 if self._match(TokenType.FETCH): 5579 direction = ( 5580 self._prev.text.upper() 5581 if self._match_set((TokenType.FIRST, TokenType.NEXT)) 5582 else "FIRST" 5583 ) 5584 5585 count = self._parse_field(tokens=self.FETCH_TOKENS) 5586 5587 return self.expression( 5588 exp.Fetch( 5589 direction=direction, count=count, limit_options=self._parse_limit_options() 5590 ) 5591 ) 5592 5593 return this 5594 5595 def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None: 5596 if not self._match(TokenType.OFFSET): 5597 return this 5598 5599 count = self._parse_term() 5600 self._match_set((TokenType.ROW, TokenType.ROWS)) 5601 5602 return self.expression( 5603 exp.Offset(this=this, expression=count, expressions=self._parse_limit_by()) 5604 ) 5605 5606 def _can_parse_limit_or_offset(self) -> bool: 5607 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 5608 return False 5609 5610 index = self._index 5611 result = bool( 5612 self._try_parse(self._parse_limit, retreat=True) 5613 or self._try_parse(self._parse_offset, retreat=True) 5614 ) 5615 self._retreat(index) 5616 5617 # MATCH_CONDITION (...) 
is a special construct that should not be consumed by limit/offset 5618 if self._next.token_type == TokenType.MATCH_CONDITION: 5619 result = False 5620 5621 return result 5622 5623 def _can_parse_named_window(self) -> bool: 5624 # `WINDOW` is in ID_VAR_TOKENS so it could be mistakenly consumed as an implicit alias. 5625 # Refuse only when the following tokens look like a named-window clause: `WINDOW <id> AS (`. 5626 if not self._match(TokenType.WINDOW, advance=False): 5627 return False 5628 5629 name = self._tokens[self._index + 1] if self._index + 1 < len(self._tokens) else None 5630 if name is None or name.token_type not in self.ID_VAR_TOKENS: 5631 return False 5632 5633 alias_tok = self._tokens[self._index + 2] if self._index + 2 < len(self._tokens) else None 5634 if alias_tok is None or alias_tok.token_type != TokenType.ALIAS: 5635 return False 5636 5637 body = self._tokens[self._index + 3] if self._index + 3 < len(self._tokens) else None 5638 return body is not None and body.token_type == TokenType.L_PAREN 5639 5640 def _parse_limit_by(self) -> list[exp.Expr] | None: 5641 return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None 5642 5643 def _parse_locks(self) -> list[exp.Lock]: 5644 locks = [] 5645 while True: 5646 update, key = None, None 5647 if self._match_text_seq("FOR", "UPDATE"): 5648 update = True 5649 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 5650 "LOCK", "IN", "SHARE", "MODE" 5651 ): 5652 update = False 5653 elif self._match_text_seq("FOR", "KEY", "SHARE"): 5654 update, key = False, True 5655 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 5656 update, key = True, True 5657 else: 5658 break 5659 5660 expressions = None 5661 if self._match_text_seq("OF"): 5662 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 5663 5664 wait: bool | exp.Expr | None = None 5665 if self._match_text_seq("NOWAIT"): 5666 wait = True 5667 elif self._match_text_seq("WAIT"): 5668 wait = 
self._parse_primary() 5669 elif self._match_text_seq("SKIP", "LOCKED"): 5670 wait = False 5671 5672 locks.append( 5673 self.expression( 5674 exp.Lock(update=update, expressions=expressions, wait=wait, key=key) 5675 ) 5676 ) 5677 5678 return locks 5679 5680 def parse_set_operation( 5681 self, this: exp.Expr | None, consume_pipe: bool = False 5682 ) -> exp.Expr | None: 5683 start = self._index 5684 _, side_token, kind_token = self._parse_join_parts() 5685 5686 side = side_token.text if side_token else None 5687 kind = kind_token.text if kind_token else None 5688 5689 if not self._match_set(self.SET_OPERATIONS): 5690 self._retreat(start) 5691 return None 5692 5693 token_type = self._prev.token_type 5694 5695 if token_type == TokenType.UNION: 5696 operation: type[exp.SetOperation] = exp.Union 5697 elif token_type == TokenType.EXCEPT: 5698 operation = exp.Except 5699 else: 5700 operation = exp.Intersect 5701 5702 comments = self._prev.comments 5703 5704 if self._match(TokenType.DISTINCT): 5705 distinct: bool | None = True 5706 elif self._match(TokenType.ALL): 5707 distinct = False 5708 else: 5709 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 5710 if distinct is None: 5711 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 5712 5713 by_name = ( 5714 self._match_text_seq("BY", "NAME") 5715 or self._match_text_seq("STRICT", "CORRESPONDING") 5716 or None 5717 ) 5718 if self._match_text_seq("CORRESPONDING"): 5719 by_name = True 5720 if not side and not kind: 5721 kind = "INNER" 5722 5723 on_column_list = None 5724 if by_name and self._match_texts(("ON", "BY")): 5725 on_column_list = self._parse_wrapped_csv(self._parse_column) 5726 5727 expression = self._parse_select( 5728 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 5729 ) 5730 5731 return self.expression( 5732 operation( 5733 this=this, 5734 distinct=distinct, 5735 by_name=by_name, 5736 expression=expression, 5737 side=side, 5738 kind=kind, 5739 on=on_column_list, 
5740 ), 5741 comments=comments, 5742 ) 5743 5744 def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None: 5745 while this: 5746 setop = self.parse_set_operation(this) 5747 if not setop: 5748 break 5749 this = setop 5750 5751 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 5752 expression = this.expression 5753 5754 if expression: 5755 for arg in self.SET_OP_MODIFIERS: 5756 expr = expression.args.get(arg) 5757 if expr: 5758 this.set(arg, expr.pop()) 5759 5760 return this 5761 5762 def _parse_expression(self) -> exp.Expr | None: 5763 return self._parse_alias(self._parse_assignment()) 5764 5765 def _parse_assignment(self) -> exp.Expr | None: 5766 this = self._parse_disjunction() 5767 if not this and self._next.token_type in self.ASSIGNMENT: 5768 # This allows us to parse <non-identifier token> := <expr> 5769 this = exp.column( 5770 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 5771 ) 5772 5773 while self._match_set(self.ASSIGNMENT): 5774 if isinstance(this, exp.Column) and len(this.parts) == 1: 5775 this = this.this 5776 5777 comments = self._prev_comments 5778 this = self.expression( 5779 self.ASSIGNMENT[self._prev.token_type]( 5780 this=this, expression=self._parse_assignment() 5781 ), 5782 comments=comments, 5783 ) 5784 5785 return this 5786 5787 def _parse_disjunction(self) -> exp.Expr | None: 5788 this = self._parse_conjunction() 5789 while self._match_set(self.DISJUNCTION): 5790 comments = self._prev_comments 5791 this = self.expression( 5792 self.DISJUNCTION[self._prev.token_type]( 5793 this=this, expression=self._parse_conjunction() 5794 ), 5795 comments=comments, 5796 ) 5797 return this 5798 5799 def _parse_conjunction(self) -> exp.Expr | None: 5800 this = self._parse_equality() 5801 while self._match_set(self.CONJUNCTION): 5802 comments = self._prev_comments 5803 this = self.expression( 5804 self.CONJUNCTION[self._prev.token_type]( 5805 this=this, expression=self._parse_equality() 5806 
), 5807 comments=comments, 5808 ) 5809 return this 5810 5811 def _parse_equality(self) -> exp.Expr | None: 5812 this = self._parse_comparison() 5813 while self._match_set(self.EQUALITY): 5814 comments = self._prev_comments 5815 this = self.expression( 5816 self.EQUALITY[self._prev.token_type]( 5817 this=this, expression=self._parse_comparison() 5818 ), 5819 comments=comments, 5820 ) 5821 return this 5822 5823 def _parse_comparison(self) -> exp.Expr | None: 5824 this = self._parse_range() 5825 while self._match_set(self.COMPARISON): 5826 comments = self._prev_comments 5827 this = self.expression( 5828 self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()), 5829 comments=comments, 5830 ) 5831 return this 5832 5833 def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5834 this = this or self._parse_bitwise() 5835 negate = self._match(TokenType.NOT) 5836 5837 if self._match_set(self.RANGE_PARSERS): 5838 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 5839 if not expression: 5840 return this 5841 5842 this = expression 5843 elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)): 5844 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5845 5846 # Postgres supports ISNULL and NOTNULL for conditions. 
5847 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 5848 if self._match(TokenType.NOTNULL): 5849 this = self.expression(exp.Is(this=this, expression=exp.Null())) 5850 this = self.expression(exp.Not(this=this)) 5851 5852 if negate: 5853 this = self._negate_range(this) 5854 5855 if self._match(TokenType.IS): 5856 this = self._parse_is(this) 5857 5858 return this 5859 5860 def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None: 5861 if not this: 5862 return this 5863 5864 expression = this.this if isinstance(this, exp.Escape) else this 5865 if isinstance(expression, (exp.Like, exp.ILike)): 5866 expression.set("negate", True) 5867 return this 5868 5869 return self.expression(exp.Not(this=this)) 5870 5871 def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None: 5872 index = self._index - 1 5873 negate = self._match(TokenType.NOT) 5874 5875 if self._match_text_seq("DISTINCT", "FROM"): 5876 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 5877 return self.expression(klass(this=this, expression=self._parse_bitwise())) 5878 5879 if self._match(TokenType.JSON): 5880 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5881 5882 if self._match_text_seq("WITH"): 5883 _with = True 5884 elif self._match_text_seq("WITHOUT"): 5885 _with = False 5886 else: 5887 _with = None 5888 5889 unique = self._match(TokenType.UNIQUE) 5890 self._match_text_seq("KEYS") 5891 expression: exp.Expr | None = self.expression( 5892 exp.JSON(this=kind, with_=_with, unique=unique) 5893 ) 5894 else: 5895 expression = self._parse_null() or self._parse_bitwise() 5896 if not expression: 5897 self._retreat(index) 5898 return None 5899 5900 this = self.expression(exp.Is(this=this, expression=expression)) 5901 this = self.expression(exp.Not(this=this)) if negate else this 5902 return self._parse_column_ops(this) 5903 5904 def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In: 5905 unnest = 
def _parse_between(self, this: exp.Expr | None) -> exp.Between:
    """Parse `[SYMMETRIC | ASYMMETRIC] <low> AND <high>` after a BETWEEN keyword.

    `symmetric` is True / False for the explicit keywords and None when neither is given.
    """
    if self._match_text_seq("SYMMETRIC"):
        symmetric = True
    elif self._match_text_seq("ASYMMETRIC"):
        symmetric = False
    else:
        symmetric = None

    low = self._parse_bitwise()
    # The AND separator is consumed when present; its absence is not reported here
    self._match(TokenType.AND)
    high = self._parse_bitwise()

    between = exp.Between(this=this, low=low, high=high, symmetric=symmetric)
    return self.expression(between)


def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None:
    """Wrap `this` in `exp.Escape` when an ESCAPE clause follows; otherwise return it as is."""
    if self._match(TokenType.ESCAPE):
        escape_value = self._parse_string() or self._parse_null()
        return self.expression(exp.Escape(this=this, expression=escape_value))

    return this


def _parse_interval_span(self, this: exp.Expr) -> exp.Interval:
    """Parse the unit (or `unit TO unit` span) that follows an interval value.

    Args:
        this: The already-parsed interval value.

    Returns:
        An `exp.Interval` whose value is normalized towards the `INTERVAL '5' day` form.
    """
    # Day-time format interval span with omitted units:
    #   INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`>
    units_omitted = None
    if (
        this
        and this.is_string
        and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
        and exp.INTERVAL_DAY_TIME_RE.match(this.name)
    ):
        lookahead_start = self._index

        # Look ahead for `Var TO Var`, then rewind regardless of the outcome
        leading_unit = self._parse_var(any_token=True, upper=True)
        trailing_unit = None
        if leading_unit and self._match_text_seq("TO"):
            trailing_unit = self._parse_var(any_token=True, upper=True)

        units_omitted = not (leading_unit and trailing_unit)
        self._retreat(lookahead_start)

    unit = None
    if not units_omitted:
        unit = self._parse_function()
        if not unit:
            current = self._curr
            if (
                current.token_type == TokenType.VAR
                or current.text.upper() in self.dialect.VALID_INTERVAL_UNITS
            ):
                unit = self._parse_var(any_token=True, upper=True)

    # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
    # each INTERVAL expression into this canonical form so it's easy to transpile
    if this and this.is_number:
        this = exp.Literal.string(this.to_py())
    elif this and this.is_string:
        matches = exp.INTERVAL_STRING_RE.findall(this.name)
        if matches and unit:
            # Unconsume the eagerly-parsed unit, since the real unit was part of the string
            unit = None
            self._retreat(self._index - 1)

        if len(matches) == 1:
            value_and_unit = matches[0]
            this = exp.Literal.string(value_and_unit[0])
            unit = self.expression(exp.Var(this=value_and_unit[1].upper()))

    if self.INTERVAL_SPANS and self._match_text_seq("TO"):
        span_end = self._parse_function() or self._parse_var(any_token=True, upper=True)
        unit = self.expression(exp.IntervalSpan(this=unit, expression=span_end))

    return self.expression(exp.Interval(this=this, unit=unit))
isinstance(this, exp.Column) 6019 and not this.table 6020 and not this.this.quoted 6021 and self._curr 6022 and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS 6023 ): 6024 self._retreat(index) 6025 return None 6026 6027 interval = self._parse_interval_span(this) 6028 6029 index = self._index 6030 self._match(TokenType.PLUS) 6031 6032 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 6033 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 6034 return self.expression(exp.Add(this=interval, expression=self._parse_interval(False))) 6035 6036 self._retreat(index) 6037 return interval 6038 6039 def _parse_bitwise(self) -> exp.Expr | None: 6040 this = self._parse_term() 6041 6042 while True: 6043 if self._match_set(self.BITWISE): 6044 this = self.expression( 6045 self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term()) 6046 ) 6047 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 6048 this = self.expression( 6049 exp.DPipe( 6050 this=this, 6051 expression=self._parse_term(), 6052 safe=not self.dialect.STRICT_STRING_CONCAT, 6053 ) 6054 ) 6055 elif self._match(TokenType.DQMARK): 6056 this = self.expression( 6057 exp.Coalesce(this=this, expressions=ensure_list(self._parse_term())) 6058 ) 6059 elif self._match_pair(TokenType.LT, TokenType.LT): 6060 this = self.expression( 6061 exp.BitwiseLeftShift(this=this, expression=self._parse_term()) 6062 ) 6063 elif self._match_pair(TokenType.GT, TokenType.GT): 6064 this = self.expression( 6065 exp.BitwiseRightShift(this=this, expression=self._parse_term()) 6066 ) 6067 else: 6068 break 6069 6070 return this 6071 6072 def _parse_term(self) -> exp.Expr | None: 6073 this = self._parse_factor() 6074 6075 while self._match_set(self.TERM): 6076 klass = self.TERM[self._prev.token_type] 6077 comments = self._prev_comments 6078 expression = self._parse_factor() 6079 6080 this = self.expression(klass(this=this, 
expression=expression), comments=comments) 6081 6082 if isinstance(this, exp.Collate): 6083 expr = this.expression 6084 6085 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 6086 # fallback to Identifier / Var 6087 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 6088 ident = expr.this 6089 if isinstance(ident, exp.Identifier): 6090 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 6091 6092 return this 6093 6094 def _parse_factor(self) -> exp.Expr | None: 6095 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 6096 this = self._parse_at_time_zone(parse_method()) 6097 6098 while self._match_set(self.FACTOR): 6099 klass = self.FACTOR[self._prev.token_type] 6100 comments = self._prev_comments 6101 expression = parse_method() 6102 6103 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 6104 self._retreat(self._index - 1) 6105 return this 6106 6107 this = self.expression(klass(this=this, expression=expression), comments=comments) 6108 6109 if isinstance(this, exp.Div): 6110 this.set("typed", self.dialect.TYPED_DIVISION) 6111 this.set("safe", self.dialect.SAFE_DIVISION) 6112 6113 return this 6114 6115 def _parse_exponent(self) -> exp.Expr | None: 6116 this = self._parse_unary() 6117 while self._match_set(self.EXPONENT): 6118 comments = self._prev_comments 6119 this = self.expression( 6120 self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()), 6121 comments=comments, 6122 ) 6123 return this 6124 6125 def _parse_unary(self) -> exp.Expr | None: 6126 if self._match_set(self.UNARY_PARSERS): 6127 return self.UNARY_PARSERS[self._prev.token_type](self) 6128 return self._parse_type() 6129 6130 def _parse_type( 6131 self, parse_interval: bool = True, fallback_to_identifier: bool = False 6132 ) -> exp.Expr | None: 6133 if not fallback_to_identifier and (atom := self._parse_atom()) is not None: 6134 return atom 6135 6136 if interval := 
parse_interval and self._parse_interval(): 6137 return self._parse_column_ops(interval) 6138 6139 index = self._index 6140 data_type = self._parse_types(check_func=True, allow_identifiers=False) 6141 6142 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 6143 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 6144 if isinstance(data_type, exp.Cast): 6145 # This constructor can contain ops directly after it, for instance struct unnesting: 6146 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).* 6147 return self._parse_column_ops(data_type) 6148 6149 if data_type: 6150 index2 = self._index 6151 this = self._parse_primary() 6152 6153 if isinstance(this, exp.Literal): 6154 literal = this.name 6155 this = self._parse_column_ops(this) 6156 6157 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 6158 if parser: 6159 return parser(self, this, data_type) 6160 6161 if ( 6162 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 6163 and data_type.is_type(exp.DType.TIMESTAMP) 6164 and TIME_ZONE_RE.search(literal) 6165 ): 6166 data_type = exp.DType.TIMESTAMPTZ.into_expr() 6167 6168 return self.expression(exp.Cast(this=this, to=data_type)) 6169 6170 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 6171 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 6172 # 6173 # If the index difference here is greater than 1, that means the parser itself must have 6174 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 6175 # 6176 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 6177 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 6178 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 6179 # DECIMAL(38, 0)) in order to facilitate the data type's transpilation. 
def _parse_type_size(self) -> exp.DataTypeParam | None:
    """Parse one data type parameter: a type-level expression plus an optional trailing var.

    Returns None when nothing could be parsed. An unqualified column is converted
    to an upper-cased variable.
    """
    size = self._parse_type()
    if not size:
        return None

    if isinstance(size, exp.Column) and not size.table:
        size = exp.var(size.name.upper())

    qualifier = self._parse_var(any_token=True)
    return self.expression(exp.DataTypeParam(this=size, expression=qualifier))


def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None:
    """Build a user-defined data type from `identifier` and any following `.part` tokens."""
    name_parts = [identifier.name]

    while self._match(TokenType.DOT):
        # Whatever token follows the dot is taken as the next name part
        name_parts.append(f"{self._advance_any() and self._prev.text}")

    qualified_name = ".".join(name_parts)
    return exp.DataType.from_str(qualified_name, dialect=self.dialect, udt=True)
= self._parse_user_defined_type(identifier) 6242 else: 6243 self._retreat(self._index - 1) 6244 return None 6245 else: 6246 return None 6247 6248 if type_token == TokenType.PSEUDO_TYPE: 6249 return self.expression(exp.PseudoType(this=self._prev.text.upper())) 6250 6251 if type_token == TokenType.OBJECT_IDENTIFIER: 6252 return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper())) 6253 6254 # https://materialize.com/docs/sql/types/map/ 6255 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 6256 key_type = self._parse_types( 6257 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6258 ) 6259 if not self._match(TokenType.FARROW): 6260 self._retreat(index) 6261 return None 6262 6263 value_type = self._parse_types( 6264 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6265 ) 6266 if not self._match(TokenType.R_BRACKET): 6267 self._retreat(index) 6268 return None 6269 6270 return exp.DataType( 6271 this=exp.DType.MAP, 6272 expressions=[key_type, value_type], 6273 nested=True, 6274 ) 6275 6276 nested = type_token in self.NESTED_TYPE_TOKENS 6277 is_struct = type_token in self.STRUCT_TYPE_TOKENS 6278 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 6279 expressions = None 6280 maybe_func = False 6281 6282 if self._match(TokenType.L_PAREN): 6283 if is_struct: 6284 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6285 elif nested: 6286 expressions = self._parse_csv( 6287 lambda: self._parse_types( 6288 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 6289 ) 6290 ) 6291 if type_token == TokenType.NULLABLE and len(expressions) == 1: 6292 this = expressions[0] 6293 this.set("nullable", True) 6294 self._match_r_paren() 6295 return this 6296 elif type_token in self.ENUM_TYPE_TOKENS: 6297 expressions = self._parse_csv(self._parse_equality) 6298 elif type_token == TokenType.JSON: 6299 # ClickHouse JSON type supports arguments: JSON(col 
Type, SKIP col, param=value) 6300 # https://clickhouse.com/docs/sql-reference/data-types/newjson 6301 expressions = self._parse_csv(self._parse_json_type_arg) 6302 elif is_aggregate: 6303 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 6304 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 6305 ) 6306 if not func_or_ident: 6307 return None 6308 expressions = [func_or_ident] 6309 if self._match(TokenType.COMMA): 6310 expressions.extend( 6311 self._parse_csv( 6312 lambda: self._parse_types( 6313 check_func=check_func, 6314 schema=schema, 6315 allow_identifiers=allow_identifiers, 6316 ) 6317 ) 6318 ) 6319 else: 6320 expressions = self._parse_csv(self._parse_type_size) 6321 6322 # https://docs.snowflake.com/en/sql-reference/data-types-vector 6323 if type_token == TokenType.VECTOR and len(expressions) == 2: 6324 expressions = self._parse_vector_expressions(expressions) 6325 6326 if not self._match(TokenType.R_PAREN): 6327 self._retreat(index) 6328 return None 6329 6330 maybe_func = True 6331 6332 values: list[exp.Expr] | None = None 6333 6334 if nested and self._match(TokenType.LT): 6335 if is_struct: 6336 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 6337 else: 6338 expressions = self._parse_csv( 6339 lambda: self._parse_types( 6340 check_func=check_func, 6341 schema=schema, 6342 allow_identifiers=allow_identifiers, 6343 with_collation=True, 6344 ) 6345 ) 6346 6347 if not self._match(TokenType.GT): 6348 self.raise_error("Expecting >") 6349 6350 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 6351 values = self._parse_csv(self._parse_disjunction) 6352 if not values and is_struct: 6353 values = None 6354 self._retreat(self._index - 1) 6355 else: 6356 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 6357 6358 if type_token in self.TIMESTAMPS: 6359 if self._match_text_seq("WITH", "TIME", "ZONE"): 6360 maybe_func = False 6361 tz_type = exp.DType.TIMETZ if type_token in self.TIMES 
else exp.DType.TIMESTAMPTZ 6362 this = exp.DataType(this=tz_type, expressions=expressions) 6363 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 6364 maybe_func = False 6365 this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions) 6366 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 6367 maybe_func = False 6368 elif type_token == TokenType.INTERVAL: 6369 if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS: 6370 unit = self._parse_var(upper=True) 6371 if self._match_text_seq("TO"): 6372 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 6373 6374 this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit)))) 6375 else: 6376 this = self.expression(exp.DataType(this=exp.DType.INTERVAL)) 6377 elif type_token == TokenType.VOID: 6378 this = exp.DataType(this=exp.DType.NULL) 6379 6380 if maybe_func and check_func: 6381 index2 = self._index 6382 peek = self._parse_string() 6383 6384 if not peek: 6385 self._retreat(index) 6386 return None 6387 6388 self._retreat(index2) 6389 6390 if not this: 6391 assert type_token is not None 6392 if self._match_text_seq("UNSIGNED"): 6393 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 6394 if not unsigned_type_token: 6395 self.raise_error(f"Cannot convert {type_token.name} to unsigned.") 6396 6397 type_token = unsigned_type_token or type_token 6398 6399 # NULLABLE without parentheses can be a column (Presto/Trino) 6400 if type_token == TokenType.NULLABLE and not expressions: 6401 self._retreat(index) 6402 return None 6403 6404 this = exp.DataType( 6405 this=exp.DType[type_token.name], 6406 expressions=expressions, 6407 nested=nested, 6408 ) 6409 6410 # Empty arrays/structs are allowed 6411 if values is not None: 6412 cls = exp.Struct if is_struct else exp.Array 6413 this = exp.cast(cls(expressions=values), this, copy=False) 6414 6415 elif expressions: 6416 this.set("expressions", expressions) 6417 6418 # 
https://materialize.com/docs/sql/types/list/#type-name 6419 while self._match(TokenType.LIST): 6420 this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True) 6421 6422 index = self._index 6423 6424 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 6425 matched_array = self._match(TokenType.ARRAY) 6426 6427 while self._curr: 6428 datatype_token = self._prev.token_type 6429 matched_l_bracket = self._match(TokenType.L_BRACKET) 6430 6431 if (not matched_l_bracket and not matched_array) or ( 6432 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 6433 ): 6434 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 6435 # not to be confused with the fixed size array parsing 6436 break 6437 6438 matched_array = False 6439 values = self._parse_csv(self._parse_disjunction) or None 6440 if ( 6441 values 6442 and not schema 6443 and ( 6444 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS 6445 or datatype_token == TokenType.ARRAY 6446 or not self._match(TokenType.R_BRACKET, advance=False) 6447 ) 6448 ): 6449 # Retreating here means that we should not parse the following values as part of the data type, e.g. 
def _parse_json_type_arg(self) -> exp.Expr | None:
    """Parse a single argument to ClickHouse's JSON type.

    Three forms are recognized: `SKIP col` / `SKIP REGEXP 'pattern'`, a
    `name=value` parameter, and a `col_name Type` column type hint.
    """
    # SKIP col or SKIP REGEXP 'pattern'
    if self._match_text_seq("SKIP"):
        is_regexp = self._match(TokenType.RLIKE)
        skipped = self._parse_column()
        if isinstance(skipped, exp.Column):
            skipped = skipped.to_dot()
        return self.expression(exp.SkipJSONColumn(regexp=is_regexp, expression=skipped))

    column = self._parse_column()
    if not isinstance(column, exp.Column):
        return None

    # Parameter: name=value (e.g., max_dynamic_paths=2)
    if len(column.parts) == 1 and self._match(TokenType.EQ):
        param_name = column.name
        param_value = self._parse_primary()
        return self.expression(exp.EQ(this=exp.var(param_name), expression=param_value))

    # Column type hint: col_name Type
    path = column.to_dot()
    hinted_type = self._parse_types(check_func=False, allow_identifiers=False)
    return self.expression(exp.ColumnDef(this=path, kind=hinted_type))


def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]:
    """Replace the first expression with the data type named by it; keep the rest unchanged."""
    element_type = exp.DataType.from_str(expressions[0].name, dialect=self.dialect)
    return [element_type, *expressions[1:]]


def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None:
    """Parse one struct member, i.e. `<name> [:] <type>` or, when required, a bare type.

    Args:
        type_required: When true and the member turns out to have no type, the
            parser rewinds and parses the whole member as a type instead.
    """
    checkpoint = self._index

    name_is_type_token = (
        self._curr
        and self._next
        and self._curr.token_type in self.TYPE_TOKENS
        and self._next.token_type in self.TYPE_TOKENS
    )
    if name_is_type_token:
        # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is
        # also a type token. Without this, the list will be parsed as a type and we'll
        # eventually crash
        member = self._parse_id_var()
    else:
        member = self._parse_type(parse_interval=False, fallback_to_identifier=True)
        if not member:
            member = self._parse_id_var()

    self._match(TokenType.COLON)

    if type_required and not isinstance(member, exp.DataType):
        if not self._match_set(self.TYPE_TOKENS, advance=False):
            self._retreat(checkpoint)
            return self._parse_types()

    return self._parse_column_def(member)


def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None:
    """Wrap `this` in `exp.AtTimeZone` for each `AT TIME ZONE <unary>` suffix that follows."""
    if self._match_text_seq("AT", "TIME", "ZONE"):
        zone = self._parse_unary()
        wrapped = self.expression(exp.AtTimeZone(this=this, zone=zone))
        # Recurse so that chained AT TIME ZONE suffixes nest left to right
        return self._parse_at_time_zone(wrapped)

    return this


def _parse_atom(self) -> exp.Expr | None:
    """Fast path for an identifier-led column or a stand-alone primary token.

    Returns None when the current token has no primary parser, or when the next
    token shows that column operators, postfix tokens or an adjacent string follow.
    """
    if self._curr.token_type in self.IDENTIFIER_TOKENS:
        column = self._parse_column()
        if column is not None:
            return column

    token = self._curr
    token_type = token.token_type

    primary_parser = self.PRIMARY_PARSERS.get(token_type)
    if not primary_parser:
        return None

    next_type = self._next.token_type
    if next_type in self.COLUMN_OPERATORS or next_type in self.COLUMN_POSTFIX_TOKENS:
        return None

    # Two adjacent string tokens are not handled here
    if token_type == TokenType.STRING and next_type == TokenType.STRING:
        return None

    self._advance()
    return primary_parser(self, token)
this: 6565 this = self._parse_bracket(this) 6566 column = self._parse_column_ops(this) if this else this 6567 6568 if column: 6569 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS: 6570 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 6571 if self.COLON_IS_VARIANT_EXTRACT: 6572 column = self._parse_colon_as_variant_extract(column) 6573 6574 return column 6575 6576 def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None: 6577 """Fast path for simple column and dot references (a, a.b, ...). 6578 6579 Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks 6580 that nothing complex follows. If it does, retreats and returns None so 6581 the slow path can handle it. For >4 parts, wraps in exp.Dot nodes. 6582 """ 6583 index = self._index 6584 parts: list[exp.Identifier] | None = None 6585 all_comments: list[str] | None = None 6586 6587 while self._match_set(self.IDENTIFIER_TOKENS): 6588 token = self._prev 6589 comments = self._prev_comments 6590 6591 if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS: 6592 self._retreat(index) 6593 return None 6594 6595 has_dot = self._match(TokenType.DOT) 6596 curr_tt = self._curr.token_type 6597 6598 if not has_dot: 6599 if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS: 6600 self._retreat(index) 6601 return None 6602 elif curr_tt not in self.IDENTIFIER_TOKENS: 6603 self._retreat(index) 6604 return None 6605 6606 if parts is None: 6607 parts = [] 6608 6609 if comments: 6610 if all_comments is None: 6611 all_comments = [] 6612 all_comments.extend(comments) 6613 self._prev_comments = [] 6614 6615 parts.append( 6616 self.expression( 6617 exp.Identifier( 6618 this=token.text, quoted=token.token_type == TokenType.IDENTIFIER 6619 ), 6620 token, 6621 ) 6622 ) 6623 6624 if not has_dot: 6625 break 6626 6627 if parts is None: 6628 return None 6629 6630 n = len(parts) 6631 6632 if n == 1: 6633 column: exp.Column | exp.Dot = exp.Column(this=parts[0]) 6634 elif n == 2: 
6635 column = exp.Column(this=parts[1], table=parts[0]) 6636 elif n == 3: 6637 column = exp.Column(this=parts[2], table=parts[1], db=parts[0]) 6638 else: 6639 column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0]) 6640 6641 for i in range(4, n): 6642 column = exp.Dot(this=column, expression=parts[i]) 6643 6644 if all_comments: 6645 column.add_comments(all_comments) 6646 6647 return column 6648 6649 def _parse_column_reference(self) -> exp.Expr | None: 6650 this = self._parse_field() 6651 if ( 6652 not this 6653 and self._match(TokenType.VALUES, advance=False) 6654 and self.VALUES_FOLLOWED_BY_PAREN 6655 and (not self._next or self._next.token_type != TokenType.L_PAREN) 6656 ): 6657 this = self._parse_id_var() 6658 6659 if isinstance(this, exp.Identifier): 6660 # We bubble up comments from the Identifier to the Column 6661 this = self.expression(exp.Column(this=this), comments=this.pop_comments()) 6662 6663 return this 6664 6665 def _build_json_extract( 6666 self, 6667 this: exp.Expr | None, 6668 path_parts: list[exp.JSONPathPart], 6669 escape: bool | None, 6670 ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]: 6671 if len(path_parts) > 1: 6672 this = self.expression( 6673 exp.JSONExtract( 6674 this=this, 6675 expression=exp.JSONPath(expressions=path_parts, escape=escape), 6676 variant_extract=True, 6677 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 6678 ) 6679 ) 6680 path_parts = [exp.JSONPathRoot()] 6681 6682 return this, path_parts 6683 6684 def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None: 6685 path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()] 6686 escape = None 6687 6688 while self._match(TokenType.COLON): 6689 key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6690 6691 if key: 6692 if isinstance(key, exp.Identifier) and key.quoted: 6693 escape = True 6694 path_parts.append(exp.JSONPathKey(this=key.name)) 6695 6696 while True: 6697 if 
self._match(TokenType.DOT): 6698 next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,)) 6699 6700 if next_key: 6701 if isinstance(next_key, exp.Identifier) and next_key.quoted: 6702 escape = True 6703 path_parts.append(exp.JSONPathKey(this=next_key.name)) 6704 elif self._match(TokenType.L_BRACKET): 6705 bracket_expr = self._parse_bracket_key_value() 6706 6707 if not self._match(TokenType.R_BRACKET): 6708 self.raise_error("Expected ]") 6709 6710 if bracket_expr: 6711 if bracket_expr.is_string: 6712 path_parts.append(exp.JSONPathKey(this=bracket_expr.name)) 6713 escape = True 6714 elif bracket_expr.is_star: 6715 path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard())) 6716 elif bracket_expr.is_number: 6717 path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py())) 6718 else: 6719 this, path_parts = self._build_json_extract(this, path_parts, escape) 6720 escape = None 6721 6722 this = self.expression( 6723 exp.Bracket( 6724 this=this, expressions=[bracket_expr], json_access=True 6725 ), 6726 ) 6727 6728 elif self._match(TokenType.DCOLON): 6729 this, path_parts = self._build_json_extract(this, path_parts, escape) 6730 escape = None 6731 6732 cast_type = self._parse_types() 6733 if cast_type: 6734 this = self.expression(exp.Cast(this=this, to=cast_type)) 6735 else: 6736 self.raise_error("Expected type after '::'") 6737 else: 6738 break 6739 6740 this, _ = self._build_json_extract(this, path_parts, escape) 6741 6742 return this 6743 6744 def _parse_dcolon(self) -> exp.Expr | None: 6745 return self._parse_types() 6746 6747 def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None: 6748 while self._curr.token_type in self.BRACKETS: 6749 this = self._parse_bracket(this) 6750 6751 column_operators = self.COLUMN_OPERATORS 6752 cast_column_operators = self.CAST_COLUMN_OPERATORS 6753 while self._curr: 6754 op_token = self._curr.token_type 6755 6756 if op_token not in column_operators: 6757 break 6758 op = 
column_operators[op_token] 6759 self._advance() 6760 6761 if op_token in cast_column_operators: 6762 field = self._parse_dcolon() 6763 if not field: 6764 self.raise_error("Expected type") 6765 elif op and self._curr: 6766 field = self._parse_column_reference() or self._parse_bitwise() 6767 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 6768 field = self._parse_column_ops(field) 6769 else: 6770 field = self._parse_field(any_token=True, anonymous_func=True) 6771 6772 # Function calls can be qualified, e.g., x.y.FOO() 6773 # This converts the final AST to a series of Dots leading to the function call 6774 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 6775 if isinstance(field, (exp.Func, exp.Window)) and this: 6776 this = this.transform( 6777 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 6778 ) 6779 6780 if op: 6781 this = op(self, this, field) 6782 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 6783 this = self.expression( 6784 exp.Column( 6785 this=field, 6786 table=this.this, 6787 db=this.args.get("table"), 6788 catalog=this.args.get("db"), 6789 ), 6790 comments=this.comments, 6791 ) 6792 elif isinstance(field, exp.Window): 6793 # Move the exp.Dot's to the window's function 6794 window_func = self.expression(exp.Dot(this=this, expression=field.this)) 6795 field.set("this", window_func) 6796 this = field 6797 else: 6798 this = self.expression(exp.Dot(this=this, expression=field)) 6799 6800 if field and field.comments: 6801 t.cast(exp.Expr, this).add_comments(field.pop_comments()) 6802 6803 this = self._parse_bracket(this) 6804 6805 return this 6806 6807 def _parse_paren(self) -> exp.Expr | None: 6808 if not self._match(TokenType.L_PAREN): 6809 return None 6810 6811 comments = self._prev_comments 6812 query = self._parse_select() 6813 6814 if query: 6815 expressions = [query] 6816 else: 6817 expressions = 
def _parse_primary(self) -> exp.Expr | None:
    """Parse a primary expression at the current position.

    A token with an entry in ``PRIMARY_PARSERS`` is handled by that entry; a
    run of adjacent string literals is combined into one ``exp.Concat``. A DOT
    directly followed by a NUMBER becomes the numeric literal ``0.<number>``.
    Anything else is delegated to ``_parse_paren``.

    Returns:
        The parsed expression, or whatever ``_parse_paren`` returns when no
        primary token is present.
    """
    if not self._match_set(self.PRIMARY_PARSERS):
        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")
        return self._parse_paren()

    kind = self._prev.token_type
    primary = self.PRIMARY_PARSERS[kind](self, self._prev)

    if kind != TokenType.STRING:
        return primary

    # Collect any string literals that immediately follow the first one
    pieces = [primary]
    while self._match(TokenType.STRING):
        pieces.append(exp.Literal.string(self._prev.text))

    if len(pieces) <= 1:
        return primary

    concat = exp.Concat(expressions=pieces, coalesce=self.dialect.CONCAT_COALESCE)
    return self.expression(concat)
= self._parse_primary() or self._parse_function( 6880 anonymous=anonymous_func, any_token=any_token 6881 ) 6882 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 6883 6884 def _parse_function( 6885 self, 6886 functions: dict[str, t.Callable] | None = None, 6887 anonymous: bool = False, 6888 optional_parens: bool = True, 6889 any_token: bool = False, 6890 ) -> exp.Expr | None: 6891 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 6892 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 6893 fn_syntax = False 6894 if ( 6895 self._match(TokenType.L_BRACE, advance=False) 6896 and self._next 6897 and self._next.text.upper() == "FN" 6898 ): 6899 self._advance(2) 6900 fn_syntax = True 6901 6902 func = self._parse_function_call( 6903 functions=functions, 6904 anonymous=anonymous, 6905 optional_parens=optional_parens, 6906 any_token=any_token, 6907 ) 6908 6909 if fn_syntax: 6910 self._match(TokenType.R_BRACE) 6911 6912 return func 6913 6914 def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]: 6915 return self._parse_csv(lambda: self._parse_lambda(alias=alias)) 6916 6917 def _parse_function_call( 6918 self, 6919 functions: dict[str, t.Callable] | None = None, 6920 anonymous: bool = False, 6921 optional_parens: bool = True, 6922 any_token: bool = False, 6923 ) -> exp.Expr | None: 6924 if not self._curr: 6925 return None 6926 6927 comments = self._curr.comments 6928 prev = self._prev 6929 token = self._curr 6930 token_type = self._curr.token_type 6931 this: str | exp.Expr = self._curr.text 6932 upper = self._curr.text.upper() 6933 6934 after_dot = prev.token_type == TokenType.DOT 6935 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 6936 if ( 6937 optional_parens 6938 and parser 6939 and token_type not in self.INVALID_FUNC_NAME_TOKENS 6940 and not after_dot 6941 ): 6942 self._advance() 6943 return self._parse_window(parser(self)) 6944 6945 if self._next.token_type != 
TokenType.L_PAREN: 6946 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS and not after_dot: 6947 self._advance() 6948 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]()) 6949 6950 return None 6951 6952 if any_token: 6953 if token_type in self.RESERVED_TOKENS: 6954 return None 6955 elif token_type not in self.FUNC_TOKENS: 6956 return None 6957 6958 self._advance(2) 6959 6960 parser = self.FUNCTION_PARSERS.get(upper) 6961 if parser and not anonymous: 6962 result = parser(self) 6963 else: 6964 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 6965 6966 if subquery_predicate: 6967 expr = None 6968 if self._curr.token_type in self.SUBQUERY_TOKENS: 6969 expr = self._parse_select() 6970 self._match_r_paren() 6971 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 6972 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 6973 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 6974 self._advance(-1) 6975 expr = self._parse_bitwise() 6976 6977 if expr: 6978 return self.expression(subquery_predicate(this=expr), comments=comments) 6979 6980 if functions is None: 6981 functions = self.FUNCTIONS 6982 6983 function = functions.get(upper) 6984 known_function = function and not anonymous 6985 6986 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 6987 args = self._parse_function_args(alias) 6988 6989 post_func_comments = self._curr.comments if self._curr else None 6990 if known_function and post_func_comments: 6991 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 6992 # call we'll construct it as exp.Anonymous, even if it's "known" 6993 if any( 6994 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 6995 for comment in post_func_comments 6996 ): 6997 known_function = False 6998 6999 if alias and known_function: 7000 args = self._kv_to_prop_eq(args) 7001 7002 if known_function: 7003 func_builder = t.cast(t.Callable, 
function) 7004 7005 # mypyc compiled functions don't have __code__, so we use 7006 # try/except to check if func_builder accepts 'dialect'. 7007 try: 7008 func = func_builder(args) 7009 except TypeError: 7010 func = func_builder(args, dialect=self.dialect) 7011 7012 func = self.validate_expression(func, args) 7013 if self.dialect.PRESERVE_ORIGINAL_NAMES: 7014 func.meta["name"] = this 7015 7016 result = func 7017 else: 7018 if token_type == TokenType.IDENTIFIER: 7019 this = exp.Identifier(this=this, quoted=True).update_positions(token) 7020 7021 result = self.expression(exp.Anonymous(this=this, expressions=args)) 7022 7023 result = result.update_positions(token) 7024 7025 if isinstance(result, exp.Expr): 7026 result.add_comments(comments) 7027 7028 if parser: 7029 self._match(TokenType.R_PAREN, expression=result) 7030 else: 7031 self._match_r_paren(result) 7032 return self._parse_window(result) 7033 7034 def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr: 7035 return expression 7036 7037 def _kv_to_prop_eq( 7038 self, expressions: list[exp.Expr], parse_map: bool = False 7039 ) -> list[exp.Expr]: 7040 transformed = [] 7041 7042 for index, e in enumerate(expressions): 7043 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 7044 if isinstance(e, exp.Alias): 7045 e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this)) 7046 7047 if not isinstance(e, exp.PropertyEQ): 7048 e = self.expression( 7049 exp.PropertyEQ( 7050 this=e.this if parse_map else exp.to_identifier(e.this.name), 7051 expression=e.expression, 7052 ) 7053 ) 7054 7055 if isinstance(e.this, exp.Column): 7056 e.this.replace(e.this.this) 7057 else: 7058 e = self._to_prop_eq(e, index) 7059 7060 transformed.append(e) 7061 7062 return transformed 7063 7064 def _parse_function_properties(self) -> exp.Properties | None: 7065 # Skip the generic `key = value` fallback in _parse_property since this 7066 # runs post-AS where a function body like `name = expr` can be misread 7067 # as 
def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
    """Parse the name and optional parameter list of a user-defined function.

    Args:
        kind: Not read by this implementation.

    Returns:
        The result of ``_parse_table_parts(schema=True)`` when no opening
        parenthesis follows it, otherwise an ``exp.UserDefinedFunction`` with
        the comma-separated parameters and ``wrapped=True``.
    """
    name = self._parse_table_parts(schema=True)

    if self._match(TokenType.L_PAREN):
        parameters = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        udf = exp.UserDefinedFunction(this=name, expressions=parameters, wrapped=True)
        return self.expression(udf)

    return name
exp.MacroOverload(this=body, expressions=params, is_table=is_table) 7130 overloads.append(self.expression(macro)) 7131 7132 return self.expression(exp.MacroOverloads(expressions=overloads)) 7133 7134 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 7135 literal = self._parse_primary() 7136 if literal: 7137 return self.expression(exp.Introducer(this=token.text, expression=literal), token) 7138 7139 return self._identifier_expression(token) 7140 7141 def _parse_session_parameter(self) -> exp.SessionParameter: 7142 kind = None 7143 this = self._parse_id_var() or self._parse_primary() 7144 7145 if this and self._match(TokenType.DOT): 7146 kind = this.name 7147 this = self._parse_var() or self._parse_primary() 7148 7149 return self.expression(exp.SessionParameter(this=this, kind=kind)) 7150 7151 def _parse_lambda_arg(self) -> exp.Expr | None: 7152 return self._parse_id_var() 7153 7154 def _parse_lambda(self, alias: bool = False) -> exp.Expr | None: 7155 next_token_type = self._next.token_type 7156 7157 # Fast path: simple atom (column, literal, null, bool) followed by , or ) 7158 if ( 7159 next_token_type in self.LAMBDA_ARG_TERMINATORS 7160 and (atom := self._parse_atom()) is not None 7161 ): 7162 return atom 7163 7164 index = self._index 7165 7166 if self._match(TokenType.L_PAREN): 7167 expressions = t.cast( 7168 list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg) 7169 ) 7170 7171 if not self._match(TokenType.R_PAREN): 7172 self._retreat(index) 7173 elif self._match_set(self.LAMBDAS): 7174 return self.LAMBDAS[self._prev.token_type](self, expressions) 7175 else: 7176 self._retreat(index) 7177 elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS: 7178 expressions = [self._parse_lambda_arg()] 7179 7180 if self._match_set(self.LAMBDAS): 7181 return self.LAMBDAS[self._prev.token_type](self, expressions) 7182 7183 self._retreat(index) 7184 7185 this: exp.Expr | None 7186 7187 if self._match(TokenType.DISTINCT): 7188 this 
def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
    """Parse a parenthesized list of constraints / field definitions.

    Args:
        this: The expression the schema belongs to.

    Returns:
        ``this`` unchanged when no opening parenthesis follows, or when the
        parenthesis starts a query (in which case the parser is retreated to
        where it began). Otherwise an ``exp.Schema`` wrapping ``this`` and the
        parsed elements.
    """
    start = self._index
    if not self._match(TokenType.L_PAREN):
        return this

    # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
    # expr can be of both types
    if self._match_set(self.SELECT_START_TOKENS):
        self._retreat(start)
        return this

    def _parse_element() -> exp.Expr | None:
        return self._parse_constraint() or self._parse_field_def()

    elements = self._parse_csv(_parse_element)
    self._match_r_paren()
    return self.expression(exp.Schema(this=this, expressions=elements))
def _parse_auto_increment(
    self,
) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
    """Parse the options that follow an auto-increment keyword.

    Accepts either a wrapped ``(start, increment)`` list or the keyword form
    ``START <expr> [INCREMENT] <expr>`` optionally followed by ``ORDER`` or
    ``NOORDER``.

    Returns:
        An ``exp.GeneratedAsIdentityColumnConstraint`` (with ``this=False``)
        when both a start and an increment were parsed, otherwise a bare
        ``exp.AutoIncrementColumnConstraint``.
    """
    start = increment = order = None

    if self._match(TokenType.L_PAREN, advance=False):
        bounds = self._parse_wrapped_csv(self._parse_bitwise)
        start, increment = seq_get(bounds, 0), seq_get(bounds, 1)
    elif self._match_text_seq("START"):
        start = self._parse_bitwise()
        self._match_text_seq("INCREMENT")
        increment = self._parse_bitwise()
        if self._match_text_seq("ORDER"):
            order = True
        elif self._match_text_seq("NOORDER"):
            order = False

    if not (start and increment):
        return exp.AutoIncrementColumnConstraint()

    return exp.GeneratedAsIdentityColumnConstraint(
        start=start, increment=increment, this=False, order=order
    )
self._match(TokenType.ALIAS) 7364 7365 if self._match_text_seq("ROW"): 7366 start = self._match_text_seq("START") 7367 if not start: 7368 self._match(TokenType.END) 7369 hidden = self._match_text_seq("HIDDEN") 7370 return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden)) 7371 7372 identity = self._match_text_seq("IDENTITY") 7373 7374 if self._match(TokenType.L_PAREN): 7375 if self._match(TokenType.START_WITH): 7376 this.set("start", self._parse_bitwise()) 7377 if self._match_text_seq("INCREMENT", "BY"): 7378 this.set("increment", self._parse_bitwise()) 7379 if self._match_text_seq("MINVALUE"): 7380 this.set("minvalue", self._parse_bitwise()) 7381 if self._match_text_seq("MAXVALUE"): 7382 this.set("maxvalue", self._parse_bitwise()) 7383 7384 if self._match_text_seq("CYCLE"): 7385 this.set("cycle", True) 7386 elif self._match_text_seq("NO", "CYCLE"): 7387 this.set("cycle", False) 7388 7389 if not identity: 7390 this.set("expression", self._parse_range()) 7391 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 7392 args = self._parse_csv(self._parse_bitwise) 7393 this.set("start", seq_get(args, 0)) 7394 this.set("increment", seq_get(args, 1)) 7395 7396 self._match_r_paren() 7397 7398 return this 7399 7400 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 7401 self._match_text_seq("LENGTH") 7402 return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise())) 7403 7404 def _parse_not_constraint(self) -> exp.Expr | None: 7405 if self._match_text_seq("NULL"): 7406 return self.expression(exp.NotNullColumnConstraint()) 7407 if self._match_text_seq("CASESPECIFIC"): 7408 return self.expression(exp.CaseSpecificColumnConstraint(not_=True)) 7409 if self._match_text_seq("FOR", "REPLICATION"): 7410 return self.expression(exp.NotForReplicationColumnConstraint()) 7411 7412 # Unconsume the `NOT` token 7413 self._retreat(self._index - 1) 7414 return None 7415 7416 def 
_parse_column_constraint(self) -> exp.Expr | None: 7417 this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None 7418 7419 procedure_option_follows = ( 7420 self._match(TokenType.WITH, advance=False) 7421 and self._next 7422 and self._next.text.upper() in self.PROCEDURE_OPTIONS 7423 ) 7424 7425 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 7426 constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self) 7427 if not constraint: 7428 self._retreat(self._index - 1) 7429 return None 7430 7431 return self.expression(exp.ColumnConstraint(this=this, kind=constraint)) 7432 7433 return this 7434 7435 def _parse_constraint(self) -> exp.Expr | None: 7436 if not self._match(TokenType.CONSTRAINT): 7437 return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 7438 7439 return self.expression( 7440 exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints()) 7441 ) 7442 7443 def _parse_unnamed_constraints(self) -> list[exp.Expr]: 7444 constraints = [] 7445 while True: 7446 constraint = self._parse_unnamed_constraint() or self._parse_function() 7447 if not constraint: 7448 break 7449 constraints.append(constraint) 7450 7451 return constraints 7452 7453 def _parse_unnamed_constraint( 7454 self, constraints: t.Collection[str] | None = None 7455 ) -> exp.Expr | None: 7456 index = self._index 7457 7458 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 7459 constraints or self.CONSTRAINT_PARSERS 7460 ): 7461 return None 7462 7463 constraint_key = self._prev.text.upper() 7464 if constraint_key not in self.CONSTRAINT_PARSERS: 7465 self.raise_error(f"No parser found for schema constraint {constraint_key}.") 7466 7467 result = self.CONSTRAINT_PARSERS[constraint_key](self) 7468 if not result: 7469 self._retreat(index) 7470 7471 return result 7472 7473 def _parse_unique_key(self) -> exp.Expr | None: 7474 if ( 7475 self._curr 7476 and 
self._curr.token_type != TokenType.IDENTIFIER 7477 and self._curr.text.upper() in self.CONSTRAINT_PARSERS 7478 ): 7479 return None 7480 return self._parse_id_var(any_token=False) 7481 7482 def _parse_unique(self) -> exp.UniqueColumnConstraint: 7483 self._match_texts(("KEY", "INDEX")) 7484 return self.expression( 7485 exp.UniqueColumnConstraint( 7486 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 7487 this=self._parse_schema(self._parse_unique_key()), 7488 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 7489 on_conflict=self._parse_on_conflict(), 7490 options=self._parse_key_constraint_options(), 7491 ) 7492 ) 7493 7494 def _parse_key_constraint_options(self) -> list[str]: 7495 options = [] 7496 while True: 7497 if not self._curr: 7498 break 7499 7500 if self._match(TokenType.ON): 7501 action = None 7502 on = self._advance_any() and self._prev.text 7503 7504 if self._match_text_seq("NO", "ACTION"): 7505 action = "NO ACTION" 7506 elif self._match_text_seq("CASCADE"): 7507 action = "CASCADE" 7508 elif self._match_text_seq("RESTRICT"): 7509 action = "RESTRICT" 7510 elif self._match_pair(TokenType.SET, TokenType.NULL): 7511 action = "SET NULL" 7512 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 7513 action = "SET DEFAULT" 7514 else: 7515 self.raise_error("Invalid key constraint") 7516 7517 options.append(f"ON {on} {action}") 7518 else: 7519 var = self._parse_var_from_options( 7520 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 7521 ) 7522 if not var: 7523 break 7524 options.append(var.name) 7525 7526 return options 7527 7528 def _parse_references(self, match: bool = True) -> exp.Reference | None: 7529 if match and not self._match(TokenType.REFERENCES): 7530 return None 7531 7532 expressions: list | None = None 7533 this = self._parse_table(schema=True) 7534 options = self._parse_key_constraint_options() 7535 return self.expression(exp.Reference(this=this, expressions=expressions, options=options)) 7536 7537 def 
_parse_foreign_key(self) -> exp.ForeignKey: 7538 expressions = ( 7539 self._parse_wrapped_id_vars() 7540 if not self._match(TokenType.REFERENCES, advance=False) 7541 else None 7542 ) 7543 reference = self._parse_references() 7544 on_options = {} 7545 7546 while self._match(TokenType.ON): 7547 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 7548 self.raise_error("Expected DELETE or UPDATE") 7549 7550 kind = self._prev.text.lower() 7551 7552 if self._match_text_seq("NO", "ACTION"): 7553 action = "NO ACTION" 7554 elif self._match(TokenType.SET): 7555 self._match_set((TokenType.NULL, TokenType.DEFAULT)) 7556 action = "SET " + self._prev.text.upper() 7557 else: 7558 self._advance() 7559 action = self._prev.text.upper() 7560 7561 on_options[kind] = action 7562 7563 return self.expression( 7564 exp.ForeignKey( 7565 expressions=expressions, 7566 reference=reference, 7567 options=self._parse_key_constraint_options(), 7568 **on_options, 7569 ) 7570 ) 7571 7572 def _parse_primary_key_part(self) -> exp.Expr | None: 7573 return self._parse_field() 7574 7575 def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None: 7576 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 7577 self._retreat(self._index - 1) 7578 return None 7579 7580 id_vars = self._parse_wrapped_id_vars() 7581 return self.expression( 7582 exp.PeriodForSystemTimeConstraint( 7583 this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1) 7584 ) 7585 ) 7586 7587 def _parse_primary_key( 7588 self, 7589 wrapped_optional: bool = False, 7590 in_props: bool = False, 7591 named_primary_key: bool = False, 7592 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 7593 desc = ( 7594 self._prev.token_type == TokenType.DESC 7595 if self._match_set((TokenType.ASC, TokenType.DESC)) 7596 else None 7597 ) 7598 7599 this = None 7600 if ( 7601 named_primary_key 7602 and self._curr.text.upper() not in self.CONSTRAINT_PARSERS 7603 and self._next 7604 and self._next.token_type == TokenType.L_PAREN 
7605 ): 7606 this = self._parse_id_var() 7607 7608 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 7609 return self.expression( 7610 exp.PrimaryKeyColumnConstraint( 7611 desc=desc, options=self._parse_key_constraint_options() 7612 ) 7613 ) 7614 7615 expressions = self._parse_wrapped_csv( 7616 self._parse_primary_key_part, optional=wrapped_optional 7617 ) 7618 7619 return self.expression( 7620 exp.PrimaryKey( 7621 this=this, 7622 expressions=expressions, 7623 include=self._parse_index_params(), 7624 options=self._parse_key_constraint_options(), 7625 ) 7626 ) 7627 7628 def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None: 7629 return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True)) 7630 7631 def _parse_odbc_datetime_literal(self) -> exp.Expr: 7632 """ 7633 Parses a datetime column in ODBC format. We parse the column into the corresponding 7634 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 7635 same as we did for `DATE('yyyy-mm-dd')`. 
7636 7637 Reference: 7638 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 7639 """ 7640 self._match(TokenType.VAR) 7641 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 7642 expression = self.expression(exp_class(this=self._parse_string())) 7643 if not self._match(TokenType.R_BRACE): 7644 self.raise_error("Expected }") 7645 return expression 7646 7647 def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None: 7648 if not self._match_set(self.BRACKETS): 7649 return this 7650 7651 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 7652 map_token = seq_get(self._tokens, self._index - 2) 7653 parse_map = map_token is not None and map_token.text.upper() == "MAP" 7654 else: 7655 parse_map = False 7656 7657 bracket_kind = self._prev.token_type 7658 if ( 7659 bracket_kind == TokenType.L_BRACE 7660 and self._curr 7661 and self._curr.token_type == TokenType.VAR 7662 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 7663 ): 7664 return self._parse_odbc_datetime_literal() 7665 7666 expressions = self._parse_csv( 7667 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 7668 ) 7669 7670 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 7671 self.raise_error("Expected ]") 7672 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 7673 self.raise_error("Expected }") 7674 7675 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 7676 if bracket_kind == TokenType.L_BRACE: 7677 this = self.expression( 7678 exp.Struct( 7679 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map) 7680 ) 7681 ) 7682 elif not this: 7683 this = build_array_constructor( 7684 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 7685 ) 7686 else: 7687 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 7688 if constructor_type: 7689 return build_array_constructor( 
7690 constructor_type, 7691 args=expressions, 7692 bracket_kind=bracket_kind, 7693 dialect=self.dialect, 7694 ) 7695 7696 expressions = apply_index_offset( 7697 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 7698 ) 7699 this = self.expression( 7700 exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments() 7701 ) 7702 7703 self._add_comments(this) 7704 return self._parse_bracket(this) 7705 7706 def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None: 7707 if not self._match(TokenType.COLON): 7708 return this 7709 7710 if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False): 7711 self._advance() 7712 end: exp.Expr | None = -exp.Literal.number("1") 7713 else: 7714 end = self._parse_assignment() 7715 step = self._parse_unary() if self._match(TokenType.COLON) else None 7716 return self.expression(exp.Slice(this=this, expression=end, step=step)) 7717 7718 def _parse_case(self) -> exp.Expr | None: 7719 if self._match(TokenType.DOT, advance=False): 7720 # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake 7721 self._retreat(self._index - 1) 7722 return None 7723 7724 ifs = [] 7725 default = None 7726 7727 comments = self._prev_comments 7728 expression = self._parse_disjunction() 7729 7730 while self._match(TokenType.WHEN): 7731 this = self._parse_disjunction() 7732 self._match(TokenType.THEN) 7733 then = self._parse_disjunction() 7734 ifs.append(self.expression(exp.If(this=this, true=then))) 7735 7736 if self._match(TokenType.ELSE): 7737 default = self._parse_disjunction() 7738 7739 if not self._match(TokenType.END): 7740 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 7741 default = exp.column("interval") 7742 else: 7743 self.raise_error("Expected END after CASE", self._prev) 7744 7745 return self.expression( 7746 exp.Case(this=expression, ifs=ifs, default=default), comments=comments 7747 ) 7748 7749 def _parse_if(self) -> exp.Expr | None: 7750 if 
self._match(TokenType.L_PAREN): 7751 args = self._parse_csv( 7752 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 7753 ) 7754 this = self.validate_expression(exp.If.from_arg_list(args), args) 7755 self._match_r_paren() 7756 else: 7757 index = self._index - 1 7758 7759 if self.NO_PAREN_IF_COMMANDS and index == 0: 7760 return self._parse_as_command(self._prev) 7761 7762 condition = self._parse_disjunction() 7763 7764 if not condition: 7765 self._retreat(index) 7766 return None 7767 7768 self._match(TokenType.THEN) 7769 true = self._parse_disjunction() 7770 false = self._parse_disjunction() if self._match(TokenType.ELSE) else None 7771 self._match(TokenType.END) 7772 this = self.expression(exp.If(this=condition, true=true, false=false)) 7773 7774 return this 7775 7776 def _parse_next_value_for(self) -> exp.Expr | None: 7777 if not self._match_text_seq("VALUE", "FOR"): 7778 self._retreat(self._index - 1) 7779 return None 7780 7781 return self.expression( 7782 exp.NextValueFor( 7783 this=self._parse_column(), 7784 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 7785 ) 7786 ) 7787 7788 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 7789 this = self._parse_function() or self._parse_var_or_string(upper=True) 7790 7791 if self._match(TokenType.FROM): 7792 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7793 7794 if not self._match(TokenType.COMMA): 7795 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 7796 7797 return self.expression(exp.Extract(this=this, expression=self._parse_bitwise())) 7798 7799 def _parse_gap_fill(self) -> exp.GapFill: 7800 self._match(TokenType.TABLE) 7801 this = self._parse_table() 7802 7803 self._match(TokenType.COMMA) 7804 args = [this, *self._parse_csv(self._parse_lambda)] 7805 7806 gap_fill = exp.GapFill.from_arg_list(args) 7807 return self.validate_expression(gap_fill, args) 7808 7809 def _parse_char(self) -> exp.Chr: 7810 return 
self.expression( 7811 exp.Chr( 7812 expressions=self._parse_csv(self._parse_assignment), 7813 charset=self._match(TokenType.USING) and self._parse_charset_name(), 7814 ) 7815 ) 7816 7817 def _parse_charset_name(self) -> exp.Expr | None: 7818 """ 7819 Parse a charset name after USING or CHARACTER SET. Dialects that need to preserve quoting 7820 for specific name shapes override this. 7821 """ 7822 return self._parse_var( 7823 tokens={TokenType.BINARY, TokenType.IDENTIFIER}, 7824 ) 7825 7826 def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr: 7827 this = self._parse_assignment() 7828 7829 if not self._match(TokenType.ALIAS): 7830 if self._match(TokenType.COMMA): 7831 return self.expression(exp.CastToStrType(this=this, to=self._parse_string())) 7832 7833 self.raise_error("Expected AS after CAST") 7834 7835 fmt = None 7836 to = self._parse_types(with_collation=True) 7837 7838 default = None 7839 if self._match(TokenType.DEFAULT): 7840 default = self._parse_bitwise() 7841 self._match_text_seq("ON", "CONVERSION", "ERROR") 7842 7843 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 7844 fmt_string = self._parse_wrapped(self._parse_string, optional=True) 7845 fmt = self._parse_at_time_zone(fmt_string) 7846 7847 if not to: 7848 to = exp.DType.UNKNOWN.into_expr() 7849 if to.this in exp.DataType.TEMPORAL_TYPES: 7850 this = self.expression( 7851 (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)( 7852 this=this, 7853 format=exp.Literal.string( 7854 format_time( 7855 fmt_string.this if fmt_string else "", 7856 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 7857 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 7858 ) 7859 ), 7860 safe=safe, 7861 ) 7862 ) 7863 7864 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 7865 this.set("zone", fmt.args["zone"]) 7866 return this 7867 elif not to: 7868 self.raise_error("Expected TYPE after CAST") 7869 elif isinstance(to, exp.Identifier): 7870 to = 
exp.DataType.from_str(to.name, dialect=self.dialect, udt=True) 7871 elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET): 7872 to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string()) 7873 7874 return self.build_cast( 7875 strict=strict, 7876 this=this, 7877 to=to, 7878 format=fmt, 7879 safe=safe, 7880 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 7881 default=default, 7882 ) 7883 7884 def _parse_string_agg(self) -> exp.GroupConcat: 7885 if self._match(TokenType.DISTINCT): 7886 args: list[exp.Expr | None] = [ 7887 self.expression(exp.Distinct(expressions=[self._parse_disjunction()])) 7888 ] 7889 if self._match(TokenType.COMMA): 7890 args.extend(self._parse_csv(self._parse_disjunction)) 7891 else: 7892 args = self._parse_csv(self._parse_disjunction) # type: ignore 7893 7894 if self._match_text_seq("ON", "OVERFLOW"): 7895 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 7896 if self._match_text_seq("ERROR"): 7897 on_overflow: exp.Expr | None = exp.var("ERROR") 7898 else: 7899 self._match_text_seq("TRUNCATE") 7900 on_overflow = self.expression( 7901 exp.OverflowTruncateBehavior( 7902 this=self._parse_string(), 7903 with_count=( 7904 self._match_text_seq("WITH", "COUNT") 7905 or not self._match_text_seq("WITHOUT", "COUNT") 7906 ), 7907 ) 7908 ) 7909 else: 7910 on_overflow = None 7911 7912 index = self._index 7913 if not self._match(TokenType.R_PAREN) and args: 7914 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 7915 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... 
]] [LIMIT n]) 7916 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 7917 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 7918 return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1))) 7919 7920 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 7921 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 7922 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 7923 if not self._match_text_seq("WITHIN", "GROUP"): 7924 self._retreat(index) 7925 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 7926 7927 # The corresponding match_r_paren will be called in parse_function (caller) 7928 self._match_l_paren() 7929 7930 return self.expression( 7931 exp.GroupConcat( 7932 this=self._parse_order(this=seq_get(args, 0)), 7933 separator=seq_get(args, 1), 7934 on_overflow=on_overflow, 7935 ) 7936 ) 7937 7938 def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None: 7939 this = self._parse_bitwise() 7940 7941 if self._match(TokenType.USING): 7942 to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_charset_name()) 7943 elif self._match(TokenType.COMMA): 7944 to = self._parse_types() 7945 else: 7946 to = None 7947 7948 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 7949 7950 def _parse_xml_element(self) -> exp.XMLElement: 7951 if self._match_text_seq("EVALNAME"): 7952 evalname = True 7953 this = self._parse_bitwise() 7954 else: 7955 evalname = None 7956 self._match_text_seq("NAME") 7957 this = self._parse_id_var() 7958 7959 return self.expression( 7960 exp.XMLElement( 7961 this=this, 7962 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise), 7963 evalname=evalname, 7964 ) 7965 ) 7966 7967 def _parse_xml_table(self) -> exp.XMLTable: 7968 
namespaces = None 7969 passing = None 7970 columns = None 7971 7972 if self._match_text_seq("XMLNAMESPACES", "("): 7973 namespaces = self._parse_xml_namespace() 7974 self._match_text_seq(")", ",") 7975 7976 this = self._parse_string() 7977 7978 if self._match_text_seq("PASSING"): 7979 # The BY VALUE keywords are optional and are provided for semantic clarity 7980 self._match_text_seq("BY", "VALUE") 7981 passing = self._parse_csv(self._parse_column) 7982 7983 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 7984 7985 if self._match_text_seq("COLUMNS"): 7986 columns = self._parse_csv(self._parse_field_def) 7987 7988 return self.expression( 7989 exp.XMLTable( 7990 this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref 7991 ) 7992 ) 7993 7994 def _parse_xml_namespace(self) -> list[exp.XMLNamespace]: 7995 namespaces = [] 7996 7997 while True: 7998 if self._match(TokenType.DEFAULT): 7999 uri = self._parse_string() 8000 else: 8001 uri = self._parse_alias(self._parse_string()) 8002 namespaces.append(self.expression(exp.XMLNamespace(this=uri))) 8003 if not self._match(TokenType.COMMA): 8004 break 8005 8006 return namespaces 8007 8008 def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None: 8009 args = self._parse_csv(self._parse_disjunction) 8010 8011 if len(args) < 3: 8012 return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1))) 8013 8014 return self.expression(exp.DecodeCase(expressions=args)) 8015 8016 def _parse_json_key_value(self) -> exp.JSONKeyValue | None: 8017 self._match_text_seq("KEY") 8018 key = self._parse_column() 8019 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 8020 self._match_text_seq("VALUE") 8021 value = self._parse_bitwise() 8022 8023 if not key and not value: 8024 return None 8025 return self.expression(exp.JSONKeyValue(this=key, expression=value)) 8026 8027 def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None: 8028 if not this or not 
self._match_text_seq("FORMAT", "JSON"): 8029 return this 8030 8031 return self.expression(exp.FormatJson(this=this)) 8032 8033 def _parse_on_condition(self) -> exp.OnCondition | None: 8034 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS) 8035 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 8036 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 8037 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 8038 else: 8039 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 8040 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 8041 8042 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 8043 8044 if not empty and not error and not null: 8045 return None 8046 8047 return self.expression(exp.OnCondition(empty=empty, error=error, null=null)) 8048 8049 def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None: 8050 # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 8051 for value in values: 8052 if self._match_text_seq(value, "ON", on): 8053 return f"{value} ON {on}" 8054 8055 index = self._index 8056 if self._match(TokenType.DEFAULT): 8057 default_value = self._parse_bitwise() 8058 if self._match_text_seq("ON", on): 8059 return default_value 8060 8061 self._retreat(index) 8062 8063 return None 8064 8065 @t.overload 8066 def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ... 8067 8068 @t.overload 8069 def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ... 
8070 8071 def _parse_json_object(self, agg=False): 8072 star = self._parse_star() 8073 expressions = ( 8074 [star] 8075 if star 8076 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 8077 ) 8078 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 8079 8080 unique_keys = None 8081 if self._match_text_seq("WITH", "UNIQUE"): 8082 unique_keys = True 8083 elif self._match_text_seq("WITHOUT", "UNIQUE"): 8084 unique_keys = False 8085 8086 self._match_text_seq("KEYS") 8087 8088 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 8089 self._parse_type() 8090 ) 8091 encoding = self._match_text_seq("ENCODING") and self._parse_var() 8092 8093 return self.expression( 8094 (exp.JSONObjectAgg if agg else exp.JSONObject)( 8095 expressions=expressions, 8096 null_handling=null_handling, 8097 unique_keys=unique_keys, 8098 return_type=return_type, 8099 encoding=encoding, 8100 ) 8101 ) 8102 8103 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 8104 def _parse_json_column_def(self) -> exp.JSONColumnDef: 8105 if not self._match_text_seq("NESTED"): 8106 this = self._parse_id_var() 8107 ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY) 8108 kind = self._parse_types(allow_identifiers=False) 8109 nested = None 8110 else: 8111 this = None 8112 ordinality = None 8113 kind = None 8114 nested = True 8115 8116 format_json = self._match_text_seq("FORMAT", "JSON") 8117 path = self._match_text_seq("PATH") and self._parse_string() 8118 nested_schema = nested and self._parse_json_schema() 8119 8120 return self.expression( 8121 exp.JSONColumnDef( 8122 this=this, 8123 kind=kind, 8124 path=path, 8125 nested_schema=nested_schema, 8126 ordinality=ordinality, 8127 format_json=format_json, 8128 ) 8129 ) 8130 8131 def _parse_json_schema(self) -> exp.JSONSchema: 8132 self._match_text_seq("COLUMNS") 8133 return self.expression( 8134 exp.JSONSchema( 8135 
expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True) 8136 ) 8137 ) 8138 8139 def _parse_json_table(self) -> exp.JSONTable: 8140 this = self._parse_format_json(self._parse_bitwise()) 8141 path = self._match(TokenType.COMMA) and self._parse_string() 8142 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 8143 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 8144 schema = self._parse_json_schema() 8145 8146 return exp.JSONTable( 8147 this=this, 8148 schema=schema, 8149 path=path, 8150 error_handling=error_handling, 8151 empty_handling=empty_handling, 8152 ) 8153 8154 def _parse_match_against(self) -> exp.MatchAgainst: 8155 if self._match_text_seq("TABLE"): 8156 # parse SingleStore MATCH(TABLE ...) syntax 8157 # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/ 8158 expressions = [] 8159 table = self._parse_table() 8160 if table: 8161 expressions = [table] 8162 else: 8163 expressions = self._parse_csv(self._parse_column) 8164 8165 self._match_text_seq(")", "AGAINST", "(") 8166 8167 this = self._parse_string() 8168 8169 if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"): 8170 modifier = "IN NATURAL LANGUAGE MODE" 8171 if self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8172 modifier = f"{modifier} WITH QUERY EXPANSION" 8173 elif self._match_text_seq("IN", "BOOLEAN", "MODE"): 8174 modifier = "IN BOOLEAN MODE" 8175 elif self._match_text_seq("WITH", "QUERY", "EXPANSION"): 8176 modifier = "WITH QUERY EXPANSION" 8177 else: 8178 modifier = None 8179 8180 return self.expression( 8181 exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier) 8182 ) 8183 8184 # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16 8185 def _parse_open_json(self) -> exp.OpenJSON: 8186 this = self._parse_bitwise() 8187 path = self._match(TokenType.COMMA) and self._parse_string() 8188 8189 def _parse_open_json_column_def() -> 
exp.OpenJSONColumnDef: 8190 this = self._parse_field(any_token=True) 8191 kind = self._parse_types() 8192 path = self._parse_string() 8193 as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON) 8194 8195 return self.expression( 8196 exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json) 8197 ) 8198 8199 expressions = None 8200 if self._match_pair(TokenType.R_PAREN, TokenType.WITH): 8201 self._match_l_paren() 8202 expressions = self._parse_csv(_parse_open_json_column_def) 8203 8204 return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions)) 8205 8206 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 8207 args = self._parse_csv(self._parse_bitwise) 8208 8209 if self._match(TokenType.IN): 8210 return self.expression( 8211 exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0)) 8212 ) 8213 8214 if haystack_first: 8215 haystack = seq_get(args, 0) 8216 needle = seq_get(args, 1) 8217 else: 8218 haystack = seq_get(args, 1) 8219 needle = seq_get(args, 0) 8220 8221 return self.expression( 8222 exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2)) 8223 ) 8224 8225 def _parse_join_hint(self, func_name: str) -> exp.JoinHint: 8226 args = self._parse_csv(self._parse_table) 8227 return exp.JoinHint(this=func_name.upper(), expressions=args) 8228 8229 def _parse_substring(self) -> exp.Substring: 8230 # Postgres supports the form: substring(string [from int] [for int]) 8231 # (despite being undocumented, the reverse order also works) 8232 # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6 8233 8234 args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise)) 8235 8236 start, length = None, None 8237 8238 while self._curr: 8239 if self._match(TokenType.FROM): 8240 start = self._parse_bitwise() 8241 elif self._match(TokenType.FOR): 8242 if not start: 8243 start = exp.Literal.number(1) 8244 length = self._parse_bitwise() 8245 else: 8246 break 
8247 8248 if start: 8249 args.append(start) 8250 if length: 8251 args.append(length) 8252 8253 return self.validate_expression(exp.Substring.from_arg_list(args), args) 8254 8255 def _parse_trim(self) -> exp.Trim: 8256 # https://www.w3resource.com/sql/character-functions/trim.php 8257 # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html 8258 8259 position = None 8260 collation = None 8261 expression = None 8262 8263 if self._match_texts(self.TRIM_TYPES): 8264 position = self._prev.text.upper() 8265 8266 this = self._parse_bitwise() 8267 if self._match_set((TokenType.FROM, TokenType.COMMA)): 8268 invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST 8269 expression = self._parse_bitwise() 8270 8271 if invert_order: 8272 this, expression = expression, this 8273 8274 if self._match(TokenType.COLLATE): 8275 collation = self._parse_bitwise() 8276 8277 return self.expression( 8278 exp.Trim(this=this, position=position, expression=expression, collation=collation) 8279 ) 8280 8281 def _parse_window_clause(self) -> list[exp.Expr] | None: 8282 return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None 8283 8284 def _parse_named_window(self) -> exp.Expr | None: 8285 return self._parse_window(self._parse_id_var(), alias=True) 8286 8287 def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None: 8288 if self._curr.token_type == TokenType.VAR: 8289 if self._match_text_seq("IGNORE", "NULLS"): 8290 return self.expression(exp.IgnoreNulls(this=this)) 8291 if self._match_text_seq("RESPECT", "NULLS"): 8292 return self.expression(exp.RespectNulls(this=this)) 8293 return this 8294 8295 def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None: 8296 if self._match(TokenType.HAVING): 8297 self._match_texts(("MAX", "MIN")) 8298 max = self._prev.text.upper() != "MIN" 8299 return self.expression( 8300 exp.HavingMax(this=this, expression=self._parse_column(), max=max) 8301 ) 8302 8303 
return this 8304 8305 def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None: 8306 func = this 8307 comments = func.comments if isinstance(func, exp.Expr) else None 8308 8309 # T-SQL allows the OVER (...) syntax after WITHIN GROUP. 8310 # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16 8311 if self._match_text_seq("WITHIN", "GROUP"): 8312 order = self._parse_wrapped(self._parse_order) 8313 this = self.expression(exp.WithinGroup(this=this, expression=order)) 8314 8315 if self._match_pair(TokenType.FILTER, TokenType.L_PAREN): 8316 self._match(TokenType.WHERE) 8317 this = self.expression( 8318 exp.Filter(this=this, expression=self._parse_where(skip_where_token=True)) 8319 ) 8320 self._match_r_paren() 8321 8322 # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER 8323 # Some dialects choose to implement and some do not. 8324 # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html 8325 8326 # There is some code above in _parse_lambda that handles 8327 # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ... 8328 8329 # The below changes handle 8330 # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ... 8331 8332 # Oracle allows both formats 8333 # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html) 8334 # and Snowflake chose to do the same for familiarity 8335 # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes 8336 if isinstance(this, exp.AggFunc): 8337 ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls) 8338 8339 if ignore_respect and ignore_respect is not this: 8340 ignore_respect.replace(ignore_respect.this) 8341 this = self.expression(ignore_respect.__class__(this=this)) 8342 8343 this = self._parse_respect_or_ignore_nulls(this) 8344 8345 # bigquery select from window x AS (partition by ...) 
8346 if alias: 8347 over = None 8348 self._match(TokenType.ALIAS) 8349 elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS): 8350 return this 8351 else: 8352 over = self._prev.text.upper() 8353 8354 if comments and isinstance(func, exp.Expr): 8355 func.pop_comments() 8356 8357 if not self._match(TokenType.L_PAREN): 8358 return self.expression( 8359 exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments 8360 ) 8361 8362 window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS) 8363 8364 first: bool | None = True if self._match(TokenType.FIRST) else None 8365 if self._match_text_seq("LAST"): 8366 first = False 8367 8368 partition, order = self._parse_partition_and_order() 8369 kind = ( 8370 self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS") 8371 ) and self._prev.text 8372 8373 if kind: 8374 self._match(TokenType.BETWEEN) 8375 start = self._parse_window_spec() 8376 8377 end = self._parse_window_spec() if self._match(TokenType.AND) else {} 8378 exclude = ( 8379 self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS) 8380 if self._match_text_seq("EXCLUDE") 8381 else None 8382 ) 8383 8384 spec = self.expression( 8385 exp.WindowSpec( 8386 kind=kind, 8387 start=start["value"], 8388 start_side=start["side"], 8389 end=end.get("value"), 8390 end_side=end.get("side"), 8391 exclude=exclude, 8392 ) 8393 ) 8394 else: 8395 spec = None 8396 8397 self._match_r_paren() 8398 8399 window = self.expression( 8400 exp.Window( 8401 this=this, 8402 partition_by=partition, 8403 order=order, 8404 spec=spec, 8405 alias=window_alias, 8406 over=over, 8407 first=first, 8408 ), 8409 comments=comments, 8410 ) 8411 8412 # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...) 
8413 if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False): 8414 return self._parse_window(window, alias=alias) 8415 8416 return window 8417 8418 def _parse_partition_and_order( 8419 self, 8420 ) -> tuple[list[exp.Expr], exp.Expr | None]: 8421 return self._parse_partition_by(), self._parse_order() 8422 8423 def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]: 8424 self._match(TokenType.BETWEEN) 8425 8426 return { 8427 "value": ( 8428 (self._match_text_seq("UNBOUNDED") and "UNBOUNDED") 8429 or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW") 8430 or self._parse_bitwise() 8431 ), 8432 "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None, 8433 } 8434 8435 def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None: 8436 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 8437 # so this section tries to parse the clause version and if it fails, it treats the token 8438 # as an identifier (alias) 8439 if self._can_parse_limit_or_offset(): 8440 return this 8441 8442 # WINDOW is in ID_VAR_TOKENS, so it can be consumed as an implicit alias. Detect the 8443 # named-window clause shape (`WINDOW <ident> AS (...)`) and avoid swallowing it. 
8444 if self._can_parse_named_window(): 8445 return this 8446 8447 any_token = self._match(TokenType.ALIAS) 8448 comments = self._prev_comments 8449 8450 if explicit and not any_token: 8451 return this 8452 8453 if self._match(TokenType.L_PAREN): 8454 aliases = self.expression( 8455 exp.Aliases( 8456 this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token)) 8457 ), 8458 comments=comments, 8459 ) 8460 self._match_r_paren(aliases) 8461 return aliases 8462 8463 alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or ( 8464 self.STRING_ALIASES and self._parse_string_as_identifier() 8465 ) 8466 8467 if alias: 8468 comments.extend(alias.pop_comments()) 8469 this = self.expression(exp.Alias(this=this, alias=alias), comments=comments) 8470 column = this.this 8471 8472 # Moves the comment next to the alias in `expr /* comment */ AS alias` 8473 if not this.comments and column and column.comments: 8474 this.comments = column.pop_comments() 8475 8476 return this 8477 8478 def _parse_id_var( 8479 self, 8480 any_token: bool = True, 8481 tokens: t.Collection[TokenType] | None = None, 8482 ) -> exp.Expr | None: 8483 expression = self._parse_identifier() 8484 if not expression and ( 8485 (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS) 8486 ): 8487 quoted = self._prev.token_type == TokenType.STRING 8488 expression = self._identifier_expression(quoted=quoted) 8489 8490 return expression 8491 8492 def _parse_string(self) -> exp.Expr | None: 8493 if self._match_set(self.STRING_PARSERS): 8494 return self.STRING_PARSERS[self._prev.token_type](self, self._prev) 8495 return self._parse_placeholder() 8496 8497 def _parse_string_as_identifier(self) -> exp.Identifier | None: 8498 if not self._match(TokenType.STRING): 8499 return None 8500 output = exp.to_identifier(self._prev.text, quoted=True) 8501 output.update_positions(self._prev) 8502 return output 8503 8504 def _parse_number(self) -> exp.Expr | None: 8505 if 
def _advance_any(self, ignore_reserved: bool = False) -> Token | None:
    """Consume the current token unless it is missing or reserved.

    Args:
        ignore_reserved: When true, tokens whose type is in `RESERVED_TOKENS`
            are consumed as well.

    Returns:
        The token that was consumed (now `self._prev`), or ``None`` when nothing
        was consumed.
    """
    current = self._curr
    if not current:
        return None

    if not ignore_reserved and current.token_type in self.RESERVED_TOKENS:
        return None

    self._advance()
    return self._prev
def _parse_placeholder(self) -> exp.Expr | None:
    """Parse a placeholder using the callback registered for the current token type.

    Returns:
        The expression built by the matching `PLACEHOLDER_PARSERS` entry, or ``None``
        when the current token is not a placeholder token or the callback produced
        nothing (in which case the consumed token is handed back).
    """
    if not self._match_set(self.PLACEHOLDER_PARSERS):
        return None

    build = self.PLACEHOLDER_PARSERS[self._prev.token_type]
    result = build(self)
    if not result:
        # The callback rejected the token, so step back over it
        self._advance(-1)
        return None

    return result
def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None:
    """Parse an expression (optionally aliased), falling back to a SELECT.

    Args:
        alias: When true, the parsed assignment is passed through `_parse_alias`
            with ``explicit=True`` before set operations are applied.

    Returns:
        The result of `_parse_set_operations` over the parsed expression, or the
        result of `_parse_select` when that is falsy.
    """
    parsed = self._parse_assignment()
    if alias:
        parsed = self._parse_alias(parsed, explicit=True)

    return self._parse_set_operations(parsed) or self._parse_select()
def _parse_alter_table_add(self) -> list[exp.Expr]:
    """Parse the actions of an ``ALTER TABLE ... ADD`` clause.

    Returns:
        Either the parsed schema (wrapped in a list), a comma-separated list of
        column definitions, or a comma-separated list of individual ADD actions
        (constraints, columns or partitions).
    """

    def _parse_single_add() -> exp.Expr | None:
        # Each action may repeat the ADD keyword; it is optional here
        self._match_text_seq("ADD")

        if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
            constraints = self._parse_csv(self._parse_constraint)
            return self.expression(exp.AddConstraint(expressions=constraints))

        column = self._parse_add_column()
        if isinstance(column, exp.ColumnDef):
            return column

        if_not_exists = self._parse_exists(not_=True)
        if not self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
            return None

        partition = self._parse_field(any_token=True)
        location = self._match_text_seq("LOCATION", advance=False) and self._parse_property()
        return self.expression(
            exp.AddPartition(exists=if_not_exists, this=partition, location=location)
        )

    # Constraints, and dialects that require ADD before each column without a COLUMNS
    # keyword following, are handled action by action
    starts_with_constraint = self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False)
    if starts_with_constraint or not (
        not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
        or self._match_text_seq("COLUMNS")
    ):
        return self._parse_csv(_parse_single_add)

    schema = self._parse_schema()
    if schema:
        return ensure_list(schema)

    return self._parse_csv(self._parse_column_def_with_exists)
def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None:
    """Parse the target of an ``ALTER TABLE ... RENAME`` action.

    Returns:
        An `exp.RenameColumn` for a column rename, an `exp.AlterRename` for a table
        rename, or ``None`` when a column rename is missing its old name, the ``TO``
        keyword or its new name.
    """
    # A column rename is either introduced by COLUMN or, in dialects that do not
    # require that keyword, by anything other than TO
    renames_column = self._match(TokenType.COLUMN) or not (
        self.ALTER_RENAME_REQUIRES_COLUMN or self._match_text_seq("TO", advance=False)
    )

    if not renames_column:
        self._match_text_seq("TO")
        new_table = self._parse_table(schema=True)
        return self.expression(exp.AlterRename(this=new_table))

    if_exists = self._parse_exists()
    source = self._parse_column()
    has_to = self._match_text_seq("TO")
    destination = self._parse_column()

    if source is not None and has_to and destination is not None:
        return self.expression(
            exp.RenameColumn(this=source, to=destination, exists=if_exists)
        )

    return None
def _parse_alter(self) -> exp.Alter | exp.Command:
    """Parse an ALTER statement, falling back to a generic command.

    Returns:
        An `exp.Alter` when the altered object kind is supported, an action parser
        is registered in `ALTER_PARSERS`, at least one action was parsed and all
        tokens were consumed; otherwise the statement as an `exp.Command`.
    """
    start = self._prev
    iceberg = self._match_text_seq("ICEBERG")

    alter_token = self._match_set(self.ALTERABLES) and self._prev
    # ICEBERG is only accepted in front of TABLE
    if not alter_token or (iceberg and alter_token.token_type != TokenType.TABLE):
        return self._parse_as_command(start)

    exists = self._parse_exists()
    only = self._match_text_seq("ONLY")

    this = check = cluster = None
    if alter_token.token_type != TokenType.SESSION:
        this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
        check = self._match_text_seq("WITH", "CHECK")
        if self._match(TokenType.ON):
            cluster = self._parse_on_property()

        # Step onto the action keyword so that it becomes `self._prev` below
        if self._next:
            self._advance()

    previous = self._prev
    action_parser = self.ALTER_PARSERS.get(previous.text.upper()) if previous else None
    if not action_parser:
        return self._parse_as_command(start)

    actions = ensure_list(action_parser(self))
    not_valid = self._match_text_seq("NOT", "VALID")
    options = self._parse_csv(self._parse_property)
    cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

    # Leftover tokens or no parsed action mean the statement was not fully understood
    if self._curr or not actions:
        return self._parse_as_command(start)

    return self.expression(
        exp.Alter(
            this=this,
            kind=alter_token.text.upper(),
            exists=exists,
            actions=actions,
            only=only,
            options=options,
            cluster=cluster,
            not_valid=not_valid,
            check=check,
            cascade=cascade,
            iceberg=iceberg,
        )
    )
# https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
    """Parse the ``[DELTA] STATISTICS ...`` part of an ANALYZE statement.

    The keyword consumed just before this call (`self._prev`) becomes the ``kind``.
    After ``STATISTICS`` one of ``NOSCAN``, ``FOR ALL COLUMNS``,
    ``FOR COLUMNS <columns>`` or ``SAMPLE <number> [PERCENT]`` may follow.

    Returns:
        The parsed `exp.AnalyzeStatistics` node.

    Raises:
        Whatever `raise_error` raises when the STATISTICS keyword is missing.
    """
    this = None
    kind = self._prev.text.upper()
    option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
    expressions = []

    if not self._match_text_seq("STATISTICS"):
        self.raise_error("Expecting token STATISTICS")

    if self._match_text_seq("NOSCAN"):
        this = "NOSCAN"
    elif self._match(TokenType.FOR):
        if self._match_text_seq("ALL", "COLUMNS"):
            this = "FOR ALL COLUMNS"
        # Match the exact keyword: handing a bare string to `_match_texts` would be
        # treated as a collection of texts, letting fragments such as "COL" through.
        # The two FOR forms are also alternatives, hence `elif`.
        elif self._match_text_seq("COLUMNS"):
            this = "FOR COLUMNS"
            expressions = self._parse_csv(self._parse_column_reference)
    elif self._match_text_seq("SAMPLE"):
        sample = self._parse_number()
        sample_kind = self._prev.text.upper() if self._match(TokenType.PERCENT) else None
        expressions = [self.expression(exp.AnalyzeSample(sample=sample, kind=sample_kind))]

    return self.expression(
        exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions)
    )
# https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
    """Parse the ``HISTOGRAM ON ...`` part of an ANALYZE statement.

    The keyword consumed just before this call (`self._prev`) is stored as ``this``.

    Returns:
        The parsed `exp.AnalyzeHistogram` node, carrying the column list, an optional
        WITH / USING DATA expression and an optional MANUAL/AUTO update option.
    """
    action = self._prev.text.upper()
    columns = []
    detail: exp.Expr | None = None
    update_mode = None

    if self._match_text_seq("HISTOGRAM", "ON"):
        columns = self._parse_csv(self._parse_column_reference)

        with_options = []
        while self._match(TokenType.WITH):
            if not self._match_texts(("SYNC", "ASYNC")):
                bucket_count = self._parse_number()
                if self._match_text_seq("BUCKETS"):
                    with_options.append(f"{bucket_count} BUCKETS")
            # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
            elif self._match_text_seq("MODE", advance=False):
                # `self._prev` is still the SYNC/ASYNC token because MODE was only peeked
                with_options.append(f"{self._prev.text.upper()} MODE")
                self._advance()

        if with_options:
            detail = self.expression(exp.AnalyzeWith(expressions=with_options))

        if self._match_texts(("MANUAL", "AUTO")) and self._match(
            TokenType.UPDATE, advance=False
        ):
            update_mode = self._prev.text.upper()
            self._advance()
        elif self._match_text_seq("USING", "DATA"):
            detail = self.expression(exp.UsingData(this=self._parse_string()))

    return self.expression(
        exp.AnalyzeHistogram(
            this=action,
            expressions=columns,
            expression=detail,
            update_options=update_mode,
        )
    )
def _parse_when_matched(self) -> exp.Whens:
    """Parse the sequence of ``WHEN [NOT] MATCHED ... THEN ...`` clauses of a MERGE.

    Returns:
        An `exp.Whens` node wrapping one `exp.When` per parsed clause (possibly none).
    """

    def _parse_then_action() -> exp.Expr | None:
        # Parses what follows THEN: INSERT, UPDATE, DELETE or a conflict action
        if self._match(TokenType.INSERT):
            star = self._parse_star()
            if star:
                return self.expression(exp.Insert(this=star))

            if self._match_text_seq("ROW"):
                target = exp.var("ROW")
            else:
                target = self._parse_value(values=False)
            values = self._match_text_seq("VALUES") and self._parse_value()
            where = self._parse_where()
            return self.expression(exp.Insert(this=target, expression=values, where=where))

        if self._match(TokenType.UPDATE):
            star = self._parse_star()
            if star:
                return self.expression(exp.Update(expressions=star))

            assignments = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
            where = self._parse_where()
            return self.expression(exp.Update(expressions=assignments, where=where))

        if self._match(TokenType.DELETE):
            return self.expression(exp.Var(this=self._prev.text))

        return self._parse_var_from_options(self.CONFLICT_ACTIONS)

    clauses = []
    while self._match(TokenType.WHEN):
        matched = not self._match(TokenType.NOT)
        self._match_text_seq("MATCHED")

        # BY TARGET is recorded as False; otherwise the BY SOURCE match result is kept
        if self._match_text_seq("BY", "TARGET"):
            source = False
        else:
            source = self._match_text_seq("BY", "SOURCE")

        condition = None
        if self._match(TokenType.AND):
            condition = self._parse_disjunction()

        self._match(TokenType.THEN)
        then = _parse_then_action()

        clauses.append(
            self.expression(
                exp.When(matched=matched, source=source, condition=condition, then=then)
            )
        )

    return self.expression(exp.Whens(expressions=clauses))
def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
    """Parse a SET statement, falling back to a generic command.

    Args:
        unset: Stored on the resulting `exp.Set` node.
        tag: Stored on the resulting `exp.Set` node.

    Returns:
        An `exp.Set` when the items consume every remaining token; otherwise the
        parser rewinds and the statement is returned as an `exp.Command`.
    """
    start_index = self._index
    items = self._parse_csv(self._parse_set_item)
    statement = self.expression(exp.Set(expressions=items, unset=unset, tag=tag))

    if not self._curr:
        return statement

    # Tokens are left over, so the statement was only partially understood
    self._retreat(start_index)
    return self._parse_as_command(self._prev)
def _parse_dict_range(self, this: str) -> exp.DictRange:
    """Parse a parenthesized ``(MIN <x> MAX <y>)`` or ``(<y>)`` range.

    Args:
        this: Stored as ``this`` on the resulting node.

    Returns:
        An `exp.DictRange`; when MIN is not given, the minimum is the number
        literal 0.

    Raises:
        Whatever `_match_l_paren` / `_match_r_paren` raise when a parenthesis is missing.
    """
    self._match_l_paren()

    if self._match_text_seq("MIN"):
        lower = self._parse_var() or self._parse_primary()
        self._match_text_seq("MAX")
        upper = self._parse_var() or self._parse_primary()
    else:
        upper = self._parse_var() or self._parse_primary()
        lower = exp.Literal.number(0)

    self._match_r_paren()
    return self.expression(exp.DictRange(this=this, min=lower, max=upper))
def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None:
    """Look up the sub-parser whose (possibly multi-word) key matches the upcoming tokens.

    Tokens are consumed one at a time and walked through `trie`. On a complete key the
    matching tokens stay consumed and the callable is returned; otherwise the parser
    rewinds to where it started.

    Args:
        parsers: Mapping from space-joined, upper-cased keys to parser callables.
        trie: Trie built from the keys of `parsers`.

    Returns:
        The matching callable, or ``None`` when no key matches.
    """
    if not self._curr:
        return None

    index = self._index
    words = []
    # Re-check `self._curr` on every iteration: when the tokens run out while only a
    # prefix of a key has been seen, there is nothing left to read and no match.
    while self._curr:
        # The current token might be multiple words
        curr = self._curr.text.upper()
        key = curr.split(" ")
        words.append(curr)

        self._advance()
        result, trie = in_trie(trie, key)
        if result == TrieResult.FAILED:
            break

        if result == TrieResult.EXISTS:
            return parsers[" ".join(words)]

    self._retreat(index)
    return None
"IDENTITY"): 9461 identity = "RESTART" 9462 elif self._match_text_seq("CONTINUE", "IDENTITY"): 9463 identity = "CONTINUE" 9464 else: 9465 identity = None 9466 9467 if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"): 9468 option = self._prev.text 9469 else: 9470 option = None 9471 9472 partition = self._parse_partition() 9473 9474 # Fallback case 9475 if self._curr: 9476 return self._parse_as_command(start) 9477 9478 return self.expression( 9479 exp.TruncateTable( 9480 expressions=expressions, 9481 is_database=is_database, 9482 exists=exists, 9483 cluster=cluster, 9484 identity=identity, 9485 option=option, 9486 partition=partition, 9487 ) 9488 ) 9489 9490 def _parse_with_operator(self) -> exp.Expr | None: 9491 this = self._parse_ordered(self._parse_opclass) 9492 9493 if not self._match(TokenType.WITH): 9494 return this 9495 9496 op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS) 9497 9498 return self.expression(exp.WithOperator(this=this, op=op)) 9499 9500 def _parse_wrapped_options(self) -> list[exp.Expr]: 9501 self._match(TokenType.EQ) 9502 self._match(TokenType.L_PAREN) 9503 9504 opts: list[exp.Expr] = [] 9505 option: exp.Expr | list[exp.Expr] | None 9506 while self._curr and not self._match(TokenType.R_PAREN): 9507 if self._match_text_seq("FORMAT_NAME", "="): 9508 # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL 9509 option = self._parse_format_name() 9510 else: 9511 option = self._parse_property() 9512 9513 if option is None: 9514 self.raise_error("Unable to parse option") 9515 break 9516 9517 opts.extend(ensure_list(option)) 9518 9519 return opts 9520 9521 def _parse_copy_parameters(self) -> list[exp.CopyParameter]: 9522 sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None 9523 9524 options = [] 9525 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 9526 option = self._parse_var(any_token=True) 9527 prev = self._prev.text.upper() 9528 9529 # Different dialects might 
separate options and values by white space, "=" and "AS" 9530 self._match(TokenType.EQ) 9531 self._match(TokenType.ALIAS) 9532 9533 param = self.expression(exp.CopyParameter(this=option)) 9534 9535 if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match( 9536 TokenType.L_PAREN, advance=False 9537 ): 9538 # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options 9539 param.set("expressions", self._parse_wrapped_options()) 9540 elif prev == "FILE_FORMAT": 9541 # T-SQL's external file format case 9542 param.set("expression", self._parse_field()) 9543 elif ( 9544 prev == "FORMAT" 9545 and self._prev.token_type == TokenType.ALIAS 9546 and self._match_texts(("AVRO", "JSON")) 9547 ): 9548 param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}")) 9549 param.set("expression", self._parse_field()) 9550 else: 9551 param.set("expression", self._parse_unquoted_field() or self._parse_bracket()) 9552 9553 options.append(param) 9554 9555 if sep: 9556 self._match(sep) 9557 9558 return options 9559 9560 def _parse_credentials(self) -> exp.Credentials | None: 9561 expr = self.expression(exp.Credentials()) 9562 9563 if self._match_text_seq("STORAGE_INTEGRATION", "="): 9564 expr.set("storage", self._parse_field()) 9565 if self._match_text_seq("CREDENTIALS"): 9566 # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string> 9567 creds = ( 9568 self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field() 9569 ) 9570 expr.set("credentials", creds) 9571 if self._match_text_seq("ENCRYPTION"): 9572 expr.set("encryption", self._parse_wrapped_options()) 9573 if self._match_text_seq("IAM_ROLE"): 9574 expr.set( 9575 "iam_role", 9576 exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(), 9577 ) 9578 if self._match_text_seq("REGION"): 9579 expr.set("region", self._parse_field()) 9580 9581 return expr 9582 9583 def _parse_file_location(self) -> exp.Expr | None: 9584 return self._parse_field() 9585 9586 def 
_parse_copy(self) -> exp.Copy | exp.Command: 9587 start = self._prev 9588 9589 self._match(TokenType.INTO) 9590 9591 this = ( 9592 self._parse_select(nested=True, parse_subquery_alias=False) 9593 if self._match(TokenType.L_PAREN, advance=False) 9594 else self._parse_table(schema=True) 9595 ) 9596 9597 kind = self._match(TokenType.FROM) or not self._match_text_seq("TO") 9598 9599 files = self._parse_csv(self._parse_file_location) 9600 if self._match(TokenType.EQ, advance=False): 9601 # Backtrack one token since we've consumed the lhs of a parameter assignment here. 9602 # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter 9603 # list via `_parse_wrapped(..)` below. 9604 self._advance(-1) 9605 files = [] 9606 9607 credentials = self._parse_credentials() 9608 9609 self._match_text_seq("WITH") 9610 9611 params = self._parse_wrapped(self._parse_copy_parameters, optional=True) 9612 9613 # Fallback case 9614 if self._curr: 9615 return self._parse_as_command(start) 9616 9617 return self.expression( 9618 exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params) 9619 ) 9620 9621 def _parse_normalize(self) -> exp.Normalize: 9622 return self.expression( 9623 exp.Normalize( 9624 this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var() 9625 ) 9626 ) 9627 9628 def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor: 9629 args = self._parse_csv(lambda: self._parse_lambda()) 9630 9631 this = seq_get(args, 0) 9632 decimals = seq_get(args, 1) 9633 9634 return expr_type( 9635 this=this, 9636 decimals=decimals, 9637 to=self._parse_var() if self._match_text_seq("TO") else None, 9638 ) 9639 9640 def _parse_star_ops(self) -> exp.Expr | None: 9641 star_token = self._prev 9642 9643 if self._match_text_seq("COLUMNS", "(", advance=False): 9644 this = self._parse_function() 9645 if isinstance(this, exp.Columns): 9646 this.set("unpack", True) 9647 return this 9648 9649 return self.expression( 9650 
exp.Star( 9651 except_=self._parse_star_op("EXCEPT", "EXCLUDE"), 9652 replace=self._parse_star_op("REPLACE"), 9653 rename=self._parse_star_op("RENAME"), 9654 ) 9655 ).update_positions(star_token) 9656 9657 def _parse_grant_privilege(self) -> exp.GrantPrivilege | None: 9658 privilege_parts = [] 9659 9660 # Keep consuming consecutive keywords until comma (end of this privilege) or ON 9661 # (end of privilege list) or L_PAREN (start of column list) are met 9662 while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False): 9663 privilege_parts.append(self._curr.text.upper()) 9664 self._advance() 9665 9666 this = exp.var(" ".join(privilege_parts)) 9667 expressions = ( 9668 self._parse_wrapped_csv(self._parse_column) 9669 if self._match(TokenType.L_PAREN, advance=False) 9670 else None 9671 ) 9672 9673 return self.expression(exp.GrantPrivilege(this=this, expressions=expressions)) 9674 9675 def _parse_grant_principal(self) -> exp.GrantPrincipal | None: 9676 kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper() 9677 principal = self._parse_id_var() 9678 9679 if not principal: 9680 return None 9681 9682 return self.expression(exp.GrantPrincipal(this=principal, kind=kind)) 9683 9684 def _parse_grant_revoke_common( 9685 self, 9686 ) -> tuple[list | None, str | None, exp.Expr | None]: 9687 privileges = self._parse_csv(self._parse_grant_privilege) 9688 9689 self._match(TokenType.ON) 9690 kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None 9691 9692 # Attempt to parse the securable e.g. 
MySQL allows names 9693 # such as "foo.*", "*.*" which are not easily parseable yet 9694 securable = self._try_parse(self._parse_table_parts) 9695 9696 return privileges, kind, securable 9697 9698 def _parse_grant(self) -> exp.Grant | exp.Command: 9699 start = self._prev 9700 9701 privileges, kind, securable = self._parse_grant_revoke_common() 9702 9703 if not securable or not self._match_text_seq("TO"): 9704 return self._parse_as_command(start) 9705 9706 principals = self._parse_csv(self._parse_grant_principal) 9707 9708 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 9709 9710 if self._curr: 9711 return self._parse_as_command(start) 9712 9713 return self.expression( 9714 exp.Grant( 9715 privileges=privileges, 9716 kind=kind, 9717 securable=securable, 9718 principals=principals, 9719 grant_option=grant_option, 9720 ) 9721 ) 9722 9723 def _parse_revoke(self) -> exp.Revoke | exp.Command: 9724 start = self._prev 9725 9726 grant_option = self._match_text_seq("GRANT", "OPTION", "FOR") 9727 9728 privileges, kind, securable = self._parse_grant_revoke_common() 9729 9730 if not securable or not self._match_text_seq("FROM"): 9731 return self._parse_as_command(start) 9732 9733 principals = self._parse_csv(self._parse_grant_principal) 9734 9735 cascade = None 9736 if self._match_texts(("CASCADE", "RESTRICT")): 9737 cascade = self._prev.text.upper() 9738 9739 if self._curr: 9740 return self._parse_as_command(start) 9741 9742 return self.expression( 9743 exp.Revoke( 9744 privileges=privileges, 9745 kind=kind, 9746 securable=securable, 9747 principals=principals, 9748 grant_option=grant_option, 9749 cascade=cascade, 9750 ) 9751 ) 9752 9753 def _parse_overlay(self) -> exp.Overlay: 9754 def _parse_overlay_arg(text: str) -> exp.Expr | None: 9755 return ( 9756 self._parse_bitwise() 9757 if self._match(TokenType.COMMA) or self._match_text_seq(text) 9758 else None 9759 ) 9760 9761 return self.expression( 9762 exp.Overlay( 9763 this=self._parse_bitwise(), 9764 
expression=_parse_overlay_arg("PLACING"), 9765 from_=_parse_overlay_arg("FROM"), 9766 for_=_parse_overlay_arg("FOR"), 9767 ) 9768 ) 9769 9770 def _parse_format_name(self) -> exp.Property: 9771 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 9772 # for FILE_FORMAT = <format_name> 9773 return self.expression( 9774 exp.Property( 9775 this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts() 9776 ) 9777 ) 9778 9779 def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc: 9780 args: list[exp.Expr] = [] 9781 9782 if self._match(TokenType.DISTINCT): 9783 args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()]))) 9784 self._match(TokenType.COMMA) 9785 9786 args.extend(self._parse_function_args()) 9787 9788 return self.expression( 9789 expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)) 9790 ) 9791 9792 def _identifier_expression( 9793 self, token: Token | None = None, quoted: bool | None = None 9794 ) -> exp.Identifier: 9795 token = token or self._prev 9796 return self.expression(exp.Identifier(this=token.text, quoted=quoted), token) 9797 9798 def _build_pipe_cte( 9799 self, 9800 query: exp.Query, 9801 expressions: list[exp.Expr], 9802 alias_cte: exp.TableAlias | None = None, 9803 ) -> exp.Select: 9804 new_cte: str | exp.TableAlias | None 9805 if alias_cte: 9806 new_cte = alias_cte 9807 else: 9808 self._pipe_cte_counter += 1 9809 new_cte = f"__tmp{self._pipe_cte_counter}" 9810 9811 with_ = query.args.get("with_") 9812 ctes = with_.pop() if with_ else None 9813 9814 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 9815 if ctes: 9816 new_select.set("with_", ctes) 9817 9818 return new_select.with_(new_cte, as_=query, copy=False) 9819 9820 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 9821 select = self._parse_select(consume_pipe=False) 9822 if not select: 9823 return query 9824 9825 return 
self._build_pipe_cte( 9826 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 9827 ) 9828 9829 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 9830 limit = self._parse_limit() 9831 offset = self._parse_offset() 9832 if limit: 9833 curr_limit = query.args.get("limit", limit) 9834 if curr_limit.expression.to_py() >= limit.expression.to_py(): 9835 query.limit(limit, copy=False) 9836 if offset: 9837 curr_offset = query.args.get("offset") 9838 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 9839 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 9840 9841 return query 9842 9843 def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None: 9844 this = self._parse_disjunction() 9845 if self._match_text_seq("GROUP", "AND", advance=False): 9846 return this 9847 9848 this = self._parse_alias(this) 9849 9850 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 9851 return self._parse_ordered(lambda: this) 9852 9853 return this 9854 9855 def _parse_pipe_syntax_aggregate_group_order_by( 9856 self, query: exp.Select, group_by_exists: bool = True 9857 ) -> exp.Select: 9858 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 9859 aggregates_or_groups, orders = [], [] 9860 for element in expr: 9861 if isinstance(element, exp.Ordered): 9862 this = element.this 9863 if isinstance(this, exp.Alias): 9864 element.set("this", this.args["alias"]) 9865 orders.append(element) 9866 else: 9867 this = element 9868 aggregates_or_groups.append(this) 9869 9870 if group_by_exists: 9871 query.select( 9872 *aggregates_or_groups, *query.expressions, append=False, copy=False 9873 ).group_by( 9874 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 9875 copy=False, 9876 ) 9877 else: 9878 query.select(*aggregates_or_groups, append=False, copy=False) 9879 9880 if orders: 9881 return query.order_by(*orders, append=False, copy=False) 9882 
9883 return query 9884 9885 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 9886 self._match_text_seq("AGGREGATE") 9887 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 9888 9889 if self._match(TokenType.GROUP_BY) or ( 9890 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 9891 ): 9892 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 9893 9894 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9895 9896 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None: 9897 first_setop = self.parse_set_operation(this=query) 9898 if not first_setop: 9899 return None 9900 9901 def _parse_and_unwrap_query() -> exp.Expr | None: 9902 expr = self._parse_paren() 9903 return expr.assert_is(exp.Subquery).unnest() if expr else None 9904 9905 first_setop.this.pop() 9906 9907 setops = [ 9908 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 9909 *self._parse_csv(_parse_and_unwrap_query), 9910 ] 9911 9912 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9913 with_ = query.args.get("with_") 9914 ctes = with_.pop() if with_ else None 9915 9916 if isinstance(first_setop, exp.Union): 9917 query = query.union(*setops, copy=False, **first_setop.args) 9918 elif isinstance(first_setop, exp.Except): 9919 query = query.except_(*setops, copy=False, **first_setop.args) 9920 else: 9921 query = query.intersect(*setops, copy=False, **first_setop.args) 9922 9923 query.set("with_", ctes) 9924 9925 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9926 9927 def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None: 9928 join = self._parse_join() 9929 if not join: 9930 return None 9931 9932 if isinstance(query, exp.Select): 9933 return query.join(join, copy=False) 9934 9935 return query 9936 9937 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 9938 pivots = self._parse_pivots() 9939 if 
not pivots: 9940 return query 9941 9942 from_ = query.args.get("from_") 9943 if from_: 9944 from_.this.set("pivots", pivots) 9945 9946 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9947 9948 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 9949 self._match_text_seq("EXTEND") 9950 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 9951 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 9952 9953 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 9954 sample = self._parse_table_sample() 9955 9956 with_ = query.args.get("with_") 9957 if with_: 9958 with_.expressions[-1].this.set("sample", sample) 9959 else: 9960 query.set("sample", sample) 9961 9962 return query 9963 9964 def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None: 9965 if isinstance(query, exp.Subquery): 9966 query = exp.select("*").from_(query, copy=False) 9967 9968 if not query.args.get("from_"): 9969 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 9970 9971 while self._match(TokenType.PIPE_GT): 9972 start_index = self._index 9973 start_text = self._curr.text.upper() 9974 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text) 9975 if not parser: 9976 # The set operators (UNION, etc) and the JOIN operator have a few common starting 9977 # keywords, making it tricky to disambiguate them without lookahead. The approach 9978 # here is to try and parse a set operation and if that fails, then try to parse a 9979 # join operator. If that fails as well, then the operator is not supported. 
9980 parsed_query = self._parse_pipe_syntax_set_operator(query) 9981 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 9982 if not parsed_query: 9983 self._retreat(start_index) 9984 self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.") 9985 break 9986 query = parsed_query 9987 else: 9988 query = parser(self, query) 9989 9990 return query 9991 9992 def _parse_declareitem(self) -> exp.DeclareItem | None: 9993 self._match_texts(("VAR", "VARIABLE")) 9994 9995 vars = self._parse_csv(self._parse_id_var) 9996 if not vars: 9997 return None 9998 9999 self._match(TokenType.ALIAS) 10000 kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types() 10001 default = ( 10002 self._match(TokenType.DEFAULT) or self._match(TokenType.EQ) 10003 ) and self._parse_bitwise() 10004 10005 return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default)) 10006 10007 def _parse_declare(self) -> exp.Declare | exp.Command: 10008 start = self._prev 10009 replace = self._match_text_seq("OR", "REPLACE") 10010 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 10011 10012 if not expressions or self._curr: 10013 return self._parse_as_command(start) 10014 10015 return self.expression(exp.Declare(expressions=expressions, replace=replace)) 10016 10017 def build_cast(self, strict: bool, **kwargs) -> exp.Expr: 10018 exp_class = exp.Cast if strict else exp.TryCast 10019 10020 if exp_class == exp.TryCast: 10021 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 10022 10023 return self.expression(exp_class(**kwargs)) 10024 10025 def _parse_json_value(self) -> exp.JSONValue: 10026 this = self._parse_bitwise() 10027 self._match(TokenType.COMMA) 10028 path = self._parse_bitwise() 10029 10030 returning = self._match(TokenType.RETURNING) and self._parse_type() 10031 10032 return self.expression( 10033 exp.JSONValue( 10034 this=this, 10035 path=self.dialect.to_json_path(path), 10036 
returning=returning, 10037 on_condition=self._parse_on_condition(), 10038 ) 10039 ) 10040 10041 def _parse_group_concat(self) -> exp.Expr | None: 10042 def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr: 10043 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 10044 concat_exprs = [ 10045 self.expression( 10046 exp.Concat( 10047 expressions=node.expressions, 10048 safe=True, 10049 coalesce=self.dialect.CONCAT_COALESCE, 10050 ) 10051 ) 10052 ] 10053 node.set("expressions", concat_exprs) 10054 return node 10055 if len(exprs) == 1: 10056 return exprs[0] 10057 return self.expression( 10058 exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE) 10059 ) 10060 10061 args = self._parse_csv(self._parse_lambda) 10062 10063 if args: 10064 order = args[-1] if isinstance(args[-1], exp.Order) else None 10065 10066 if order: 10067 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 10068 # remove 'expr' from exp.Order and add it back to args 10069 args[-1] = order.this 10070 order.set("this", concat_exprs(order.this, args)) 10071 10072 this = order or concat_exprs(args[0], args) 10073 else: 10074 this = None 10075 10076 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 10077 10078 return self.expression(exp.GroupConcat(this=this, separator=separator)) 10079 10080 def _parse_initcap(self) -> exp.Initcap: 10081 expr = exp.Initcap.from_arg_list(self._parse_function_args()) 10082 10083 # attach dialect's default delimiters 10084 if expr.args.get("expression") is None: 10085 expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS)) 10086 10087 return expr 10088 10089 def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None: 10090 while True: 10091 if not self._match(TokenType.L_PAREN): 10092 break 10093 10094 op = "" 10095 while self._curr and not self._match(TokenType.R_PAREN): 10096 op += self._curr.text 10097 
self._advance() 10098 10099 comments = self._prev_comments 10100 this = self.expression( 10101 exp.Operator(this=this, operator=op, expression=self._parse_bitwise()), 10102 comments=comments, 10103 ) 10104 10105 if not self._match(TokenType.OPERATOR): 10106 break 10107 10108 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
- max_nodes: Maximum number of AST nodes to prevent memory exhaustion. Set to -1 (default) to disable the check.
def __init__(
    self,
    error_level: ErrorLevel | None = None,
    error_message_context: int = 100,
    max_errors: int = 3,
    max_nodes: int = -1,
    dialect: DialectType = None,
):
    """
    Store the parser configuration and initialise the per-parse state.

    Args:
        error_level: The desired error level. Falls back to ErrorLevel.IMMEDIATE
            when not given.
        error_message_context: Number of characters of SQL context captured
            around an error when building its message.
        max_errors: Maximum number of error messages included in a raised ParseError.
        max_nodes: Maximum number of AST nodes; -1 disables the check.
        dialect: The dialect (or dialect spec) resolved through `_resolve_dialect`.
    """
    # Configuration supplied by the caller.
    self.error_level: ErrorLevel = error_level if error_level else ErrorLevel.IMMEDIATE
    self.error_message_context: int = error_message_context
    self.max_errors: int = max_errors
    self.max_nodes: int = max_nodes
    self.dialect: t.Any = _resolve_dialect(dialect)

    # Input being parsed and the errors collected so far.
    self.sql: str = ""
    self.errors: list[ParseError] = []

    # Token stream and cursor.
    self._tokens: list[Token] = []
    self._tokens_size: i64 = 0
    self._index: i64 = 0
    self._curr: Token = SENTINEL_NONE
    self._next: Token = SENTINEL_NONE
    self._prev: Token = SENTINEL_NONE
    self._prev_comments: list[str] = []

    # Bookkeeping counters and statement chunks.
    self._pipe_cte_counter: int = 0
    self._chunks: list[list[Token]] = []
    self._chunk_index: i64 = 0
    self._node_count: int = 0
def reset(self) -> None:
    """
    Clear all per-parse state so the parser can be reused.

    The configuration attributes (error level, limits, dialect) are left untouched.
    """
    self.sql = ""

    # Every list gets its own fresh object; they must never be shared.
    self.errors = []
    self._tokens = []
    self._prev_comments = []
    self._chunks = []

    # Cursor positions and counters go back to zero.
    self._tokens_size = self._index = self._chunk_index = 0
    self._pipe_cte_counter = self._node_count = 0

    # No current, next or previous token yet.
    self._curr = self._next = self._prev = SENTINEL_NONE
def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
    """
    Build a ParseError for `message` and either raise it or record it.

    Args:
        message: Description of the problem.
        token: Token the error is anchored to. When falsy, the current token,
            then the previous token, then an empty string token is used.

    Raises:
        ParseError: If the error level is ErrorLevel.IMMEDIATE. For any other
            level the error is appended to `self.errors` instead.
    """
    token = token or self._curr or self._prev or Token.string("")

    # Highlight the offending span inside the original SQL text.
    formatted_sql, start_context, highlight, end_context = highlight_sql(
        sql=self.sql,
        positions=[(token.start, token.end)],
        context_length=self.error_message_context,
    )

    error = ParseError.new(
        f"{message}. Line {token.line}, Col: {token.col}.\n {formatted_sql}",
        description=message,
        line=token.line,
        col=token.col,
        start_context=start_context,
        highlight=highlight,
        end_context=end_context,
    )

    if self.error_level != ErrorLevel.IMMEDIATE:
        self.errors.append(error)
        return

    raise error
def validate_expression(self, expression: E, args: list | None = None) -> E:
    """
    Count the node against `max_nodes` and report the expression's validation errors.

    Args:
        expression: The expression to validate.
        args: Forwarded to `expression.error_messages`.

    Returns:
        The same expression that was passed in.
    """
    node_limit = self.max_nodes
    if node_limit > -1:
        self._node_count += 1
        if self._node_count > node_limit:
            self.raise_error(f"Maximum number of AST nodes ({node_limit}) exceeded")

    # With the IGNORE level the expression's own validation is skipped entirely.
    if self.error_level == ErrorLevel.IGNORE:
        return expression

    for error_message in expression.error_messages(args):
        self.raise_error(error_message)

    return expression
def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
    """
    Parses a list of tokens and returns a list of syntax trees, one tree
    per parsed SQL statement.

    Args:
        raw_tokens: The list of tokens.
        sql: The original SQL string.

    Returns:
        The list of the produced syntax trees.
    """
    # Pass the unbound method taken from the class, as `_parse` is handed a parse method.
    statement_parser = self.__class__._parse_statement
    return self._parse(parse_method=statement_parser, raw_tokens=raw_tokens, sql=sql)
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string.
Returns:
The list of the produced syntax trees.
def parse_into(
    self,
    expression_types: exp.IntoType,
    raw_tokens: list[Token],
    sql: str | None = None,
) -> list[exp.Expr | None]:
    """
    Parses a list of tokens into a given Expr type. If a collection of Expr
    types is given instead, this method will try to parse the token list into each one
    of them, stopping at the first for which the parsing succeeds.

    Args:
        expression_types: The expression type(s) to try and parse the token list into.
        raw_tokens: The list of tokens.
        sql: The original SQL string, used to produce helpful debug messages.

    Returns:
        The list of syntax trees produced for the first expression type that
        parses successfully.

    Raises:
        TypeError: If no parser is registered for one of the expression types.
        ParseError: If parsing fails for every expression type, or if no
            expression type was supplied at all.
    """
    errors = []
    for expression_type in ensure_list(expression_types):
        parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
        if not parser:
            raise TypeError(f"No parser registered for {expression_type}")

        try:
            return self._parse(parser, raw_tokens, sql)
        except ParseError as e:
            # Remember which target type this failure belongs to.
            e.errors[0]["into_expression"] = expression_type
            errors.append(e)

    # With an empty `expression_types` nothing was attempted, so there is no
    # previous error to chain from; indexing `errors[-1]` would raise IndexError
    # and hide the ParseError below.
    cause = errors[-1] if errors else None

    raise ParseError(
        f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
        errors=merge_errors(errors),
    ) from cause
Parses a list of tokens into a given Expr type. If a collection of Expr types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of syntax trees produced for the first expression type that parses successfully.
def check_errors(self) -> None:
    """Logs or raises any found errors, depending on the chosen error level setting."""
    level = self.error_level

    if level == ErrorLevel.WARN:
        # Warn mode: report every collected error through the module logger.
        for error in self.errors:
            logger.error(str(error))
        return

    if level != ErrorLevel.RAISE or not self.errors:
        return

    # Raise mode: one ParseError summarising the collected errors.
    raise ParseError(
        concat_messages(self.errors, self.max_errors),
        errors=merge_errors(self.errors),
    )
Logs or raises any found errors, depending on the chosen error level setting.
def expression(
    self,
    instance: E,
    token: Token | None = None,
    comments: list[str] | None = None,
) -> E:
    """
    Finalise a freshly built expression node.

    Args:
        instance: The expression to finalise.
        token: When given, the instance's positions are updated from it.
        comments: Comments to attach. When empty or None, `self._add_comments`
            is called on the instance instead.

    Returns:
        The instance, passed through `validate_expression` unless it is primitive.
    """
    if token:
        instance.update_positions(token)

    if comments:
        instance.add_comments(comments)
    else:
        self._add_comments(instance)

    # Primitive instances skip validation.
    return instance if instance.is_primitive else self.validate_expression(instance)
def parse_set_operation(
    self, this: exp.Expr | None, consume_pipe: bool = False
) -> exp.Expr | None:
    """
    Parse one set operation (UNION / EXCEPT / INTERSECT) whose left side is `this`.

    Args:
        this: The already parsed left-hand side of the set operation.
        consume_pipe: Forwarded to `_parse_select` for the right-hand side.

    Returns:
        The set operation expression, or None (with the token position restored)
        when the upcoming tokens do not form a set operation.
    """
    start = self._index

    # Optional side/kind modifiers are read via the join-parts parser.
    _, side_token, kind_token = self._parse_join_parts()
    side = side_token.text if side_token else None
    kind = kind_token.text if kind_token else None

    if not self._match_set(self.SET_OPERATIONS):
        self._retreat(start)
        return None

    # Anything that is neither UNION nor EXCEPT is treated as INTERSECT.
    operation: type[exp.SetOperation] = {
        TokenType.UNION: exp.Union,
        TokenType.EXCEPT: exp.Except,
    }.get(self._prev.token_type, exp.Intersect)

    comments = self._prev.comments

    distinct: bool | None
    if self._match(TokenType.DISTINCT):
        distinct = True
    elif self._match(TokenType.ALL):
        distinct = False
    else:
        # Neither keyword given: fall back to the dialect's default for this operation.
        distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
        if distinct is None:
            self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

    by_name = (
        self._match_text_seq("BY", "NAME")
        or self._match_text_seq("STRICT", "CORRESPONDING")
        or None
    )
    if self._match_text_seq("CORRESPONDING"):
        by_name = True
        # A bare CORRESPONDING with no side/kind defaults the kind to INNER.
        if not (side or kind):
            kind = "INNER"

    on_column_list = (
        self._parse_wrapped_csv(self._parse_column)
        if by_name and self._match_texts(("ON", "BY"))
        else None
    )

    expression = self._parse_select(
        nested=True, parse_set_operation=False, consume_pipe=consume_pipe
    )

    node = operation(
        this=this,
        distinct=distinct,
        by_name=by_name,
        expression=expression,
        side=side,
        kind=kind,
        on=on_column_list,
    )
    return self.expression(node, comments=comments)