Edit on GitHub

sqlglot.parser

View Source

    1from __future__ import annotations
    2
    3import itertools
    4import logging
    5import re
    6import typing as t
    7from collections import defaultdict
    8
    9from sqlglot import exp
   10from sqlglot.errors import (
   11    ErrorLevel,
   12    ParseError,
   13    TokenError,
   14    concat_messages,
   15    highlight_sql,
   16    merge_errors,
   17)
   18from sqlglot.expressions import apply_index_offset
   19from sqlglot.helper import ensure_list, i64, seq_get
   20from sqlglot.trie import new_trie
   21from sqlglot.time import format_time
   22from sqlglot.tokens import Token, Tokenizer, TokenType
   23from sqlglot.trie import TrieResult, in_trie
   24from collections.abc import Sequence
   25from builtins import type as Type
   26
   27if t.TYPE_CHECKING:
   28    from sqlglot.expressions import ExpOrStr
   29    from sqlglot._typing import E, BuilderArgs
   30    from sqlglot.dialects.dialect import Dialect, DialectType
   31
   32    from re import Pattern
   33
   34    T = t.TypeVar("T")
   35    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)
   36
# Module logger, registered under the fixed name "sqlglot" (not __name__).
logger = logging.getLogger("sqlglot")

# Alias for a dict that maps a string to a sequence whose items are each either a single
# string or a sequence of strings.
OPTIONS_TYPE = dict[str, Sequence[t.Union[Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals.
# The pattern matches a colon, then (lazily) any characters, then a letter, "+" or "-".
TIME_ZONE_RE: Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")
   43
   44
   45def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
   46    if len(args) == 1 and args[0].is_star:
   47        return exp.StarMap(this=args[0])
   48
   49    keys: list[ExpOrStr] = []
   50    values: list[ExpOrStr] = []
   51    for i in range(0, len(args), 2):
   52        keys.append(args[i])
   53        values.append(args[i + 1])
   54
   55    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
   56
   57
   58def build_like(args: BuilderArgs) -> exp.Escape | exp.Like:
   59    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
   60    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like
   61
   62
   63def binary_range_parser(
   64    expr_type: Type[exp.Expr], reverse_args: bool = False
   65) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
   66    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
   67        expression = self._parse_bitwise()
   68        if reverse_args:
   69            this, expression = expression, this
   70        return self._parse_escape(self.expression(expr_type(this=this, expression=expression)))
   71
   72    return _parse_binary_range
   73
   74
   75def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
   76    # Default argument order is base, expression
   77    this = seq_get(args, 0)
   78    expression = seq_get(args, 1)
   79
   80    if expression:
   81        if not dialect.LOG_BASE_FIRST:
   82            this, expression = expression, this
   83        return exp.Log(this=this, expression=expression)
   84
   85    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
   86
   87
   88def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex:
   89    arg = seq_get(args, 0)
   90    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)
   91
   92
   93def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex:
   94    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
   95    arg = seq_get(args, 0)
   96    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)
   97
   98
   99def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex:
  100    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
  101    arg = seq_get(args, 0)
  102    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)
  103
  104
  105def build_extract_json_with_path(
  106    expr_type: Type[E],
  107) -> t.Callable[[BuilderArgs, Dialect], E]:
  108    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
  109        expression = expr_type(
  110            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
  111        )
  112        if len(args) > 2 and expr_type is exp.JSONExtract:
  113            expression.set("expressions", args[2:])
  114        if expr_type is exp.JSONExtractScalar:
  115            expression.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)
  116
  117        return expression
  118
  119    return _builder
  120
  121
  122def build_mod(args: BuilderArgs) -> exp.Mod:
  123    this = seq_get(args, 0)
  124    expression = seq_get(args, 1)
  125
  126    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
  127    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
  128    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression
  129
  130    return exp.Mod(this=this, expression=expression)
  131
  132
  133def build_pad(args: BuilderArgs, is_left: bool = True):
  134    return exp.Pad(
  135        this=seq_get(args, 0),
  136        expression=seq_get(args, 1),
  137        fill_pattern=seq_get(args, 2),
  138        is_left=is_left,
  139    )
  140
  141
  142def build_array_constructor(
  143    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
  144) -> exp.Expr:
  145    array_exp = exp_class(expressions=args)
  146
  147    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
  148        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)
  149
  150    return array_exp
  151
  152
  153def build_convert_timezone(
  154    args: BuilderArgs, default_source_tz: str | None = None
  155) -> exp.ConvertTimezone | exp.Anonymous:
  156    if len(args) == 2:
  157        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
  158        return exp.ConvertTimezone(
  159            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
  160        )
  161
  162    return exp.ConvertTimezone.from_arg_list(args)
  163
  164
  165def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
  166    this, expression = seq_get(args, 0), seq_get(args, 1)
  167
  168    if expression and reverse_args:
  169        this, expression = expression, this
  170
  171    return exp.Trim(this=this, expression=expression, position="LEADING" if is_left else "TRAILING")
  172
  173
  174def build_coalesce(
  175    args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None
  176) -> exp.Coalesce:
  177    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)
  178
  179
  180def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition:
  181    return exp.StrPosition(
  182        this=seq_get(args, 1),
  183        substr=seq_get(args, 0),
  184        position=seq_get(args, 2),
  185    )
  186
  187
  188def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
  189    """
  190    Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.
  191
  192    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
  193    Others (DuckDB, PostgreSQL) create a new single-element array instead.
  194
  195    Args:
  196        args: Function arguments [array, element]
  197        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
  198
  199    Returns:
  200        ArrayAppend expression with appropriate null_propagation flag
  201    """
  202    return exp.ArrayAppend(
  203        this=seq_get(args, 0),
  204        expression=seq_get(args, 1),
  205        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
  206    )
  207
  208
  209def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
  210    """
  211    Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.
  212
  213    Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL.
  214    Others (DuckDB, PostgreSQL) create a new single-element array instead.
  215
  216    Args:
  217        args: Function arguments [array, element]
  218        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
  219
  220    Returns:
  221        ArrayPrepend expression with appropriate null_propagation flag
  222    """
  223    return exp.ArrayPrepend(
  224        this=seq_get(args, 0),
  225        expression=seq_get(args, 1),
  226        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
  227    )
  228
  229
  230def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
  231    """
  232    Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.
  233
  234    Some dialects (Redshift, Snowflake) return NULL when any input array is NULL.
  235    Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.
  236
  237    Args:
  238        args: Function arguments [array1, array2, ...] (variadic)
  239        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
  240
  241    Returns:
  242        ArrayConcat expression with appropriate null_propagation flag
  243    """
  244    return exp.ArrayConcat(
  245        this=seq_get(args, 0),
  246        expressions=args[1:],
  247        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
  248    )
  249
  250
  251def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove:
  252    """
  253    Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.
  254
  255    Some dialects (Snowflake) return NULL when the removal value is NULL.
  256    Others (DuckDB) may return empty array due to NULL comparison semantics.
  257
  258    Args:
  259        args: Function arguments [array, value_to_remove]
  260        dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from
  261
  262    Returns:
  263        ArrayRemove expression with appropriate null_propagation flag
  264    """
  265    return exp.ArrayRemove(
  266        this=seq_get(args, 0),
  267        expression=seq_get(args, 1),
  268        null_propagation=dialect.ARRAY_FUNCS_PROPAGATES_NULLS,
  269    )
  270
  271
  272def _resolve_dialect(dialect: DialectType) -> Dialect:
  273    from sqlglot.dialects.dialect import Dialect
  274
  275    return Dialect.get_or_raise(dialect)
  276
  277
  278def _unpivot_target(expr: exp.Expr) -> exp.Expr:
  279    # UNPIVOT's pre-FOR values and FOR field are new output names, not column references.
  280    if isinstance(expr, exp.Column) and not expr.table:
  281        return expr.this
  282    if isinstance(expr, exp.Tuple):
  283        expr.set("expressions", [_unpivot_target(e) for e in expr.expressions])
  284    return expr
  285
  286
# Module-level Token of type TokenType.SENTINEL carrying the text "SENTINEL".
# NOTE(review): presumably a placeholder for "no token" where None cannot be used — confirm
# against its usages.
SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL")
  288
  289
  290class Parser:
  291    """
  292    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
  293
  294    Args:
  295        error_level: The desired error level.
  296            Default: ErrorLevel.IMMEDIATE
  297        error_message_context: The amount of context to capture from a query string when displaying
  298            the error message (in number of characters).
  299            Default: 100
  300        max_errors: Maximum number of error messages to include in a raised ParseError.
  301            This is only relevant if error_level is ErrorLevel.RAISE.
  302            Default: 3
  303        max_nodes: Maximum number of AST nodes to prevent memory exhaustion.
  304            Set to -1 (default) to disable the check.
  305    """
  306
  307    __slots__ = (
  308        "error_level",
  309        "error_message_context",
  310        "max_errors",
  311        "max_nodes",
  312        "dialect",
  313        "sql",
  314        "errors",
  315        "_tokens",
  316        "_index",
  317        "_curr",
  318        "_next",
  319        "_prev",
  320        "_prev_comments",
  321        "_pipe_cte_counter",
  322        "_chunks",
  323        "_chunk_index",
  324        "_tokens_size",
  325        "_node_count",
  326    )
  327
  328    FUNCTIONS: t.ClassVar[dict[str, t.Callable]] = {
  329        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
  330        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
  331        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
  332        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
  333            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
  334        ),
  335        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
  336            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
  337        ),
  338        "ARRAY_APPEND": build_array_append,
  339        "ARRAY_CAT": build_array_concat,
  340        "ARRAY_CONCAT": build_array_concat,
  341        "ARRAY_INTERSECT": lambda args: exp.ArrayIntersect(expressions=args),
  342        "ARRAY_INTERSECTION": lambda args: exp.ArrayIntersect(expressions=args),
  343        "ARRAY_PREPEND": build_array_prepend,
  344        "ARRAY_REMOVE": build_array_remove,
  345        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
  346        "CONCAT": lambda args, dialect: exp.Concat(
  347            expressions=args,
  348            safe=not dialect.STRICT_STRING_CONCAT,
  349            coalesce=dialect.CONCAT_COALESCE,
  350        ),
  351        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
  352            expressions=args,
  353            safe=not dialect.STRICT_STRING_CONCAT,
  354            coalesce=dialect.CONCAT_WS_COALESCE,
  355        ),
  356        "CONVERT_TIMEZONE": build_convert_timezone,
  357        "DATE_TO_DATE_STR": lambda args: exp.Cast(
  358            this=seq_get(args, 0),
  359            to=exp.DataType(this=exp.DType.TEXT),
  360        ),
  361        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
  362            start=seq_get(args, 0),
  363            end=seq_get(args, 1),
  364            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
  365        ),
  366        "GENERATE_UUID": lambda args, dialect: exp.Uuid(
  367            is_string=dialect.UUID_IS_STRING_TYPE or None
  368        ),
  369        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
  370        "GREATEST": lambda args, dialect: exp.Greatest(
  371            this=seq_get(args, 0),
  372            expressions=args[1:],
  373            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
  374        ),
  375        "LEAST": lambda args, dialect: exp.Least(
  376            this=seq_get(args, 0),
  377            expressions=args[1:],
  378            ignore_nulls=dialect.LEAST_GREATEST_IGNORES_NULLS,
  379        ),
  380        "HEX": build_hex,
  381        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
  382        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
  383        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
  384        "JSON_KEYS": lambda args, dialect: exp.JSONKeys(
  385            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
  386        ),
  387        "LIKE": build_like,
  388        "LOG": build_logarithm,
  389        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
  390        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
  391        "LOWER": build_lower,
  392        "LPAD": lambda args: build_pad(args),
  393        "LEFTPAD": lambda args: build_pad(args),
  394        "LTRIM": lambda args: build_trim(args),
  395        "MOD": build_mod,
  396        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
  397        "RPAD": lambda args: build_pad(args, is_left=False),
  398        "RTRIM": lambda args: build_trim(args, is_left=False),
  399        "SCOPE_RESOLUTION": lambda args: (
  400            exp.ScopeResolution(expression=seq_get(args, 0))
  401            if len(args) != 2
  402            else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1))
  403        ),
  404        "STRPOS": exp.StrPosition.from_arg_list,
  405        "CHARINDEX": lambda args: build_locate_strposition(args),
  406        "INSTR": exp.StrPosition.from_arg_list,
  407        "LOCATE": lambda args: build_locate_strposition(args),
  408        "TIME_TO_TIME_STR": lambda args: exp.Cast(
  409            this=seq_get(args, 0),
  410            to=exp.DataType(this=exp.DType.TEXT),
  411        ),
  412        "TO_HEX": build_hex,
  413        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
  414            this=exp.Cast(
  415                this=seq_get(args, 0),
  416                to=exp.DataType(this=exp.DType.TEXT),
  417            ),
  418            start=exp.Literal.number(1),
  419            length=exp.Literal.number(10),
  420        ),
  421        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
  422        "UPPER": build_upper,
  423        "UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
  424        "UUID_STRING": lambda args, dialect: exp.Uuid(
  425            this=seq_get(args, 0),
  426            name=seq_get(args, 1),
  427            is_string=dialect.UUID_IS_STRING_TYPE or None,
  428        ),
  429        "VAR_MAP": build_var_map,
  430    }
  431
  432    NO_PAREN_FUNCTIONS: t.ClassVar[dict] = {
  433        TokenType.CURRENT_DATE: exp.CurrentDate,
  434        TokenType.CURRENT_DATETIME: exp.CurrentDate,
  435        TokenType.CURRENT_TIME: exp.CurrentTime,
  436        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
  437        TokenType.CURRENT_USER: exp.CurrentUser,
  438        TokenType.CURRENT_ROLE: exp.CurrentRole,
  439    }
  440
  441    STRUCT_TYPE_TOKENS: t.ClassVar = {
  442        TokenType.NESTED,
  443        TokenType.OBJECT,
  444        TokenType.STRUCT,
  445        TokenType.UNION,
  446    }
  447
  448    NESTED_TYPE_TOKENS: t.ClassVar = {
  449        TokenType.ARRAY,
  450        TokenType.LIST,
  451        TokenType.LOWCARDINALITY,
  452        TokenType.MAP,
  453        TokenType.NULLABLE,
  454        TokenType.RANGE,
  455        *STRUCT_TYPE_TOKENS,
  456    }
  457
  458    ENUM_TYPE_TOKENS: t.ClassVar = {
  459        TokenType.DYNAMIC,
  460        TokenType.ENUM,
  461        TokenType.ENUM8,
  462        TokenType.ENUM16,
  463    }
  464
  465    AGGREGATE_TYPE_TOKENS: t.ClassVar = {
  466        TokenType.AGGREGATEFUNCTION,
  467        TokenType.SIMPLEAGGREGATEFUNCTION,
  468    }
  469
  470    TYPE_TOKENS: t.ClassVar = {
  471        TokenType.BIT,
  472        TokenType.BOOLEAN,
  473        TokenType.TINYINT,
  474        TokenType.UTINYINT,
  475        TokenType.SMALLINT,
  476        TokenType.USMALLINT,
  477        TokenType.INT,
  478        TokenType.UINT,
  479        TokenType.BIGINT,
  480        TokenType.UBIGINT,
  481        TokenType.BIGNUM,
  482        TokenType.INT128,
  483        TokenType.UINT128,
  484        TokenType.INT256,
  485        TokenType.UINT256,
  486        TokenType.MEDIUMINT,
  487        TokenType.UMEDIUMINT,
  488        TokenType.FIXEDSTRING,
  489        TokenType.FLOAT,
  490        TokenType.DOUBLE,
  491        TokenType.UDOUBLE,
  492        TokenType.CHAR,
  493        TokenType.NCHAR,
  494        TokenType.VARCHAR,
  495        TokenType.NVARCHAR,
  496        TokenType.BPCHAR,
  497        TokenType.TEXT,
  498        TokenType.MEDIUMTEXT,
  499        TokenType.LONGTEXT,
  500        TokenType.BLOB,
  501        TokenType.MEDIUMBLOB,
  502        TokenType.LONGBLOB,
  503        TokenType.BINARY,
  504        TokenType.VARBINARY,
  505        TokenType.JSON,
  506        TokenType.JSONB,
  507        TokenType.INTERVAL,
  508        TokenType.TINYBLOB,
  509        TokenType.TINYTEXT,
  510        TokenType.TIME,
  511        TokenType.TIMETZ,
  512        TokenType.TIME_NS,
  513        TokenType.TIMESTAMP,
  514        TokenType.TIMESTAMP_S,
  515        TokenType.TIMESTAMP_MS,
  516        TokenType.TIMESTAMP_NS,
  517        TokenType.TIMESTAMPTZ,
  518        TokenType.TIMESTAMPLTZ,
  519        TokenType.TIMESTAMPNTZ,
  520        TokenType.DATETIME,
  521        TokenType.DATETIME2,
  522        TokenType.DATETIME64,
  523        TokenType.SMALLDATETIME,
  524        TokenType.DATE,
  525        TokenType.DATE32,
  526        TokenType.INT4RANGE,
  527        TokenType.INT4MULTIRANGE,
  528        TokenType.INT8RANGE,
  529        TokenType.INT8MULTIRANGE,
  530        TokenType.NUMRANGE,
  531        TokenType.NUMMULTIRANGE,
  532        TokenType.TSRANGE,
  533        TokenType.TSMULTIRANGE,
  534        TokenType.TSTZRANGE,
  535        TokenType.TSTZMULTIRANGE,
  536        TokenType.DATERANGE,
  537        TokenType.DATEMULTIRANGE,
  538        TokenType.DECIMAL,
  539        TokenType.DECIMAL32,
  540        TokenType.DECIMAL64,
  541        TokenType.DECIMAL128,
  542        TokenType.DECIMAL256,
  543        TokenType.DECFLOAT,
  544        TokenType.UDECIMAL,
  545        TokenType.BIGDECIMAL,
  546        TokenType.UUID,
  547        TokenType.GEOGRAPHY,
  548        TokenType.GEOGRAPHYPOINT,
  549        TokenType.GEOMETRY,
  550        TokenType.POINT,
  551        TokenType.RING,
  552        TokenType.LINESTRING,
  553        TokenType.MULTILINESTRING,
  554        TokenType.POLYGON,
  555        TokenType.MULTIPOLYGON,
  556        TokenType.HLLSKETCH,
  557        TokenType.HSTORE,
  558        TokenType.PSEUDO_TYPE,
  559        TokenType.SUPER,
  560        TokenType.SERIAL,
  561        TokenType.SMALLSERIAL,
  562        TokenType.BIGSERIAL,
  563        TokenType.XML,
  564        TokenType.YEAR,
  565        TokenType.USERDEFINED,
  566        TokenType.MONEY,
  567        TokenType.SMALLMONEY,
  568        TokenType.ROWVERSION,
  569        TokenType.IMAGE,
  570        TokenType.VARIANT,
  571        TokenType.VECTOR,
  572        TokenType.VOID,
  573        TokenType.OBJECT,
  574        TokenType.OBJECT_IDENTIFIER,
  575        TokenType.INET,
  576        TokenType.IPADDRESS,
  577        TokenType.IPPREFIX,
  578        TokenType.IPV4,
  579        TokenType.IPV6,
  580        TokenType.UNKNOWN,
  581        TokenType.NOTHING,
  582        TokenType.NULL,
  583        TokenType.NAME,
  584        TokenType.TDIGEST,
  585        TokenType.DYNAMIC,
  586        *ENUM_TYPE_TOKENS,
  587        *NESTED_TYPE_TOKENS,
  588        *AGGREGATE_TYPE_TOKENS,
  589    }
  590
  591    SIGNED_TO_UNSIGNED_TYPE_TOKEN: t.ClassVar = {
  592        TokenType.BIGINT: TokenType.UBIGINT,
  593        TokenType.INT: TokenType.UINT,
  594        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
  595        TokenType.SMALLINT: TokenType.USMALLINT,
  596        TokenType.TINYINT: TokenType.UTINYINT,
  597        TokenType.DECIMAL: TokenType.UDECIMAL,
  598        TokenType.DOUBLE: TokenType.UDOUBLE,
  599    }
  600
  601    SUBQUERY_PREDICATES: t.ClassVar = {
  602        TokenType.ANY: exp.Any,
  603        TokenType.ALL: exp.All,
  604        TokenType.EXISTS: exp.Exists,
  605        TokenType.SOME: exp.Any,
  606    }
  607
  608    SUBQUERY_TOKENS: t.ClassVar = {
  609        TokenType.SELECT,
  610        TokenType.WITH,
  611        TokenType.FROM,
  612    }
  613
  614    RESERVED_TOKENS: t.ClassVar = {
  615        *Tokenizer.SINGLE_TOKENS.values(),
  616        TokenType.SELECT,
  617    } - {TokenType.IDENTIFIER}
  618
  619    DB_CREATABLES: t.ClassVar = {
  620        TokenType.DATABASE,
  621        TokenType.DICTIONARY,
  622        TokenType.FILE_FORMAT,
  623        TokenType.MODEL,
  624        TokenType.NAMESPACE,
  625        TokenType.SCHEMA,
  626        TokenType.SEMANTIC_VIEW,
  627        TokenType.SEQUENCE,
  628        TokenType.SINK,
  629        TokenType.SOURCE,
  630        TokenType.STAGE,
  631        TokenType.STORAGE_INTEGRATION,
  632        TokenType.STREAMLIT,
  633        TokenType.TABLE,
  634        TokenType.TAG,
  635        TokenType.VIEW,
  636        TokenType.WAREHOUSE,
  637    }
  638
  639    CREATABLES: t.ClassVar = {
  640        TokenType.COLUMN,
  641        TokenType.CONSTRAINT,
  642        TokenType.FOREIGN_KEY,
  643        TokenType.FUNCTION,
  644        TokenType.INDEX,
  645        TokenType.PROCEDURE,
  646        TokenType.TRIGGER,
  647        TokenType.TYPE,
  648        *DB_CREATABLES,
  649    }
  650
  651    TRIGGER_EVENTS: t.ClassVar = {
  652        TokenType.INSERT,
  653        TokenType.UPDATE,
  654        TokenType.DELETE,
  655        TokenType.TRUNCATE,
  656    }
  657
  658    ALTERABLES: t.ClassVar = {
  659        TokenType.INDEX,
  660        TokenType.TABLE,
  661        TokenType.VIEW,
  662        TokenType.SESSION,
  663    }
  664
  665    # Tokens that can represent identifiers
  666    ID_VAR_TOKENS: t.ClassVar[set] = {
  667        TokenType.ALL,
  668        TokenType.ANALYZE,
  669        TokenType.ATTACH,
  670        TokenType.VAR,
  671        TokenType.ANTI,
  672        TokenType.APPLY,
  673        TokenType.ASC,
  674        TokenType.ASOF,
  675        TokenType.AUTO_INCREMENT,
  676        TokenType.BEGIN,
  677        TokenType.BPCHAR,
  678        TokenType.CACHE,
  679        TokenType.CASE,
  680        TokenType.COLLATE,
  681        TokenType.COMMAND,
  682        TokenType.COMMENT,
  683        TokenType.COMMIT,
  684        TokenType.CONSTRAINT,
  685        TokenType.COPY,
  686        TokenType.CUBE,
  687        TokenType.CURRENT_SCHEMA,
  688        TokenType.DEFAULT,
  689        TokenType.DELETE,
  690        TokenType.DESC,
  691        TokenType.DESCRIBE,
  692        TokenType.DETACH,
  693        TokenType.DICTIONARY,
  694        TokenType.DIV,
  695        TokenType.END,
  696        TokenType.EXECUTE,
  697        TokenType.EXPORT,
  698        TokenType.ESCAPE,
  699        TokenType.FALSE,
  700        TokenType.FIRST,
  701        TokenType.FILE,
  702        TokenType.FILTER,
  703        TokenType.FINAL,
  704        TokenType.FORMAT,
  705        TokenType.FULL,
  706        TokenType.GET,
  707        TokenType.IDENTIFIER,
  708        TokenType.INOUT,
  709        TokenType.IS,
  710        TokenType.ISNULL,
  711        TokenType.INTERVAL,
  712        TokenType.KEEP,
  713        TokenType.KILL,
  714        TokenType.LEFT,
  715        TokenType.LIMIT,
  716        TokenType.LOAD,
  717        TokenType.LOCK,
  718        TokenType.MATCH,
  719        TokenType.MERGE,
  720        TokenType.NATURAL,
  721        TokenType.NEXT,
  722        TokenType.OFFSET,
  723        TokenType.OPERATOR,
  724        TokenType.ORDINALITY,
  725        TokenType.OVER,
  726        TokenType.OVERLAPS,
  727        TokenType.OVERWRITE,
  728        TokenType.PARTITION,
  729        TokenType.PERCENT,
  730        TokenType.PIVOT,
  731        TokenType.PRAGMA,
  732        TokenType.PUT,
  733        TokenType.RANGE,
  734        TokenType.RECURSIVE,
  735        TokenType.REFERENCES,
  736        TokenType.REFRESH,
  737        TokenType.RENAME,
  738        TokenType.REPLACE,
  739        TokenType.RIGHT,
  740        TokenType.ROLLUP,
  741        TokenType.ROW,
  742        TokenType.ROWS,
  743        TokenType.SEMI,
  744        TokenType.SET,
  745        TokenType.SETTINGS,
  746        TokenType.SHOW,
  747        TokenType.STREAM,
  748        TokenType.STREAMLIT,
  749        TokenType.TEMPORARY,
  750        TokenType.TOP,
  751        TokenType.TRUE,
  752        TokenType.TRUNCATE,
  753        TokenType.UNIQUE,
  754        TokenType.UNNEST,
  755        TokenType.UNPIVOT,
  756        TokenType.UPDATE,
  757        TokenType.USE,
  758        TokenType.VOLATILE,
  759        TokenType.WINDOW,
  760        TokenType.CURRENT_CATALOG,
  761        TokenType.LOCALTIME,
  762        TokenType.LOCALTIMESTAMP,
  763        TokenType.SESSION_USER,
  764        TokenType.STRAIGHT_JOIN,
  765        *ALTERABLES,
  766        *CREATABLES,
  767        *SUBQUERY_PREDICATES,
  768        *TYPE_TOKENS,
  769        *NO_PAREN_FUNCTIONS,
  770    } - {TokenType.UNION}
  771
  772    TABLE_ALIAS_TOKENS: t.ClassVar[set] = ID_VAR_TOKENS - {
  773        TokenType.ANTI,
  774        TokenType.ASOF,
  775        TokenType.FULL,
  776        TokenType.LEFT,
  777        TokenType.LOCK,
  778        TokenType.NATURAL,
  779        TokenType.RIGHT,
  780        TokenType.SEMI,
  781        TokenType.WINDOW,
  782    }
  783
  784    ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS
  785
  786    COLON_PLACEHOLDER_TOKENS: t.ClassVar = ID_VAR_TOKENS
  787
  788    ARRAY_CONSTRUCTORS: t.ClassVar = {
  789        "ARRAY": exp.Array,
  790        "LIST": exp.List,
  791    }
  792
  793    COMMENT_TABLE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.IS}
  794
  795    UPDATE_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - {TokenType.SET}
  796
  797    TRIM_TYPES: t.ClassVar = {"LEADING", "TRAILING", "BOTH"}
  798
  799    # Tokens that indicate a simple column reference
  800    IDENTIFIER_TOKENS: t.ClassVar[frozenset] = frozenset({TokenType.VAR, TokenType.IDENTIFIER})
  801
  802    BRACKETS: t.ClassVar[frozenset] = frozenset({TokenType.L_BRACKET, TokenType.L_BRACE})
  803
  804    # Postfix tokens that prevent the bare column fast path
  805    COLUMN_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset(
  806        {
  807            TokenType.L_PAREN,
  808            TokenType.L_BRACKET,
  809            TokenType.L_BRACE,
  810            TokenType.COLON,
  811            TokenType.JOIN_MARKER,
  812        }
  813    )
  814
  815    TABLE_POSTFIX_TOKENS: t.ClassVar[frozenset] = frozenset(
  816        {
  817            TokenType.L_PAREN,
  818            TokenType.L_BRACKET,
  819            TokenType.L_BRACE,
  820            TokenType.PIVOT,
  821            TokenType.UNPIVOT,
  822            TokenType.TABLE_SAMPLE,
  823        }
  824    )
  825
  826    FUNC_TOKENS: t.ClassVar = {
             # Set of token types. Besides the members listed explicitly, it contains every
             # member of TYPE_TOKENS and SUBQUERY_PREDICATES (unpacked at the end).
             # NOTE(review): presumably the token types accepted as a function name - confirm
             # against the code that reads this set.
  827        TokenType.COLLATE,
  828        TokenType.COMMAND,
  829        TokenType.CURRENT_DATE,
  830        TokenType.CURRENT_DATETIME,
  831        TokenType.CURRENT_SCHEMA,
  832        TokenType.CURRENT_TIMESTAMP,
  833        TokenType.CURRENT_TIME,
  834        TokenType.CURRENT_USER,
  835        TokenType.CURRENT_CATALOG,
  836        TokenType.FILTER,
  837        TokenType.FIRST,
  838        TokenType.FORMAT,
  839        TokenType.GET,
  840        TokenType.GLOB,
  841        TokenType.IDENTIFIER,
  842        TokenType.INDEX,
  843        TokenType.ISNULL,
  844        TokenType.ILIKE,
  845        TokenType.INSERT,
  846        TokenType.LIKE,
  847        TokenType.LOCALTIME,
  848        TokenType.LOCALTIMESTAMP,
  849        TokenType.MERGE,
  850        TokenType.NEXT,
  851        TokenType.OFFSET,
  852        TokenType.PRIMARY_KEY,
  853        TokenType.RANGE,
  854        TokenType.REPLACE,
  855        TokenType.RLIKE,
  856        TokenType.ROW,
  857        TokenType.SESSION_USER,
  858        TokenType.UNNEST,
  859        TokenType.VAR,
  860        TokenType.LEFT,
  861        TokenType.RIGHT,
  862        TokenType.SEQUENCE,
  863        TokenType.DATE,
  864        TokenType.DATETIME,
  865        TokenType.TABLE,
  866        TokenType.TIMESTAMP,
  867        TokenType.TIMESTAMPTZ,
  868        TokenType.TRUNCATE,
  869        TokenType.UTC_DATE,
  870        TokenType.UTC_TIME,
  871        TokenType.UTC_TIMESTAMP,
  872        TokenType.WINDOW,
  873        TokenType.XOR,
  874        *TYPE_TOKENS,
  875        *SUBQUERY_PREDICATES,
  876    }
  877
  878    CONJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {
             # This table and the ones below it (through EXPONENT) each map an operator's
             # token type to an expression class.
  879        TokenType.AND: exp.And,
  880    }
  881
  882    ASSIGNMENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {
  883        TokenType.COLON_EQ: exp.PropertyEQ,
  884    }
  885
  886    DISJUNCTION: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {
  887        TokenType.OR: exp.Or,
  888    }
  889
  890    EQUALITY: t.ClassVar = {
  891        TokenType.EQ: exp.EQ,
  892        TokenType.NEQ: exp.NEQ,
  893        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
  894    }
  895
  896    COMPARISON: t.ClassVar = {
  897        TokenType.GT: exp.GT,
  898        TokenType.GTE: exp.GTE,
  899        TokenType.LT: exp.LT,
  900        TokenType.LTE: exp.LTE,
  901    }
  902
  903    BITWISE: t.ClassVar = {
  904        TokenType.AMP: exp.BitwiseAnd,
  905        TokenType.CARET: exp.BitwiseXor,
  906        TokenType.PIPE: exp.BitwiseOr,
  907    }
  908
         # Note that COLLATE is listed here alongside the additive operators and MOD.
  909    TERM: t.ClassVar = {
  910        TokenType.DASH: exp.Sub,
  911        TokenType.PLUS: exp.Add,
  912        TokenType.MOD: exp.Mod,
  913        TokenType.COLLATE: exp.Collate,
  914    }
  915
  916    FACTOR: t.ClassVar = {
  917        TokenType.DIV: exp.IntDiv,
  918        TokenType.LR_ARROW: exp.Distance,
  919        TokenType.LLRR_ARROW: exp.DistanceNd,
  920        TokenType.SLASH: exp.Div,
  921        TokenType.STAR: exp.Mul,
  922    }
  923
         # Empty in this class.
         # NOTE(review): presumably filled in by dialect-specific subclasses - confirm.
  924    EXPONENT: t.ClassVar[dict[TokenType, type[exp.Expr]]] = {}
  925
  926    TIMES: t.ClassVar = {
             # The two time-of-day token types; also unpacked into TIMESTAMPS below.
  927        TokenType.TIME,
  928        TokenType.TIMETZ,
  929    }
  930
         # The four timestamp token types plus every member of TIMES.
  931    TIMESTAMPS: t.ClassVar = {
  932        TokenType.TIMESTAMP,
  933        TokenType.TIMESTAMPNTZ,
  934        TokenType.TIMESTAMPTZ,
  935        TokenType.TIMESTAMPLTZ,
  936        *TIMES,
  937    }
  938
         # Token types of the set operators UNION, INTERSECT and EXCEPT.
  939    SET_OPERATIONS: t.ClassVar = {
  940        TokenType.UNION,
  941        TokenType.INTERSECT,
  942        TokenType.EXCEPT,
  943    }
  944
         # JOIN_METHODS, JOIN_SIDES and JOIN_KINDS are three disjoint sets of join-related
         # token types; all three are unpacked into TABLE_TERMINATORS.
  945    JOIN_METHODS: t.ClassVar = {
  946        TokenType.ASOF,
  947        TokenType.NATURAL,
  948        TokenType.POSITIONAL,
  949    }
  950
  951    JOIN_SIDES: t.ClassVar = {
  952        TokenType.LEFT,
  953        TokenType.RIGHT,
  954        TokenType.FULL,
  955    }
  956
  957    JOIN_KINDS: t.ClassVar = {
  958        TokenType.ANTI,
  959        TokenType.CROSS,
  960        TokenType.INNER,
  961        TokenType.OUTER,
  962        TokenType.SEMI,
  963        TokenType.STRAIGHT_JOIN,
  964    }
  965
         # Set of strings, empty in this class.
         # NOTE(review): presumably overridden by dialects that support join hints - confirm.
  966    JOIN_HINTS: t.ClassVar[set[str]] = set()
  967
  968    # Tokens that unambiguously end a table reference on the fast path
  969    TABLE_TERMINATORS: t.ClassVar[frozenset] = frozenset(
             # Besides the token types listed explicitly, the set contains every member of
             # SET_OPERATIONS, JOIN_KINDS, JOIN_METHODS and JOIN_SIDES (unpacked at the end).
  970        {
  971            TokenType.COMMA,
  972            TokenType.GROUP_BY,
  973            TokenType.HAVING,
  974            TokenType.JOIN,
  975            TokenType.LIMIT,
  976            TokenType.ON,
  977            TokenType.ORDER_BY,
  978            TokenType.R_PAREN,
  979            TokenType.SEMICOLON,
  980            TokenType.SENTINEL,
  981            TokenType.WHERE,
  982            *SET_OPERATIONS,
  983            *JOIN_KINDS,
  984            *JOIN_METHODS,
  985            *JOIN_SIDES,
  986        }
  987    )
  988
  989    LAMBDAS: t.ClassVar = {
             # Maps an arrow token type to a callback taking `(self, expressions)`. Both
             # callbacks parse what follows with `_parse_disjunction`:
             #  - ARROW builds exp.Lambda, passing the parsed body and `expressions` through
             #    `_replace_lambda` and keeping `expressions` as the lambda's arguments.
             #  - FARROW builds exp.Kwarg, using the name of the first expression as the key.
  990        TokenType.ARROW: lambda self, expressions: self.expression(
  991            exp.Lambda(
  992                this=self._replace_lambda(
  993                    self._parse_disjunction(),
  994                    expressions,
  995                ),
  996                expressions=expressions,
  997            )
  998        ),
  999        TokenType.FARROW: lambda self, expressions: self.expression(
 1000            exp.Kwarg(this=exp.var(expressions[0].name), expression=self._parse_disjunction())
 1001        ),
 1002    }
 1003
 1004    # Whether lambda args include type annotations, e.g. TRANSFORM(arr, x INT -> x + 1) in Snowflake
 1005    TYPED_LAMBDA_ARGS: t.ClassVar[bool] = False
 1006
         # Immutable set holding the COMMA and R_PAREN token types.
         # NOTE(review): presumably the tokens that end a single lambda argument - confirm.
 1007    LAMBDA_ARG_TERMINATORS: t.ClassVar[frozenset] = frozenset({TokenType.COMMA, TokenType.R_PAREN})
 1008
 1009    COLUMN_OPERATORS: t.ClassVar = {
             # Maps an operator token type to a callback taking `(self, this, <right operand>)`
             # that builds the resulting expression. DOT is the only key without a callback.
             # The ARROW and DARROW callbacks convert the path with `self.dialect.to_json_path`;
             # the HASH_ARROW and DHASH_ARROW callbacks pass the path through unchanged.
 1010        TokenType.DOT: None,
 1011        TokenType.DOTCOLON: lambda self, this, to: self.expression(exp.JSONCast(this=this, to=to)),
 1012        TokenType.DCOLON: lambda self, this, to: self.build_cast(
 1013            strict=self.STRICT_CAST, this=this, to=to
 1014        ),
 1015        TokenType.ARROW: lambda self, this, path: self.expression(
 1016            exp.JSONExtract(
 1017                this=this,
 1018                expression=self.dialect.to_json_path(path),
 1019                only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
 1020            )
 1021        ),
 1022        TokenType.DARROW: lambda self, this, path: self.expression(
 1023            exp.JSONExtractScalar(
 1024                this=this,
 1025                expression=self.dialect.to_json_path(path),
 1026                only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
 1027                scalar_only=self.dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY,
 1028            )
 1029        ),
 1030        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
 1031            exp.JSONBExtract(this=this, expression=path)
 1032        ),
 1033        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
 1034            exp.JSONBExtractScalar(this=this, expression=path)
 1035        ),
 1036        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
 1037            exp.JSONBContains(this=this, expression=key)
 1038        ),
 1039    }
 1040
         # The two COLUMN_OPERATORS keys whose callbacks take a `to` argument.
 1041    CAST_COLUMN_OPERATORS: t.ClassVar = {
 1042        TokenType.DOTCOLON,
 1043        TokenType.DCOLON,
 1044    }
 1045
 1046    EXPRESSION_PARSERS: t.ClassVar = {
             # Maps an expression class to a callback taking `self` that calls one of the
             # parser's `_parse_*` methods. Cluster and Sort share `_parse_sort`, differing
             # only in the expression class and token type they pass.
 1047        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
 1048        exp.Column: lambda self: self._parse_column(),
 1049        exp.ColumnDef: lambda self: self._parse_column_def(self._parse_column()),
 1050        exp.Condition: lambda self: self._parse_disjunction(),
 1051        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
 1052        exp.Expr: lambda self: self._parse_expression(),
 1053        exp.From: lambda self: self._parse_from(joins=True),
 1054        exp.GrantPrincipal: lambda self: self._parse_grant_principal(),
 1055        exp.GrantPrivilege: lambda self: self._parse_grant_privilege(),
 1056        exp.Group: lambda self: self._parse_group(),
 1057        exp.Having: lambda self: self._parse_having(),
 1058        exp.Hint: lambda self: self._parse_hint_body(),
 1059        exp.Identifier: lambda self: self._parse_id_var(),
 1060        exp.Join: lambda self: self._parse_join(),
 1061        exp.Lambda: lambda self: self._parse_lambda(),
 1062        exp.Lateral: lambda self: self._parse_lateral(),
 1063        exp.Limit: lambda self: self._parse_limit(),
 1064        exp.Offset: lambda self: self._parse_offset(),
 1065        exp.Order: lambda self: self._parse_order(),
 1066        exp.Ordered: lambda self: self._parse_ordered(),
 1067        exp.Properties: lambda self: self._parse_properties(),
 1068        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
 1069        exp.Qualify: lambda self: self._parse_qualify(),
 1070        exp.Returning: lambda self: self._parse_returning(),
 1071        exp.Select: lambda self: self._parse_select(),
 1072        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
 1073        exp.Table: lambda self: self._parse_table_parts(),
 1074        exp.TableAlias: lambda self: self._parse_table_alias(),
 1075        exp.Tuple: lambda self: self._parse_value(values=False),
 1076        exp.Whens: lambda self: self._parse_when_matched(),
 1077        exp.Where: lambda self: self._parse_where(),
 1078        exp.Window: lambda self: self._parse_named_window(),
 1079        exp.With: lambda self: self._parse_with(),
 1080    }
 1081
 1082    STATEMENT_PARSERS: t.ClassVar = {
             # Maps a token type to a callback taking `self` that parses the corresponding
             # statement. Shared callbacks: DESC/DESCRIBE -> `_parse_describe`,
             # COMMIT/ROLLBACK -> `_parse_commit_or_rollback`, PIVOT/UNPIVOT ->
             # `_parse_simplified_pivot` (UNPIVOT passes `is_unpivot=True`).
             # SEMICOLON returns a bare exp.Semicolon without going through `self.expression`.
 1083        TokenType.ALTER: lambda self: self._parse_alter(),
 1084        TokenType.ANALYZE: lambda self: self._parse_analyze(),
 1085        TokenType.BEGIN: lambda self: self._parse_transaction(),
 1086        TokenType.CACHE: lambda self: self._parse_cache(),
 1087        TokenType.COMMENT: lambda self: self._parse_comment(),
 1088        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
 1089        TokenType.COPY: lambda self: self._parse_copy(),
 1090        TokenType.CREATE: lambda self: self._parse_create(),
 1091        TokenType.DELETE: lambda self: self._parse_delete(),
 1092        TokenType.DESC: lambda self: self._parse_describe(),
 1093        TokenType.DESCRIBE: lambda self: self._parse_describe(),
 1094        TokenType.DROP: lambda self: self._parse_drop(),
 1095        TokenType.GRANT: lambda self: self._parse_grant(),
 1096        TokenType.REVOKE: lambda self: self._parse_revoke(),
 1097        TokenType.INSERT: lambda self: self._parse_insert(),
 1098        TokenType.KILL: lambda self: self._parse_kill(),
 1099        TokenType.LOAD: lambda self: self._parse_load(),
 1100        TokenType.MERGE: lambda self: self._parse_merge(),
 1101        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
 1102        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma(this=self._parse_expression())),
 1103        TokenType.REFRESH: lambda self: self._parse_refresh(),
 1104        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
 1105        TokenType.SET: lambda self: self._parse_set(),
 1106        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
 1107        TokenType.UNCACHE: lambda self: self._parse_uncache(),
 1108        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
 1109        TokenType.UPDATE: lambda self: self._parse_update(),
 1110        TokenType.USE: lambda self: self._parse_use(),
 1111        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
 1112    }
 1113
 1114    UNARY_PARSERS: t.ClassVar = {
             # Maps a prefix-operator token type to a callback taking `self`. NOT parses its
             # operand with `_parse_equality`; every other entry uses `_parse_unary`.
 1115        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
 1116        TokenType.NOT: lambda self: self.expression(exp.Not(this=self._parse_equality())),
 1117        TokenType.TILDE: lambda self: self.expression(exp.BitwiseNot(this=self._parse_unary())),
 1118        TokenType.DASH: lambda self: self.expression(exp.Neg(this=self._parse_unary())),
 1119        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt(this=self._parse_unary())),
 1120        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt(this=self._parse_unary())),
 1121    }
 1122
 1123    STRING_PARSERS: t.ClassVar = {
             # Maps a string-like token type to a callback taking `(self, token)`. Each one
             # builds its node from `token.text` and also passes the token to `self.expression`.
             # HEREDOC_STRING and RAW_STRING both produce exp.RawString.
             # UNICODE_STRING only parses an escape string when the text "UESCAPE" matches.
 1124        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
 1125            exp.RawString(this=token.text), token
 1126        ),
 1127        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
 1128            exp.National(this=token.text), token
 1129        ),
 1130        TokenType.RAW_STRING: lambda self, token: self.expression(
 1131            exp.RawString(this=token.text), token
 1132        ),
 1133        TokenType.STRING: lambda self, token: self.expression(
 1134            exp.Literal(this=token.text, is_string=True), token
 1135        ),
 1136        TokenType.UNICODE_STRING: lambda self, token: self.expression(
 1137            exp.UnicodeString(
 1138                this=token.text, escape=self._match_text_seq("UESCAPE") and self._parse_string()
 1139            ),
 1140            token,
 1141        ),
 1142    }
 1143
         # Same callback shape as STRING_PARSERS, for bit/byte/hex strings and numbers.
         # `is_bytes` and `is_integer` take the dialect flag's value, or None when it is falsy.
 1144    NUMERIC_PARSERS: t.ClassVar = {
 1145        TokenType.BIT_STRING: lambda self, token: self.expression(
 1146            exp.BitString(this=token.text), token
 1147        ),
 1148        TokenType.BYTE_STRING: lambda self, token: self.expression(
 1149            exp.ByteString(
 1150                this=token.text, is_bytes=self.dialect.BYTE_STRING_IS_BYTES_TYPE or None
 1151            ),
 1152            token,
 1153        ),
 1154        TokenType.HEX_STRING: lambda self, token: self.expression(
 1155            exp.HexString(
 1156                this=token.text, is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None
 1157            ),
 1158            token,
 1159        ),
 1160        TokenType.NUMBER: lambda self, token: self.expression(
 1161            exp.Literal(this=token.text, is_string=False), token
 1162        ),
 1163    }
 1164
 1165    PRIMARY_PARSERS: t.ClassVar = {
             # Merges STRING_PARSERS and NUMERIC_PARSERS, then adds further entries with the
             # same `(self, token)` callback shape. Apart from INTRODUCER, the added callbacks
             # ignore their token argument.
 1166        **STRING_PARSERS,
 1167        **NUMERIC_PARSERS,
 1168        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
 1169        TokenType.NULL: lambda self, _: self.expression(exp.Null()),
 1170        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean(this=True)),
 1171        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean(this=False)),
 1172        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
 1173        TokenType.STAR: lambda self, _: self._parse_star_ops(),
 1174    }
 1175
         # Maps a token type to a callback taking only `self`. The COLON callback builds a
         # Placeholder from `self._prev.text` when `_match_set(self.COLON_PLACEHOLDER_TOKENS)`
         # succeeds and returns None otherwise.
 1176    PLACEHOLDER_PARSERS: t.ClassVar = {
 1177        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder()),
 1178        TokenType.PARAMETER: lambda self: self._parse_parameter(),
 1179        TokenType.COLON: lambda self: (
 1180            self.expression(exp.Placeholder(this=self._prev.text))
 1181            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
 1182            else None
 1183        ),
 1184    }
 1185
 1186    RANGE_PARSERS: t.ClassVar = {
             # Maps a token type to a parser for the operator it introduces. The explicit
             # lambdas take `(self, this)`; the remaining values come from
             # `binary_range_parser(<expression class>)`.
             # AT_GT and LT_AT both build exp.ArrayContainsAll; LT_AT passes `reverse_args=True`.
 1187        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
 1188        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
 1189        TokenType.GLOB: binary_range_parser(exp.Glob),
 1190        TokenType.ILIKE: binary_range_parser(exp.ILike),
 1191        TokenType.IN: lambda self, this: self._parse_in(this),
 1192        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
 1193        TokenType.IS: lambda self, this: self._parse_is(this),
 1194        TokenType.LIKE: binary_range_parser(exp.Like),
 1195        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
 1196        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
 1197        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
 1198        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
 1199        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
 1200        TokenType.QMARK_AMP: binary_range_parser(exp.JSONBContainsAllTopKeys),
 1201        TokenType.QMARK_PIPE: binary_range_parser(exp.JSONBContainsAnyTopKeys),
 1202        TokenType.HASH_DASH: binary_range_parser(exp.JSONBDeleteAtPath),
 1203        TokenType.ADJACENT: binary_range_parser(exp.Adjacent),
 1204        TokenType.OPERATOR: lambda self, this: self._parse_operator(this),
 1205        TokenType.AMP_LT: binary_range_parser(exp.ExtendsLeft),
 1206        TokenType.AMP_GT: binary_range_parser(exp.ExtendsRight),
 1207    }
 1208
 1209    PIPE_SYNTAX_TRANSFORM_PARSERS: t.ClassVar = {
             # Keyed by keyword text; each callback takes `(self, query)`.
             # PIVOT and UNPIVOT share `_parse_pipe_syntax_pivot`.
             # DISTINCT evaluates `self._advance() or query.distinct(copy=False)`.
             # NOTE(review): presumably `_advance()` returns a falsy value so that the `or`
             # always yields the DISTINCT query - confirm.
 1210        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
 1211        "AS": lambda self, query: self._build_pipe_cte(
 1212            query, [exp.Star()], self._parse_table_alias()
 1213        ),
 1214        "DISTINCT": lambda self, query: self._advance() or query.distinct(copy=False),
 1215        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
 1216        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
 1217        "ORDER BY": lambda self, query: query.order_by(
 1218            self._parse_order(), append=False, copy=False
 1219        ),
 1220        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
 1221        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
 1222        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
 1223        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
 1224        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
 1225    }
 1226
 1227    PROPERTY_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
             # Keyed by property keyword text (some keys contain a space or an underscore).
             # Every callback takes `self`; the ones declared with `**kwargs` forward those
             # keyword arguments to the method they call.
             # Keys that produce the same result:
             #  - CHARSET / CHARACTER SET -> `_parse_character_set`
             #  - DETERMINISTIC / IMMUTABLE -> StabilityProperty with the string "IMMUTABLE"
             #  - FORMAT / USING -> property assignment to exp.FileFormatProperty
             #  - LOCK / LOCKING -> `_parse_locking`
             #  - PARTITION BY / PARTITIONED BY / PARTITIONED_BY -> `_parse_partitioned_by`
             #  - SECURITY / SQL SECURITY -> `_parse_sql_security`
             #  - TEMP / TEMPORARY -> exp.TemporaryProperty
 1228        "ALLOWED_VALUES": lambda self: self.expression(
 1229            exp.AllowedValuesProperty(expressions=self._parse_csv(self._parse_primary))
 1230        ),
 1231        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
 1232        "AUTO": lambda self: self._parse_auto_property(),
 1233        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
 1234        "BACKUP": lambda self: self.expression(
 1235            exp.BackupProperty(this=self._parse_var(any_token=True))
 1236        ),
 1237        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
 1238        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
 1239        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
 1240        "CHECKSUM": lambda self: self._parse_checksum(),
 1241        "CLUSTER BY": lambda self: self._parse_cluster_property(),
 1242        "CLUSTERED": lambda self: self._parse_clustered_by(),
 1243        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
 1244            exp.CollateProperty, **kwargs
 1245        ),
 1246        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
 1247        "CONTAINS": lambda self: self._parse_contains_property(),
 1248        "COPY": lambda self: self._parse_copy_property(),
 1249        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
 1250        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
 1251        "DEFINER": lambda self: self._parse_definer(),
 1252        "DETERMINISTIC": lambda self: self.expression(
 1253            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
 1254        ),
 1255        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
 1256        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
 1257        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty()),
 1258        "DISTKEY": lambda self: self._parse_distkey(),
 1259        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
 1260        "EMPTY": lambda self: self.expression(exp.EmptyProperty()),
 1261        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
 1262        "ENVIRONMENT": lambda self: self.expression(
 1263            exp.EnviromentProperty(expressions=self._parse_wrapped_csv(self._parse_assignment))
 1264        ),
 1265        "HANDLER": lambda self: self._parse_property_assignment(exp.HandlerProperty),
 1266        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
 1267        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty()),
 1268        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
 1269        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 1270        "FREESPACE": lambda self: self._parse_freespace(),
 1271        "GLOBAL": lambda self: self.expression(exp.GlobalProperty()),
 1272        "HEAP": lambda self: self.expression(exp.HeapProperty()),
 1273        "ICEBERG": lambda self: self.expression(exp.IcebergProperty()),
 1274        "IMMUTABLE": lambda self: self.expression(
 1275            exp.StabilityProperty(this=exp.Literal.string("IMMUTABLE"))
 1276        ),
 1277        "INHERITS": lambda self: self.expression(
 1278            exp.InheritsProperty(expressions=self._parse_wrapped_csv(self._parse_table))
 1279        ),
 1280        "INPUT": lambda self: self.expression(exp.InputModelProperty(this=self._parse_schema())),
 1281        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
 1282        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
 1283        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
 1284        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
 1285        "LIKE": lambda self: self._parse_create_like(),
 1286        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
 1287        "LOCK": lambda self: self._parse_locking(),
 1288        "LOCKING": lambda self: self._parse_locking(),
 1289        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
 1290        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty()),
 1291        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
 1292        "MODIFIES": lambda self: self._parse_modifies_property(),
 1293        "MULTISET": lambda self: self.expression(exp.SetProperty(multi=True)),
 1294        "NO": lambda self: self._parse_no_property(),
 1295        "ON": lambda self: self._parse_on_property(),
 1296        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
 1297        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty(this=self._parse_schema())),
 1298        "PARTITION": lambda self: self._parse_partitioned_of(),
 1299        "PARTITION BY": lambda self: self._parse_partitioned_by(),
 1300        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
 1301        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
 1302        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
 1303        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
 1304        "READS": lambda self: self._parse_reads_property(),
 1305        "REMOTE": lambda self: self._parse_remote_with_connection(),
 1306        "RETURNS": lambda self: self._parse_returns(),
 1307        "STRICT": lambda self: self.expression(exp.StrictProperty()),
 1308        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty()),
 1309        "ROW": lambda self: self._parse_row(),
 1310        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
 1311        "SAMPLE": lambda self: self.expression(
 1312            exp.SampleProperty(this=self._match_text_seq("BY") and self._parse_bitwise())
 1313        ),
 1314        "SECURE": lambda self: self.expression(exp.SecureProperty()),
 1315        "SECURITY": lambda self: self._parse_sql_security(),
 1316        "SQL SECURITY": lambda self: self._parse_sql_security(),
 1317        "SET": lambda self: self.expression(exp.SetProperty(multi=False)),
 1318        "SETTINGS": lambda self: self._parse_settings_property(),
 1319        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
 1320        "SORTKEY": lambda self: self._parse_sortkey(),
 1321        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
 1322        "STABLE": lambda self: self.expression(
 1323            exp.StabilityProperty(this=exp.Literal.string("STABLE"))
 1324        ),
 1325        "STORED": lambda self: self._parse_stored(),
 1326        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
 1327        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
 1328        "TEMP": lambda self: self.expression(exp.TemporaryProperty()),
 1329        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty()),
 1330        "TO": lambda self: self._parse_to_table(),
 1331        "TRANSIENT": lambda self: self.expression(exp.TransientProperty()),
 1332        "TRANSFORM": lambda self: self.expression(
 1333            exp.TransformModelProperty(expressions=self._parse_wrapped_csv(self._parse_expression))
 1334        ),
 1335        "TTL": lambda self: self._parse_ttl(),
 1336        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
 1337        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty()),
 1338        "VOLATILE": lambda self: self._parse_volatile_property(),
 1339        "WITH": lambda self: self._parse_with_property(),
 1340    }
 1341
 1342    CONSTRAINT_PARSERS: t.ClassVar = {
             # Keyed by constraint keyword text; every callback takes `self`.
             # Keys that share a parser:
             #  - AUTOINCREMENT / AUTO_INCREMENT / IDENTITY -> `_parse_auto_increment`
             #  - BUCKET / TRUNCATE -> `_parse_partitioned_by_bucket_or_truncate`
             # NULL builds NotNullColumnConstraint with `allow_null=True`.
             # ON builds OnUpdateColumnConstraint when an UPDATE token matches; otherwise the
             # `or` falls through to exp.OnProperty built from `_parse_id_var()`.
 1343        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
 1344        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
 1345        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint(not_=False)),
 1346        "CHARACTER SET": lambda self: self.expression(
 1347            exp.CharacterSetColumnConstraint(this=self._parse_var_or_string())
 1348        ),
 1349        "CHECK": lambda self: self._parse_check_constraint(),
 1350        "COLLATE": lambda self: self.expression(
 1351            exp.CollateColumnConstraint(this=self._parse_identifier() or self._parse_column())
 1352        ),
 1353        "COMMENT": lambda self: self.expression(
 1354            exp.CommentColumnConstraint(this=self._parse_string())
 1355        ),
 1356        "COMPRESS": lambda self: self._parse_compress(),
 1357        "CLUSTERED": lambda self: self.expression(
 1358            exp.ClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
 1359        ),
 1360        "NONCLUSTERED": lambda self: self.expression(
 1361            exp.NonClusteredColumnConstraint(this=self._parse_wrapped_csv(self._parse_ordered))
 1362        ),
 1363        "DEFAULT": lambda self: self.expression(
 1364            exp.DefaultColumnConstraint(this=self._parse_bitwise())
 1365        ),
 1366        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint(this=self._parse_var())),
 1367        "EPHEMERAL": lambda self: self.expression(
 1368            exp.EphemeralColumnConstraint(this=self._parse_bitwise())
 1369        ),
 1370        "EXCLUDE": lambda self: self.expression(
 1371            exp.ExcludeColumnConstraint(this=self._parse_index_params())
 1372        ),
 1373        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
 1374        "FORMAT": lambda self: self.expression(
 1375            exp.DateFormatColumnConstraint(this=self._parse_var_or_string())
 1376        ),
 1377        "GENERATED": lambda self: self._parse_generated_as_identity(),
 1378        "IDENTITY": lambda self: self._parse_auto_increment(),
 1379        "INLINE": lambda self: self._parse_inline(),
 1380        "LIKE": lambda self: self._parse_create_like(),
 1381        "NOT": lambda self: self._parse_not_constraint(),
 1382        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint(allow_null=True)),
 1383        "ON": lambda self: (
 1384            (
 1385                self._match(TokenType.UPDATE)
 1386                and self.expression(exp.OnUpdateColumnConstraint(this=self._parse_function()))
 1387            )
 1388            or self.expression(exp.OnProperty(this=self._parse_id_var()))
 1389        ),
 1390        "PATH": lambda self: self.expression(exp.PathColumnConstraint(this=self._parse_string())),
 1391        "PERIOD": lambda self: self._parse_period_for_system_time(),
 1392        "PRIMARY KEY": lambda self: self._parse_primary_key(),
 1393        "REFERENCES": lambda self: self._parse_references(match=False),
 1394        "TITLE": lambda self: self.expression(
 1395            exp.TitleColumnConstraint(this=self._parse_var_or_string())
 1396        ),
 1397        "TTL": lambda self: self.expression(exp.MergeTreeTTL(expressions=[self._parse_bitwise()])),
 1398        "UNIQUE": lambda self: self._parse_unique(),
 1399        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint()),
 1400        "WITH": lambda self: self.expression(
 1401            exp.Properties(expressions=self._parse_wrapped_properties())
 1402        ),
 1403        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
 1404        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
 1405    }
 1406
 1407    def _parse_partitioned_by_bucket_or_truncate(self) -> exp.Expr | None:
 1408        if not self._match(TokenType.L_PAREN, advance=False):
 1409            # Partitioning by bucket or truncate follows the syntax:
 1410            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
 1411            # If we don't have parenthesis after each keyword, we should instead parse this as an identifier
 1412            self._retreat(self._index - 1)
 1413            return None
 1414
 1415        klass = (
 1416            exp.PartitionedByBucket
 1417            if self._prev.text.upper() == "BUCKET"
 1418            else exp.PartitionByTruncate
 1419        )
 1420
 1421        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
 1422        this, expression = seq_get(args, 0), seq_get(args, 1)
 1423
 1424        if isinstance(this, exp.Literal):
 1425            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
 1426            #  - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
 1427            #  - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
 1428            # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)`
 1429            #
 1430            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
 1431            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
 1432            this, expression = expression, this
 1433
 1434        return self.expression(klass(this=this, expression=expression))
 1435
 1436    ALTER_PARSERS: t.ClassVar = {
             # Keyed by keyword text; every callback takes `self`.
             # DELETE builds exp.Delete carrying only the parsed WHERE clause.
             # SWAP parses the table only when a WITH token matches; otherwise `this` is the
             # falsy result of `_match`.
 1437        "ADD": lambda self: self._parse_alter_table_add(),
 1438        "AS": lambda self: self._parse_select(),
 1439        "ALTER": lambda self: self._parse_alter_table_alter(),
 1440        "CLUSTER BY": lambda self: self._parse_cluster_property(),
 1441        "DELETE": lambda self: self.expression(exp.Delete(where=self._parse_where())),
 1442        "DROP": lambda self: self._parse_alter_table_drop(),
 1443        "RENAME": lambda self: self._parse_alter_table_rename(),
 1444        "SET": lambda self: self._parse_alter_table_set(),
 1445        "SWAP": lambda self: self.expression(
 1446            exp.SwapTable(this=self._match(TokenType.WITH) and self._parse_table(schema=True))
 1447        ),
 1448    }
 1449
         # Keyed by keyword text; every callback takes `self`. DISTKEY and DISTSTYLE share
         # `_parse_alter_diststyle`; SORTKEY and COMPOUND share `_parse_alter_sortkey`, with
         # COMPOUND passing `compound=True`.
 1450    ALTER_ALTER_PARSERS: t.ClassVar = {
 1451        "DISTKEY": lambda self: self._parse_alter_diststyle(),
 1452        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
 1453        "SORTKEY": lambda self: self._parse_alter_sortkey(),
 1454        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
 1455    }
 1456
 1457    SCHEMA_UNNAMED_CONSTRAINTS: t.ClassVar = {
             # Set of constraint keywords; every entry is also a key of CONSTRAINT_PARSERS.
             # NOTE(review): presumably the constraints that may appear in a schema without a
             # preceding constraint name - confirm against the code that reads this set.
 1458        "CHECK",
 1459        "EXCLUDE",
 1460        "FOREIGN KEY",
 1461        "LIKE",
 1462        "PERIOD",
 1463        "PRIMARY KEY",
 1464        "UNIQUE",
 1465        "BUCKET",
 1466        "TRUNCATE",
 1467    }
 1468
 1469    NO_PAREN_FUNCTION_PARSERS: t.ClassVar = {
             # Keyed by keyword text; every callback takes `self`.
             # NOTE(review): presumably constructs that are parsed like functions but are not
             # followed by a parenthesized argument list - confirm at the use site.
 1470        "ANY": lambda self: self.expression(exp.Any(this=self._parse_bitwise())),
 1471        "CASE": lambda self: self._parse_case(),
 1472        "CONNECT_BY_ROOT": lambda self: self.expression(
 1473            exp.ConnectByRoot(this=self._parse_column())
 1474        ),
 1475        "IF": lambda self: self._parse_if(),
 1476    }
 1477
         # Set holding the IDENTIFIER and STRING token types.
 1478    INVALID_FUNC_NAME_TOKENS: t.ClassVar = {
 1479        TokenType.IDENTIFIER,
 1480        TokenType.STRING,
 1481    }
 1482
         # Set of function names; contains only "STRUCT" in this class.
 1483    FUNCTIONS_WITH_ALIASED_ARGS: t.ClassVar = {"STRUCT"}
 1484
         # Tuple of expression classes.
         # NOTE(review): presumably intended for `isinstance` checks that recognise
         # key/value style arguments - confirm at the use site.
 1485    KEY_VALUE_DEFINITIONS: t.ClassVar = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
 1486
    # Upper-cased function name -> callback that receives the parser instance and
    # delegates to a dedicated `_parse_*` method for that function's special syntax.
    # Every SQL name reported by exp.ArgMax.sql_names() / exp.ArgMin.sql_names() is
    # routed to _parse_max_min_by with the matching expression class.
    # CAST and CONVERT forward the class-level STRICT_CAST flag, whereas SAFE_CAST,
    # TRY_CAST and TRY_CONVERT always pass False together with safe=True.
    FUNCTION_PARSERS: t.ClassVar[dict[str, t.Callable]] = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "CHAR": lambda self: self._parse_char(),
        "CHR": lambda self: self._parse_char(),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
        "FLOOR": lambda self: self._parse_ceil_floor(exp.Floor),
        "GAP_FILL": lambda self: self._parse_gap_fill(),
        "INITCAP": lambda self: self._parse_initcap(),
        "JSON_OBJECT": lambda self: self._parse_json_object(),
        "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True),
        "JSON_TABLE": lambda self: self._parse_json_table(),
        "MATCH": lambda self: self._parse_match_against(),
        "NORMALIZE": lambda self: self._parse_normalize(),
        "OPENJSON": lambda self: self._parse_open_json(),
        "OVERLAY": lambda self: self._parse_overlay(),
        "POSITION": lambda self: self._parse_position(),
        "SAFE_CAST": lambda self: self._parse_cast(False, safe=True),
        "STRING_AGG": lambda self: self._parse_string_agg(),
        "SUBSTRING": lambda self: self._parse_substring(),
        "TRIM": lambda self: self._parse_trim(),
        "TRY_CAST": lambda self: self._parse_cast(False, safe=True),
        "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True),
        "XMLELEMENT": lambda self: self._parse_xml_element(),
        "XMLTABLE": lambda self: self._parse_xml_table(),
    }
 1521
    # Token type -> callback that receives the parser instance and returns a
    # (key, parsed node) pair.
    # Several token types intentionally share a key: LIMIT/FETCH -> "limit",
    # FOR/LOCK -> "locks", TABLE_SAMPLE/USING -> "sample" and
    # CONNECT_BY/START_WITH -> "connect".
    QUERY_MODIFIER_PARSERS: t.ClassVar = {
        TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()),
        TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()),
        TokenType.WHERE: lambda self: ("where", self._parse_where()),
        TokenType.GROUP_BY: lambda self: ("group", self._parse_group()),
        TokenType.HAVING: lambda self: ("having", self._parse_having()),
        TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()),
        TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()),
        TokenType.ORDER_BY: lambda self: ("order", self._parse_order()),
        TokenType.LIMIT: lambda self: ("limit", self._parse_limit()),
        TokenType.FETCH: lambda self: ("limit", self._parse_limit()),
        TokenType.OFFSET: lambda self: ("offset", self._parse_offset()),
        TokenType.FOR: lambda self: ("locks", self._parse_locks()),
        TokenType.LOCK: lambda self: ("locks", self._parse_locks()),
        TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)),
        TokenType.CLUSTER_BY: lambda self: (
            "cluster",
            self._parse_cluster(),
        ),
        TokenType.DISTRIBUTE_BY: lambda self: (
            "distribute",
            self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY),
        ),
        TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)),
        TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)),
        TokenType.START_WITH: lambda self: ("connect", self._parse_connect()),
    }
    # The token types that have an entry in QUERY_MODIFIER_PARSERS (its keys as a set).
    QUERY_MODIFIER_TOKENS: t.ClassVar = set(QUERY_MODIFIER_PARSERS)
 1551
    # Keyword -> callback (receiving the parser instance) for SET statement items.
    # The scope keywords forward their own name to _parse_set_item_assignment.
    SET_PARSERS: t.ClassVar = {
        "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"),
        "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"),
        "SESSION": lambda self: self._parse_set_item_assignment("SESSION"),
        "TRANSACTION": lambda self: self._parse_set_transaction(),
    }

    # Empty in this class.
    # NOTE(review): presumably populated by dialect-specific subclasses - confirm.
    SHOW_PARSERS: t.ClassVar[dict[str, t.Callable]] = {}

    # Data type -> callback taking (parser, this, <ignored third argument>).
    TYPE_LITERAL_PARSERS: t.ClassVar = {
        exp.DType.JSON: lambda self, this, _: self.expression(exp.ParseJSON(this=this)),
    }

    # Data type -> function mapping an exp.DataType to an exp.DataType; empty in this class.
    TYPE_CONVERTERS: t.ClassVar[dict[exp.DType, t.Callable[[exp.DataType], exp.DataType]]] = {}

    # NOTE(review): presumably the tokens that can start the SELECT part of a DDL statement - confirm.
    DDL_SELECT_TOKENS: t.ClassVar = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    # NOTE(review): presumably the tokens that may precede a VOLATILE keyword - confirm.
    PRE_VOLATILE_TOKENS: t.ClassVar = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    # Upper-cased transaction kind keywords.
    TRANSACTION_KIND: t.ClassVar = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
 1572    TRANSACTION_CHARACTERISTICS: t.ClassVar[OPTIONS_TYPE] = {
 1573        "ISOLATION": (
 1574            ("LEVEL", "REPEATABLE", "READ"),
 1575            ("LEVEL", "READ", "COMMITTED"),
 1576            ("LEVEL", "READ", "UNCOMITTED"),
 1577            ("LEVEL", "SERIALIZABLE"),
 1578        ),
 1579        "READ": ("WRITE", "ONLY"),
 1580    }
 1581
    # The OPTIONS_TYPE tables below map a leading keyword to the continuations listed
    # for it; a continuation is either a single word or a sequence of words, and an
    # empty tuple means no continuation is listed for that keyword.

    # Conflict action keywords; "DO" is followed by NOTHING or UPDATE.
    CONFLICT_ACTIONS: t.ClassVar[OPTIONS_TYPE] = {
        **dict.fromkeys(("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()),
        "DO": ("NOTHING", "UPDATE"),
    }

    # Trigger timing keywords; "INSTEAD" is followed by the one-word sequence ("OF",).
    TRIGGER_TIMING: t.ClassVar[OPTIONS_TYPE] = {
        "INSTEAD": (("OF",),),
        "BEFORE": tuple(),
        "AFTER": tuple(),
    }

    # DEFERRABLE / NOT DEFERRABLE.
    TRIGGER_DEFERRABLE: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": (("DEFERRABLE",),),
        "DEFERRABLE": tuple(),
    }

    # Sequence creation options.
    CREATE_SEQUENCE: t.ClassVar[OPTIONS_TYPE] = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {"FOR": ("ALL", "INSERT", "NONE")}

    # All keys share the same (empty) continuation tuple via dict.fromkeys.
    # NOTE(review): presumably the object kinds accepted by a USE statement - confirm.
    USABLES: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    # RENAME FIELDS / ADD FIELDS.
    CAST_ACTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    # Empty in this class.
    PROCEDURE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {}

    EXECUTE_AS_OPTIONS: t.ClassVar[OPTIONS_TYPE] = dict.fromkeys(
        ("CALLER", "SELF", "OWNER"), tuple()
    )

    # Options that may follow a key constraint definition.
    KEY_CONSTRAINT_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    # NO OTHERS / CURRENT ROW / GROUP / TIES.
    WINDOW_EXCLUDE_OPTIONS: t.ClassVar[OPTIONS_TYPE] = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }
 1661
    # The sets below hold upper-cased keywords (plain strings) or token types; string
    # sets are compared against upper-cased token text by the _match_texts helper.

    INSERT_ALTERNATIVES: t.ClassVar = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS: t.ClassVar = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX: t.ClassVar = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND: t.ClassVar = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS: t.ClassVar = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS: t.ClassVar = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS: t.ClassVar = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES: t.ClassVar = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    # ID_VAR_TOKENS without RANGE and ROWS.
    WINDOW_ALIAS_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.RANGE, TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS: t.ClassVar = {TokenType.OVER}
    WINDOW_SIDES: t.ClassVar = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS: t.ClassVar = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    # ID_VAR_TOKENS without ROW, ROWS and PERCENT.
    FETCH_TOKENS: t.ClassVar = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS: t.ClassVar = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS: t.ClassVar = {TokenType.DISTINCT}

    # TABLE_ALIAS_TOKENS without the set-operation tokens.
    UNNEST_OFFSET_ALIAS_TOKENS: t.ClassVar = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS: t.ClassVar = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS: t.ClassVar = {
        "FILE_FORMAT",
        "COPY_OPTIONS",
        "FORMAT_OPTIONS",
        "CREDENTIAL",
    }

    IS_JSON_PREDICATE_KIND: t.ClassVar = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    # Literal prefix -> expression class; empty in this class.
    ODBC_DATETIME_LITERALS: t.ClassVar[dict[str, type[exp.Expr]]] = {}

    # Despite the name, these are keyword strings rather than token types.
    ON_CONDITION_TOKENS: t.ClassVar = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS: t.ClassVar = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES: t.ClassVar = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    SET_ASSIGNMENT_DELIMITERS: t.ClassVar = {"=", ":=", "TO"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES: t.ClassVar = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    # Keyword -> callback (receiving the parser instance) for ANALYZE sub-expressions.
    # Several keywords share a handler: ALL/PREDICATE, COMPUTE/ESTIMATE and DROP/UPDATE.
    ANALYZE_EXPRESSION_PARSERS: t.ClassVar = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS: t.ClassVar = {"PARTITION", "SUBPARTITION"}

    # A tuple (not a set) of token types.
    # NOTE(review): presumably tokens that are ambiguous between an alias and a clause keyword - confirm.
    AMBIGUOUS_ALIAS_TOKENS: t.ClassVar = (TokenType.LIMIT, TokenType.OFFSET)

    # Empty in this class.
    OPERATION_MODIFIERS: t.ClassVar[set[str]] = set()

    RECURSIVE_CTE_SEARCH_KIND: t.ClassVar = {"BREADTH", "DEPTH", "CYCLE"}

    SECURITY_PROPERTY_KEYWORDS: t.ClassVar = {"DEFINER", "INVOKER", "NONE"}

    # Expression classes.
    # NOTE(review): presumably the node types that query modifiers can be attached to - confirm.
    MODIFIABLES: t.ClassVar = (exp.Query, exp.Table, exp.TableFromRows, exp.Values)
 1753
    # Forwarded as the first argument to _parse_cast / _parse_convert by the CAST and
    # CONVERT entries of FUNCTION_PARSERS.
    STRICT_CAST: t.ClassVar = True

    # NOTE(review): the two pivot flags below are not described here; presumably they
    # control how pivoted column names are prefixed / quoted - confirm against the pivot parser.
    PREFIXED_PIVOT_COLUMNS: t.ClassVar = False
    IDENTIFY_PIVOT_STRINGS: t.ClassVar = False
    # Controls when an aggregation's name is included in a pivoted column's name:
    # "agg_name_if_aliased" - only for aggregations that carry an explicit alias
    # "agg_name_if_aliased_or_multiple" - if aliased, or whenever there are multiple aggregations
    # "agg_name_if_multiple" - only when there are multiple aggregations (a lone agg is value-only)
    PIVOT_COLUMN_NAMING: t.ClassVar[str] = "agg_name_if_aliased"

    # NOTE(review): presumably whether a single-argument LOG is treated as LN - confirm.
    LOG_DEFAULTS_TO_LN: t.ClassVar = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV: t.ClassVar = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.ClassVar[str | None] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER: t.ClassVar = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST: t.ClassVar = False

    # Whether string aliases are supported `SELECT COUNT(*) 'count'`
    STRING_ALIASES: t.ClassVar = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP: t.ClassVar = True
    # Modifier keys; these match the "order", "limit" and "offset" keys produced by
    # QUERY_MODIFIER_PARSERS.
    SET_OP_MODIFIERS: t.ClassVar = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS: t.ClassVar = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE: t.ClassVar = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT: t.ClassVar = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN: t.ClassVar = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST: t.ClassVar = False

    # Whether or not interval spans are supported, INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS: t.ClassVar = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION: t.ClassVar = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT: t.ClassVar = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE: t.ClassVar = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN: t.ClassVar = True

    # Whether Alter statements are allowed to contain Partition specifications
    ALTER_TABLE_PARTITIONS: t.ClassVar = False

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE: t.ClassVar = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR: t.ClassVar = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: t.ClassVar = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g this
    # is true for Snowflake but not for BigQuery which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION: t.ClassVar = False

    # Dialects like Databricks support JOINS without join criteria
    # Adding an ON TRUE, makes transpilation semantically correct for other dialects
    ADD_JOIN_ON_TRUE: t.ClassVar = False

    # Whether INTERVAL spans with literal format '\d+ hh:[mm:[ss[.ff]]]'
    # can omit the span unit `DAY TO MINUTE` or `DAY TO SECOND`
    SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT: t.ClassVar = False

    # Tries built from the space-separated words of each SHOW_PARSERS / SET_PARSERS key,
    # computed once when the class body is executed.
    SHOW_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SHOW_PARSERS)
    SET_TRIE: t.ClassVar[dict] = new_trie(key.split(" ") for key in SET_PARSERS)
 1847
 1848    def __init__(
 1849        self,
 1850        error_level: ErrorLevel | None = None,
 1851        error_message_context: int = 100,
 1852        max_errors: int = 3,
 1853        max_nodes: int = -1,
 1854        dialect: DialectType = None,
 1855    ):
 1856        self.error_level: ErrorLevel = error_level or ErrorLevel.IMMEDIATE
 1857        self.error_message_context: int = error_message_context
 1858        self.max_errors: int = max_errors
 1859        self.max_nodes: int = max_nodes
 1860        self.dialect: t.Any = _resolve_dialect(dialect)
 1861        self.sql: str = ""
 1862        self.errors: list[ParseError] = []
 1863        self._tokens: list[Token] = []
 1864        self._tokens_size: i64 = 0
 1865        self._index: i64 = 0
 1866        self._curr: Token = SENTINEL_NONE
 1867        self._next: Token = SENTINEL_NONE
 1868        self._prev: Token = SENTINEL_NONE
 1869        self._prev_comments: list[str] = []
 1870        self._pipe_cte_counter: int = 0
 1871        self._chunks: list[list[Token]] = []
 1872        self._chunk_index: i64 = 0
 1873        self._node_count: int = 0
 1874
 1875    def reset(self) -> None:
 1876        self.sql = ""
 1877        self.errors = []
 1878        self._tokens = []
 1879        self._tokens_size = 0
 1880        self._index = 0
 1881        self._curr = SENTINEL_NONE
 1882        self._next = SENTINEL_NONE
 1883        self._prev = SENTINEL_NONE
 1884        self._prev_comments = []
 1885        self._pipe_cte_counter = 0
 1886        self._chunks = []
 1887        self._chunk_index = 0
 1888        self._node_count = 0
 1889
 1890    def _advance(self, times: i64 = 1) -> None:
 1891        index = self._index + times
 1892        self._index = index
 1893        tokens = self._tokens
 1894        size = self._tokens_size
 1895        self._curr = tokens[index] if index < size else SENTINEL_NONE
 1896        self._next = tokens[index + 1] if index + 1 < size else SENTINEL_NONE
 1897
 1898        if index > 0:
 1899            prev = tokens[index - 1]
 1900            self._prev = prev
 1901            self._prev_comments = prev.comments
 1902        else:
 1903            self._prev = SENTINEL_NONE
 1904            self._prev_comments = []
 1905
 1906    def _advance_chunk(self) -> None:
 1907        self._index = -1
 1908        self._tokens = self._chunks[self._chunk_index]
 1909        self._tokens_size = i64(len(self._tokens))
 1910        self._chunk_index += 1
 1911        self._advance()
 1912
 1913    def _retreat(self, index: i64) -> None:
 1914        if index != self._index:
 1915            self._advance(index - self._index)
 1916
 1917    def _add_comments(self, expression: exp.Expr | None) -> None:
 1918        if expression and self._prev_comments:
 1919            expression.add_comments(self._prev_comments)
 1920            self._prev_comments = []
 1921
 1922    def _match(
 1923        self, token_type: TokenType, advance: bool = True, expression: exp.Expr | None = None
 1924    ) -> bool:
 1925        if self._curr.token_type == token_type:
 1926            if advance:
 1927                self._advance()
 1928            self._add_comments(expression)
 1929            return True
 1930        return False
 1931
 1932    def _match_set(self, types: t.Collection[TokenType], advance: bool = True) -> bool:
 1933        if self._curr.token_type in types:
 1934            if advance:
 1935                self._advance()
 1936            return True
 1937        return False
 1938
 1939    def _match_pair(
 1940        self, token_type_a: TokenType, token_type_b: TokenType, advance: bool = True
 1941    ) -> bool:
 1942        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
 1943            if advance:
 1944                self._advance(2)
 1945            return True
 1946        return False
 1947
 1948    def _match_texts(self, texts: t.Collection[str], advance: bool = True) -> bool:
 1949        if self._curr.token_type != TokenType.STRING and self._curr.text.upper() in texts:
 1950            if advance:
 1951                self._advance()
 1952            return True
 1953        return False
 1954
 1955    def _match_text_seq(self, *texts: str, advance: bool = True) -> bool:
 1956        index = self._index
 1957        string_type = TokenType.STRING
 1958        for text in texts:
 1959            if self._curr.token_type != string_type and self._curr.text.upper() == text:
 1960                self._advance()
 1961            else:
 1962                self._retreat(index)
 1963                return False
 1964
 1965        if not advance:
 1966            self._retreat(index)
 1967
 1968        return True
 1969
 1970    def _is_connected(self) -> bool:
 1971        prev = self._prev
 1972        curr = self._curr
 1973        return bool(prev and curr and prev.end + 1 == curr.start)
 1974
 1975    def _find_sql(self, start: Token, end: Token) -> str:
 1976        return self.sql[start.start : end.end + 1]
 1977
 1978    def raise_error(self, message: str, token: Token = SENTINEL_NONE) -> None:
 1979        token = token or self._curr or self._prev or Token.string("")
 1980        formatted_sql, start_context, highlight, end_context = highlight_sql(
 1981            sql=self.sql,
 1982            positions=[(token.start, token.end)],
 1983            context_length=self.error_message_context,
 1984        )
 1985        formatted_message = f"{message}. Line {token.line}, Col: {token.col}.\n  {formatted_sql}"
 1986
 1987        error = ParseError.new(
 1988            formatted_message,
 1989            description=message,
 1990            line=token.line,
 1991            col=token.col,
 1992            start_context=start_context,
 1993            highlight=highlight,
 1994            end_context=end_context,
 1995        )
 1996
 1997        if self.error_level == ErrorLevel.IMMEDIATE:
 1998            raise error
 1999
 2000        self.errors.append(error)
 2001
 2002    def validate_expression(self, expression: E, args: list | None = None) -> E:
 2003        if self.max_nodes > -1:
 2004            self._node_count += 1
 2005            if self._node_count > self.max_nodes:
 2006                self.raise_error(f"Maximum number of AST nodes ({self.max_nodes}) exceeded")
 2007        if self.error_level != ErrorLevel.IGNORE:
 2008            for error_message in expression.error_messages(args):
 2009                self.raise_error(error_message)
 2010        return expression
 2011
 2012    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> T | None:
 2013        index = self._index
 2014        error_level = self.error_level
 2015        this: T | None = None
 2016
 2017        self.error_level = ErrorLevel.IMMEDIATE
 2018        try:
 2019            this = parse_method()
 2020        except ParseError:
 2021            this = None
 2022        finally:
 2023            if not this or retreat:
 2024                self._retreat(index)
 2025            self.error_level = error_level
 2026
 2027        return this
 2028
 2029    def parse(self, raw_tokens: list[Token], sql: str) -> list[exp.Expr | None]:
 2030        """
 2031        Parses a list of tokens and returns a list of syntax trees, one tree
 2032        per parsed SQL statement.
 2033
 2034        Args:
 2035            raw_tokens: The list of tokens.
 2036            sql: The original SQL string.
 2037
 2038        Returns:
 2039            The list of the produced syntax trees.
 2040        """
 2041        return self._parse(
 2042            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
 2043        )
 2044
 2045    def parse_into(
 2046        self,
 2047        expression_types: exp.IntoType,
 2048        raw_tokens: list[Token],
 2049        sql: str | None = None,
 2050    ) -> list[exp.Expr | None]:
 2051        """
 2052        Parses a list of tokens into a given Expr type. If a collection of Expr
 2053        types is given instead, this method will try to parse the token list into each one
 2054        of them, stopping at the first for which the parsing succeeds.
 2055
 2056        Args:
 2057            expression_types: The expression type(s) to try and parse the token list into.
 2058            raw_tokens: The list of tokens.
 2059            sql: The original SQL string, used to produce helpful debug messages.
 2060
 2061        Returns:
 2062            The target Expr.
 2063        """
 2064        errors = []
 2065        for expression_type in ensure_list(expression_types):
 2066            parser = self.EXPRESSION_PARSERS.get(t.cast(type[exp.Expr], expression_type))
 2067            if not parser:
 2068                raise TypeError(f"No parser registered for {expression_type}")
 2069
 2070            try:
 2071                return self._parse(parser, raw_tokens, sql)
 2072            except ParseError as e:
 2073                e.errors[0]["into_expression"] = expression_type
 2074                errors.append(e)
 2075
 2076        raise ParseError(
 2077            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
 2078            errors=merge_errors(errors),
 2079        ) from errors[-1]
 2080
 2081    def check_errors(self) -> None:
 2082        """Logs or raises any found errors, depending on the chosen error level setting."""
 2083        if self.error_level == ErrorLevel.WARN:
 2084            for error in self.errors:
 2085                logger.error(str(error))
 2086        elif self.error_level == ErrorLevel.RAISE and self.errors:
 2087            raise ParseError(
 2088                concat_messages(self.errors, self.max_errors),
 2089                errors=merge_errors(self.errors),
 2090            )
 2091
 2092    def expression(
 2093        self,
 2094        instance: E,
 2095        token: Token | None = None,
 2096        comments: list[str] | None = None,
 2097    ) -> E:
 2098        if token:
 2099            instance.update_positions(token)
 2100        instance.add_comments(comments) if comments else self._add_comments(instance)
 2101        if not instance.is_primitive:
 2102            instance = self.validate_expression(instance)
 2103        return instance
 2104
    def _parse_batch_statements(
        self,
        parse_method: t.Callable[[Parser], exp.Expr | None],
        sep_first_statement: bool = True,
    ) -> list[exp.Expr | None]:
        """Parse the remaining chunks, one statement per chunk, and collect the results.

        Args:
            parse_method: Callback invoked with this parser to parse a single statement.
            sep_first_statement: When True, an optional BEGIN token is consumed and one
                statement is parsed from the current chunk before the remaining chunks
                are iterated.

        Returns:
            The parsed statements in order. Iteration stops early, without consuming
            the token, when a chunk starts with ELSE.
        """
        expressions = []

        # Chunkification binds if/while statements with the first statement of the body
        if sep_first_statement:
            self._match(TokenType.BEGIN)
            expressions.append(parse_method(self))

        chunks_length = len(self._chunks)
        while self._chunk_index < chunks_length:
            self._advance_chunk()

            # Leave ELSE unconsumed and hand control back to the caller.
            if self._match(TokenType.ELSE, advance=False):
                return expressions

            # A chunk consisting of a lone END (no token after it), seen after at least
            # one statement was collected, is recorded as an EndStatement.
            if expressions and not self._next and self._match(TokenType.END):
                expressions.append(exp.EndStatement())
                continue

            expressions.append(parse_method(self))

            # Tokens left over in the chunk mean the statement was not fully consumed.
            if self._index < self._tokens_size:
                self.raise_error("Invalid expression / Unexpected token")

            self.check_errors()

        return expressions
 2136
 2137    def _parse(
 2138        self,
 2139        parse_method: t.Callable[[Parser], exp.Expr | None],
 2140        raw_tokens: list[Token],
 2141        sql: str | None = None,
 2142    ) -> list[exp.Expr | None]:
 2143        self.reset()
 2144        self.sql = sql or ""
 2145
 2146        total = len(raw_tokens)
 2147        chunks: list[list[Token]] = [[]]
 2148
 2149        for i, token in enumerate(raw_tokens):
 2150            if token.token_type == TokenType.SEMICOLON:
 2151                if token.comments:
 2152                    chunks.append([token])
 2153
 2154                if i < total - 1:
 2155                    chunks.append([])
 2156            else:
 2157                chunks[-1].append(token)
 2158
 2159        self._chunks = chunks
 2160
 2161        return self._parse_batch_statements(parse_method=parse_method, sep_first_statement=False)
 2162
 2163    def _warn_unsupported(self) -> None:
 2164        if self._tokens_size <= 1:
 2165            return
 2166
 2167        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
 2168        # interested in emitting a warning for the one being currently processed.
 2169        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
 2170
 2171        logger.warning(
 2172            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
 2173        )
 2174
 2175    def _parse_command(self) -> exp.Command:
 2176        self._warn_unsupported()
 2177        comments = self._prev_comments
 2178        return self.expression(
 2179            exp.Command(this=self._prev.text.upper(), expression=self._parse_string()),
 2180            comments=comments,
 2181        )
 2182
 2183    def _parse_comment(self, allow_exists: bool = True) -> exp.Expr:
 2184        start = self._prev
 2185        exists = self._parse_exists() if allow_exists else None
 2186
 2187        self._match(TokenType.ON)
 2188
 2189        materialized = self._match_text_seq("MATERIALIZED")
 2190        kind = self._match_set(self.CREATABLES) and self._prev
 2191        if not kind:
 2192            return self._parse_as_command(start)
 2193
 2194        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
 2195            this = self._parse_user_defined_function(kind=kind.token_type)
 2196        elif kind.token_type == TokenType.TABLE:
 2197            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
 2198        elif kind.token_type == TokenType.COLUMN:
 2199            this = self._parse_column()
 2200        else:
 2201            this = self._parse_table_parts(schema=True)
 2202
 2203        self._match(TokenType.IS)
 2204
 2205        return self.expression(
 2206            exp.Comment(
 2207                this=this,
 2208                kind=kind.text,
 2209                expression=self._parse_string(),
 2210                exists=exists,
 2211                materialized=materialized,
 2212            )
 2213        )
 2214
 2215    def _parse_to_table(
 2216        self,
 2217    ) -> exp.ToTableProperty:
 2218        table = self._parse_table_parts(schema=True)
 2219        return self.expression(exp.ToTableProperty(this=table))
 2220
 2221    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
 2222    def _parse_ttl(self) -> exp.Expr:
 2223        def _parse_ttl_action() -> exp.Expr | None:
 2224            this = self._parse_bitwise()
 2225
 2226            if self._match_text_seq("DELETE"):
 2227                return self.expression(exp.MergeTreeTTLAction(this=this, delete=True))
 2228            if self._match_text_seq("RECOMPRESS"):
 2229                return self.expression(
 2230                    exp.MergeTreeTTLAction(this=this, recompress=self._parse_bitwise())
 2231                )
 2232            if self._match_text_seq("TO", "DISK"):
 2233                return self.expression(
 2234                    exp.MergeTreeTTLAction(this=this, to_disk=self._parse_string())
 2235                )
 2236            if self._match_text_seq("TO", "VOLUME"):
 2237                return self.expression(
 2238                    exp.MergeTreeTTLAction(this=this, to_volume=self._parse_string())
 2239                )
 2240
 2241            return this
 2242
 2243        expressions = self._parse_csv(_parse_ttl_action)
 2244        where = self._parse_where()
 2245        group = self._parse_group()
 2246
 2247        aggregates = None
 2248        if group and self._match(TokenType.SET):
 2249            aggregates = self._parse_csv(self._parse_set_item)
 2250
 2251        return self.expression(
 2252            exp.MergeTreeTTL(
 2253                expressions=expressions, where=where, group=group, aggregates=aggregates
 2254            )
 2255        )
 2256
 2257    def _parse_condition(self) -> exp.Expr | None:
 2258        return self._parse_wrapped(parse_method=self._parse_expression, optional=True)
 2259
 2260    def _parse_block(self) -> exp.Block:
 2261        return self.expression(
 2262            exp.Block(
 2263                expressions=self._parse_batch_statements(
 2264                    parse_method=lambda self: self._parse_statement()
 2265                )
 2266            )
 2267        )
 2268
 2269    def _parse_whileblock(self) -> exp.WhileBlock:
 2270        return self.expression(
 2271            exp.WhileBlock(this=self._parse_condition(), body=self._parse_block())
 2272        )
 2273
 2274    def _parse_statement(self) -> exp.Expr | None:
 2275        if not self._curr:
 2276            return None
 2277
 2278        if self._match_set(self.STATEMENT_PARSERS):
 2279            comments = self._prev_comments
 2280            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
 2281            stmt.add_comments(comments, prepend=True)
 2282            return stmt
 2283
 2284        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
 2285            return self._parse_command()
 2286
 2287        if self._match_text_seq("WHILE"):
 2288            return self._parse_whileblock()
 2289
 2290        expression = self._parse_expression()
 2291        expression = self._parse_set_operations(expression) if expression else self._parse_select()
 2292
 2293        if isinstance(expression, exp.Subquery) and self._match(TokenType.PIPE_GT, advance=False):
 2294            expression = self._parse_pipe_syntax_query(expression)
 2295
 2296        return self._parse_query_modifiers(expression)
 2297
 2298    def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command:
 2299        start = self._prev
 2300        temporary = self._match(TokenType.TEMPORARY)
 2301        materialized = self._match_text_seq("MATERIALIZED")
 2302        iceberg = self._match_text_seq("ICEBERG")
 2303
 2304        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()
 2305        if not kind or (iceberg and kind and kind != "TABLE"):
 2306            return self._parse_as_command(start)
 2307
 2308        concurrently = self._match_text_seq("CONCURRENTLY")
 2309        if_exists = exists or self._parse_exists()
 2310
 2311        if kind == "COLUMN":
 2312            this = self._parse_column()
 2313        else:
 2314            this = self._parse_table_parts(schema=True, is_db_reference=kind == "SCHEMA")
 2315
 2316        cluster = self._parse_on_property() if self._match(TokenType.ON) else None
 2317
 2318        if self._match(TokenType.L_PAREN, advance=False):
 2319            expressions = self._parse_wrapped_csv(self._parse_types)
 2320        else:
 2321            expressions = None
 2322
 2323        cascade_or_restrict = self._match_texts(("CASCADE", "RESTRICT")) and self._prev.text.upper()
 2324
 2325        return self.expression(
 2326            exp.Drop(
 2327                exists=if_exists,
 2328                this=this,
 2329                expressions=expressions,
 2330                kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind,
 2331                temporary=temporary,
 2332                materialized=materialized,
 2333                cascade=cascade_or_restrict == "CASCADE",
 2334                restrict=cascade_or_restrict == "RESTRICT",
 2335                constraints=self._match_text_seq("CONSTRAINTS"),
 2336                purge=self._match_text_seq("PURGE"),
 2337                cluster=cluster,
 2338                concurrently=concurrently,
 2339                sync=self._match_text_seq("SYNC"),
 2340                iceberg=iceberg,
 2341            )
 2342        )
 2343
 2344    def _parse_exists(self, not_: bool = False) -> bool | None:
 2345        return (
 2346            self._match_text_seq("IF")
 2347            and (not not_ or self._match(TokenType.NOT))
 2348            and self._match(TokenType.EXISTS)
 2349        )
 2350
    def _parse_create(self) -> exp.Create | exp.Command:
        """Parse the remainder of a CREATE (or REPLACE) statement.

        The method first consumes leading modifiers (OR REPLACE / OR ALTER, OR REFRESH,
        UNIQUE, [NON]CLUSTERED COLUMNSTORE), then the creatable kind, and then
        dispatches on that kind: functions/procedures, indexes, triggers, types and
        everything in `self.DB_CREATABLES`.

        Returns:
            An `exp.Create` node, or the result of `_parse_as_command(start)` (where
            `start` is the previously matched token) when no creatable kind is found,
            when a trigger or type definition lacks a required part, or when tokens
            other than `)` / `,` are left over after parsing.

        Note:
            A trigger definition without ON is reported through `self.raise_error`.
        """
        # Note: this can't be None because we've matched a statement parser
        start = self._prev

        replace = (
            start.token_type == TokenType.REPLACE
            or self._match_pair(TokenType.OR, TokenType.REPLACE)
            or self._match_pair(TokenType.OR, TokenType.ALTER)
        )
        refresh = self._match_pair(TokenType.OR, TokenType.REFRESH)

        unique = self._match(TokenType.UNIQUE)

        # `clustered` is tri-state: True / False when a COLUMNSTORE variant was matched, else None
        if self._match_text_seq("CLUSTERED", "COLUMNSTORE"):
            clustered = True
        elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq(
            "COLUMNSTORE"
        ):
            clustered = False
        else:
            clustered = None

        # On a TABLE FUNCTION pair, advance before matching the creatable kind below
        if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False):
            self._advance()

        properties = None
        create_token = self._match_set(self.CREATABLES) and self._prev

        if not create_token:
            # exp.Properties.Location.POST_CREATE
            properties = self._parse_properties()
            create_token = self._match_set(self.CREATABLES) and self._prev

            if not properties or not create_token:
                return self._parse_as_command(start)

        create_token_type = t.cast(Token, create_token).token_type

        concurrently = self._match_text_seq("CONCURRENTLY")
        exists = self._parse_exists(not_=True)
        this = None
        expression: exp.Expr | None = None
        indexes = None
        no_schema_binding = None
        begin = None
        clone = None

        def extend_props(temp_props: exp.Properties | None) -> None:
            # Accumulate properties found at the various locations into a single `properties` node
            nonlocal properties
            if properties and temp_props:
                properties.expressions.extend(temp_props.expressions)
            elif temp_props:
                properties = temp_props

        if create_token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=create_token_type)

            # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature)
            extend_props(self._parse_properties())

            expression = self._parse_heredoc() if self._match(TokenType.ALIAS) else None

            if (
                not expression
                and create_token_type == TokenType.FUNCTION
                and isinstance(this, exp.UserDefinedFunction)
                and this.args.get("wrapped")
            ):
                # Speculatively parse an optional TABLE keyword and an expression. The result is
                # only kept when it is followed by `, (`; otherwise the parser rewinds to
                # `pre_table_index` and the body is parsed by the generic path below.
                pre_table_index = self._index
                is_table = self._match(TokenType.TABLE)

                expression = self._parse_expression()
                overload_mode = bool(
                    expression
                    and self._curr.token_type == TokenType.COMMA
                    and self._next.token_type == TokenType.L_PAREN
                )
                if not overload_mode:
                    self._retreat(pre_table_index)
                    is_table = False
                    expression = None
            else:
                is_table = False
                overload_mode = False

            extend_props(self._parse_function_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = (
                            self._parse_user_defined_function_expression()
                            if create_token_type == TokenType.FUNCTION
                            else self._parse_block()
                        )

                    if return_:
                        expression = self.expression(exp.Return(this=expression))

            if overload_mode and expression:
                expression = self._parse_macro_overloads(
                    t.cast(exp.UserDefinedFunction, this), expression, is_table
                )
        elif create_token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, eg. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif (
            create_token_type == TokenType.CONSTRAINT and self._match(TokenType.TRIGGER)
        ) or create_token_type == TokenType.TRIGGER:
            # For CONSTRAINT TRIGGER, the TRIGGER token just matched becomes the create token
            if is_constraint := (create_token_type == TokenType.CONSTRAINT):
                create_token = self._prev

            trigger_name = self._parse_id_var()
            if not trigger_name:
                return self._parse_as_command(start)

            timing_var = self._parse_var_from_options(self.TRIGGER_TIMING, raise_unmatched=False)
            timing = timing_var.this if timing_var else None
            if not timing:
                return self._parse_as_command(start)

            events = self._parse_trigger_events()
            if not self._match(TokenType.ON):
                self.raise_error("Expected ON in trigger definition")

            # The remaining trigger clauses are parsed in this fixed order
            table = self._parse_table_parts()
            referenced_table = self._parse_table_parts() if self._match(TokenType.FROM) else None
            deferrable, initially = self._parse_trigger_deferrable()
            referencing = self._parse_trigger_referencing()
            for_each = self._parse_trigger_for_each()
            when = self._match_text_seq("WHEN") and self._parse_wrapped(
                self._parse_disjunction, optional=True
            )
            execute = self._parse_trigger_execute()

            if execute is None:
                return self._parse_as_command(start)

            trigger_props = self.expression(
                exp.TriggerProperties(
                    table=table,
                    timing=timing,
                    events=events,
                    execute=execute,
                    constraint=is_constraint,
                    referenced_table=referenced_table,
                    deferrable=deferrable,
                    initially=initially,
                    referencing=referencing,
                    for_each=for_each,
                    when=when,
                )
            )

            this = trigger_name
            extend_props(exp.Properties(expressions=[trigger_props] if trigger_props else []))
        elif create_token_type == TokenType.TYPE:
            this = self._parse_table_parts(schema=True)
            if not this or not self._match(TokenType.ALIAS):
                return self._parse_as_command(start)

            # Only ENUM (...) and a parenthesized schema are understood after the alias token
            if self._match(TokenType.ENUM):
                expression = exp.DataType(
                    this=exp.DType.ENUM,
                    expressions=self._parse_wrapped_csv(self._parse_string),
                )
            elif self._match(TokenType.L_PAREN, advance=False):
                expression = self._parse_schema()
            else:
                return self._parse_as_command(start)
        elif create_token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    # Fold every SequenceProperties node in `props` into a single node:
                    # "options" lists are concatenated, other args are copied over.
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

                # Some dialects also support using a table as an alias instead of a SELECT.
                # Here we fallback to this as an alternative.
                if not expression and has_alias:
                    expression = self._try_parse(self._parse_table_parts)

            if create_token_type == TokenType.TABLE:
                # exp.Properties.Location.POST_EXPRESSION
                extend_props(self._parse_properties())

                indexes = []
                while True:
                    index = self._parse_index()

                    # exp.Properties.Location.POST_INDEX
                    extend_props(self._parse_properties())
                    if not index:
                        break
                    else:
                        self._match(TokenType.COMMA)
                        indexes.append(index)
            elif create_token_type == TokenType.VIEW:
                if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"):
                    no_schema_binding = True
            elif create_token_type in (TokenType.SINK, TokenType.SOURCE):
                extend_props(self._parse_properties())

            shallow = self._match_text_seq("SHALLOW")

            if self._match_texts(self.CLONE_KEYWORDS):
                copy = self._prev.text.lower() == "copy"
                clone = self.expression(
                    exp.Clone(this=self._parse_table(schema=True), shallow=shallow, copy=copy)
                )

        # Any leftover token other than `)` or `,` means the statement was not fully consumed,
        # so it is kept as a raw command instead.
        if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False):
            return self._parse_as_command(start)

        create_kind_text = create_token.text.upper()
        return self.expression(
            exp.Create(
                this=this,
                kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text,
                replace=replace,
                refresh=refresh,
                unique=unique,
                expression=expression,
                exists=exists,
                properties=properties,
                indexes=indexes,
                no_schema_binding=no_schema_binding,
                begin=begin,
                clone=clone,
                concurrently=concurrently,
                clustered=clustered,
            )
        )
 2636
 2637    def _parse_sequence_properties(self) -> exp.SequenceProperties | None:
 2638        seq = exp.SequenceProperties()
 2639
 2640        options = []
 2641        index = self._index
 2642
 2643        while self._curr:
 2644            self._match(TokenType.COMMA)
 2645            if self._match_text_seq("INCREMENT"):
 2646                self._match_text_seq("BY")
 2647                self._match_text_seq("=")
 2648                seq.set("increment", self._parse_term())
 2649            elif self._match_text_seq("MINVALUE"):
 2650                seq.set("minvalue", self._parse_term())
 2651            elif self._match_text_seq("MAXVALUE"):
 2652                seq.set("maxvalue", self._parse_term())
 2653            elif self._match(TokenType.START_WITH) or self._match_text_seq("START"):
 2654                self._match_text_seq("=")
 2655                seq.set("start", self._parse_term())
 2656            elif self._match_text_seq("CACHE"):
 2657                # T-SQL allows empty CACHE which is initialized dynamically
 2658                seq.set("cache", self._parse_number() or True)
 2659            elif self._match_text_seq("OWNED", "BY"):
 2660                # "OWNED BY NONE" is the default
 2661                seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column())
 2662            else:
 2663                opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False)
 2664                if opt:
 2665                    options.append(opt)
 2666                else:
 2667                    break
 2668
 2669        seq.set("options", options if options else None)
 2670        return None if self._index == index else seq
 2671
 2672    def _parse_trigger_events(self) -> list[exp.TriggerEvent]:
 2673        events = []
 2674
 2675        while True:
 2676            event_type = self._match_set(self.TRIGGER_EVENTS) and self._prev.text.upper()
 2677
 2678            if not event_type:
 2679                self.raise_error("Expected trigger event (INSERT, UPDATE, DELETE, TRUNCATE)")
 2680
 2681            columns = (
 2682                self._parse_csv(self._parse_column)
 2683                if event_type == "UPDATE" and self._match_text_seq("OF")
 2684                else None
 2685            )
 2686
 2687            events.append(self.expression(exp.TriggerEvent(this=event_type, columns=columns)))
 2688
 2689            if not self._match(TokenType.OR):
 2690                break
 2691
 2692        return events
 2693
 2694    def _parse_trigger_deferrable(
 2695        self,
 2696    ) -> tuple[str | None, str | None]:
 2697        deferrable_var = self._parse_var_from_options(
 2698            self.TRIGGER_DEFERRABLE, raise_unmatched=False
 2699        )
 2700        deferrable = deferrable_var.this if deferrable_var else None
 2701
 2702        initially = None
 2703        if deferrable and self._match_text_seq("INITIALLY"):
 2704            initially = (
 2705                self._prev.text.upper() if self._match_texts(("IMMEDIATE", "DEFERRED")) else None
 2706            )
 2707
 2708        return deferrable, initially
 2709
 2710    def _parse_trigger_referencing_clause(self, keyword: str) -> exp.Expr | None:
 2711        if not self._match_text_seq(keyword):
 2712            return None
 2713        if not self._match_text_seq("TABLE"):
 2714            self.raise_error(f"Expected TABLE after {keyword} in REFERENCING clause")
 2715        self._match_text_seq("AS")
 2716        return self._parse_id_var()
 2717
 2718    def _parse_trigger_referencing(self) -> exp.TriggerReferencing | None:
 2719        if not self._match_text_seq("REFERENCING"):
 2720            return None
 2721
 2722        old_alias = None
 2723        new_alias = None
 2724
 2725        while True:
 2726            if alias := self._parse_trigger_referencing_clause("OLD"):
 2727                if old_alias is not None:
 2728                    self.raise_error("Duplicate OLD clause in REFERENCING")
 2729                old_alias = alias
 2730            elif alias := self._parse_trigger_referencing_clause("NEW"):
 2731                if new_alias is not None:
 2732                    self.raise_error("Duplicate NEW clause in REFERENCING")
 2733                new_alias = alias
 2734            else:
 2735                break
 2736
 2737        if old_alias is None and new_alias is None:
 2738            self.raise_error("REFERENCING clause requires at least OLD TABLE or NEW TABLE")
 2739
 2740        return self.expression(exp.TriggerReferencing(old=old_alias, new=new_alias))
 2741
 2742    def _parse_trigger_for_each(self) -> str | None:
 2743        if not self._match_text_seq("FOR", "EACH"):
 2744            return None
 2745
 2746        return self._prev.text.upper() if self._match_texts(("ROW", "STATEMENT")) else None
 2747
 2748    def _parse_trigger_execute(self) -> exp.TriggerExecute | None:
 2749        if not self._match(TokenType.EXECUTE):
 2750            return None
 2751
 2752        if not self._match_set((TokenType.FUNCTION, TokenType.PROCEDURE)):
 2753            self.raise_error("Expected FUNCTION or PROCEDURE after EXECUTE")
 2754
 2755        func_call = self._parse_column()
 2756        return self.expression(exp.TriggerExecute(this=func_call))
 2757
 2758    def _parse_property_before(self) -> exp.Expr | list[exp.Expr] | None:
 2759        # only used for teradata currently
 2760        self._match(TokenType.COMMA)
 2761
 2762        kwargs = {
 2763            "no": self._match_text_seq("NO"),
 2764            "dual": self._match_text_seq("DUAL"),
 2765            "before": self._match_text_seq("BEFORE"),
 2766            "default": self._match_text_seq("DEFAULT"),
 2767            "local": (self._match_text_seq("LOCAL") and "LOCAL")
 2768            or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"),
 2769            "after": self._match_text_seq("AFTER"),
 2770            "minimum": self._match_texts(("MIN", "MINIMUM")),
 2771            "maximum": self._match_texts(("MAX", "MAXIMUM")),
 2772        }
 2773
 2774        if self._match_texts(self.PROPERTY_PARSERS):
 2775            parser = self.PROPERTY_PARSERS[self._prev.text.upper()]
 2776            try:
 2777                return parser(self, **{k: v for k, v in kwargs.items() if v})
 2778            except TypeError:
 2779                self.raise_error(f"Cannot parse property '{self._prev.text}'")
 2780
 2781        return None
 2782
 2783    def _parse_wrapped_properties(self) -> list[exp.Expr | list[exp.Expr]]:
 2784        return self._parse_wrapped_csv(self._parse_property)
 2785
 2786    def _parse_property(self) -> exp.Expr | list[exp.Expr] | None:
 2787        if self._match_texts(self.PROPERTY_PARSERS):
 2788            return self.PROPERTY_PARSERS[self._prev.text.upper()](self)
 2789
 2790        if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
 2791            return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
 2792
 2793        if self._match_text_seq("COMPOUND", "SORTKEY"):
 2794            return self._parse_sortkey(compound=True)
 2795
 2796        if self._match_text_seq("PARAMETER", "STYLE", "PANDAS"):
 2797            return self.expression(exp.ParameterStyleProperty(this="PANDAS"))
 2798
 2799        index = self._index
 2800
 2801        seq_props = self._parse_sequence_properties()
 2802        if seq_props:
 2803            return seq_props
 2804
 2805        self._retreat(index)
 2806        key = self._parse_column()
 2807
 2808        if not self._match(TokenType.EQ):
 2809            self._retreat(index)
 2810            return None
 2811
 2812        # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise
 2813        if isinstance(key, exp.Column):
 2814            key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name)
 2815
 2816        value = self._parse_bitwise() or self._parse_var(any_token=True)
 2817
 2818        # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier())
 2819        if isinstance(value, exp.Column):
 2820            value = exp.var(value.name)
 2821
 2822        return self.expression(exp.Property(this=key, value=value))
 2823
 2824    def _parse_stored(self) -> exp.FileFormatProperty | exp.StorageHandlerProperty:
 2825        if self._match_text_seq("BY"):
 2826            return self.expression(exp.StorageHandlerProperty(this=self._parse_var_or_string()))
 2827
 2828        self._match(TokenType.ALIAS)
 2829        input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None
 2830        output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None
 2831
 2832        return self.expression(
 2833            exp.FileFormatProperty(
 2834                this=(
 2835                    self.expression(
 2836                        exp.InputOutputFormat(
 2837                            input_format=input_format, output_format=output_format
 2838                        )
 2839                    )
 2840                    if input_format or output_format
 2841                    else self._parse_var_or_string() or self._parse_number() or self._parse_id_var()
 2842                ),
 2843                hive_format=True,
 2844            )
 2845        )
 2846
 2847    def _parse_unquoted_field(self) -> exp.Expr | None:
 2848        field = self._parse_field()
 2849        if isinstance(field, exp.Identifier) and not field.quoted:
 2850            field = exp.var(field)
 2851
 2852        return field
 2853
 2854    def _parse_property_assignment(self, exp_class: type[E], **kwargs: t.Any) -> E:
 2855        self._match(TokenType.EQ)
 2856        self._match(TokenType.ALIAS)
 2857
 2858        return self.expression(exp_class(this=self._parse_unquoted_field(), **kwargs))
 2859
 2860    def _parse_properties(self, before: bool | None = None) -> exp.Properties | None:
 2861        properties = []
 2862        while True:
 2863            if before:
 2864                prop = self._parse_property_before()
 2865            else:
 2866                prop = self._parse_property()
 2867            if not prop:
 2868                break
 2869            for p in ensure_list(prop):
 2870                properties.append(p)
 2871
 2872        if properties:
 2873            return self.expression(exp.Properties(expressions=properties))
 2874
 2875        return None
 2876
 2877    def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty:
 2878        return self.expression(
 2879            exp.FallbackProperty(no=no, protection=self._match_text_seq("PROTECTION"))
 2880        )
 2881
 2882    def _parse_sql_security(self) -> exp.SqlSecurityProperty:
 2883        return self.expression(
 2884            exp.SqlSecurityProperty(
 2885                this=self._match_texts(self.SECURITY_PROPERTY_KEYWORDS) and self._prev.text.upper()
 2886            )
 2887        )
 2888
 2889    def _parse_settings_property(self) -> exp.SettingsProperty:
 2890        return self.expression(
 2891            exp.SettingsProperty(expressions=self._parse_csv(self._parse_assignment))
 2892        )
 2893
 2894    def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty:
 2895        if self._index >= 2:
 2896            pre_volatile_token = self._tokens[self._index - 2]
 2897        else:
 2898            pre_volatile_token = None
 2899
 2900        if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS:
 2901            return exp.VolatileProperty()
 2902
 2903        return self.expression(exp.StabilityProperty(this=exp.Literal.string("VOLATILE")))
 2904
 2905    def _parse_retention_period(self) -> exp.Var:
 2906        # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...}
 2907        number = self._parse_number()
 2908        number_str = f"{number} " if number else ""
 2909        unit = self._parse_var(any_token=True)
 2910        return exp.var(f"{number_str}{unit}")
 2911
 2912    def _parse_system_versioning_property(
 2913        self, with_: bool = False
 2914    ) -> exp.WithSystemVersioningProperty:
 2915        self._match(TokenType.EQ)
 2916        prop = self.expression(exp.WithSystemVersioningProperty(on=True, with_=with_))
 2917
 2918        if self._match_text_seq("OFF"):
 2919            prop.set("on", False)
 2920            return prop
 2921
 2922        self._match(TokenType.ON)
 2923        if self._match(TokenType.L_PAREN):
 2924            while self._curr and not self._match(TokenType.R_PAREN):
 2925                if self._match_text_seq("HISTORY_TABLE", "="):
 2926                    prop.set("this", self._parse_table_parts())
 2927                elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="):
 2928                    prop.set("data_consistency", self._advance_any() and self._prev.text.upper())
 2929                elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="):
 2930                    prop.set("retention_period", self._parse_retention_period())
 2931
 2932                self._match(TokenType.COMMA)
 2933
 2934        return prop
 2935
 2936    def _parse_data_deletion_property(self) -> exp.DataDeletionProperty:
 2937        self._match(TokenType.EQ)
 2938        on = self._match_text_seq("ON") or not self._match_text_seq("OFF")
 2939        prop = self.expression(exp.DataDeletionProperty(on=on))
 2940
 2941        if self._match(TokenType.L_PAREN):
 2942            while self._curr and not self._match(TokenType.R_PAREN):
 2943                if self._match_text_seq("FILTER_COLUMN", "="):
 2944                    prop.set("filter_column", self._parse_column())
 2945                elif self._match_text_seq("RETENTION_PERIOD", "="):
 2946                    prop.set("retention_period", self._parse_retention_period())
 2947
 2948                self._match(TokenType.COMMA)
 2949
 2950        return prop
 2951
 2952    def _parse_distributed_property(self) -> exp.DistributedByProperty:
 2953        kind = "HASH"
 2954        expressions: list[exp.Expr] | None = None
 2955        if self._match_text_seq("BY", "HASH"):
 2956            expressions = self._parse_wrapped_csv(self._parse_id_var)
 2957        elif self._match_text_seq("BY", "RANDOM"):
 2958            kind = "RANDOM"
 2959
 2960        # If the BUCKETS keyword is not present, the number of buckets is AUTO
 2961        buckets: exp.Expr | None = None
 2962        if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"):
 2963            buckets = self._parse_number()
 2964
 2965        return self.expression(
 2966            exp.DistributedByProperty(
 2967                expressions=expressions, kind=kind, buckets=buckets, order=self._parse_order()
 2968            )
 2969        )
 2970
 2971    def _parse_composite_key_property(self, expr_type: type[E]) -> E:
 2972        self._match_text_seq("KEY")
 2973        expressions = self._parse_wrapped_id_vars()
 2974        return self.expression(expr_type(expressions=expressions))
 2975
 2976    def _parse_with_property(self) -> exp.Expr | None | list[exp.Expr]:
 2977        if self._match_text_seq("(", "SYSTEM_VERSIONING"):
 2978            prop = self._parse_system_versioning_property(with_=True)
 2979            self._match_r_paren()
 2980            return prop
 2981
 2982        if self._match(TokenType.L_PAREN, advance=False):
 2983            result: list[exp.Expr] = []
 2984            for i in self._parse_wrapped_properties():
 2985                result.extend(i) if isinstance(i, list) else result.append(i)
 2986            return result
 2987
 2988        if self._match_text_seq("JOURNAL"):
 2989            return self._parse_withjournaltable()
 2990
 2991        if self._match_texts(self.VIEW_ATTRIBUTES):
 2992            return self.expression(exp.ViewAttributeProperty(this=self._prev.text.upper()))
 2993
 2994        if self._match_text_seq("DATA"):
 2995            return self._parse_withdata(no=False)
 2996        elif self._match_text_seq("NO", "DATA"):
 2997            return self._parse_withdata(no=True)
 2998
 2999        if self._match(TokenType.SERDE_PROPERTIES, advance=False):
 3000            return self._parse_serde_properties(with_=True)
 3001
 3002        if self._match(TokenType.SCHEMA):
 3003            return self.expression(
 3004                exp.WithSchemaBindingProperty(
 3005                    this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS)
 3006                )
 3007            )
 3008
 3009        if self._match_texts(self.PROCEDURE_OPTIONS, advance=False):
 3010            return self.expression(
 3011                exp.WithProcedureOptions(expressions=self._parse_csv(self._parse_procedure_option))
 3012            )
 3013
 3014        if not self._next:
 3015            return None
 3016
 3017        return self._parse_withisolatedloading()
 3018
 3019    def _parse_procedure_option(self) -> exp.Expr | None:
 3020        if self._match_text_seq("EXECUTE", "AS"):
 3021            return self.expression(
 3022                exp.ExecuteAsProperty(
 3023                    this=self._parse_var_from_options(
 3024                        self.EXECUTE_AS_OPTIONS, raise_unmatched=False
 3025                    )
 3026                    or self._parse_string()
 3027                )
 3028            )
 3029
 3030        return self._parse_var_from_options(self.PROCEDURE_OPTIONS)
 3031
 3032    # https://dev.mysql.com/doc/refman/8.0/en/create-view.html
 3033    def _parse_definer(self) -> exp.DefinerProperty | None:
 3034        self._match(TokenType.EQ)
 3035
 3036        user = self._parse_id_var()
 3037        self._match(TokenType.PARAMETER)
 3038        host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text)
 3039
 3040        if not user or not host:
 3041            return None
 3042
 3043        return exp.DefinerProperty(this=f"{user}@{host}")
 3044
 3045    def _parse_withjournaltable(self) -> exp.WithJournalTableProperty:
 3046        self._match(TokenType.TABLE)
 3047        self._match(TokenType.EQ)
 3048        return self.expression(exp.WithJournalTableProperty(this=self._parse_table_parts()))
 3049
 3050    def _parse_log(self, no: bool = False) -> exp.LogProperty:
 3051        return self.expression(exp.LogProperty(no=no))
 3052
 3053    def _parse_journal(self, **kwargs) -> exp.JournalProperty:
 3054        return self.expression(exp.JournalProperty(**kwargs))
 3055
 3056    def _parse_checksum(self) -> exp.ChecksumProperty:
 3057        self._match(TokenType.EQ)
 3058
 3059        on = None
 3060        if self._match(TokenType.ON):
 3061            on = True
 3062        elif self._match_text_seq("OFF"):
 3063            on = False
 3064
 3065        return self.expression(exp.ChecksumProperty(on=on, default=self._match(TokenType.DEFAULT)))
 3066
 3067    def _parse_cluster(self) -> exp.Cluster:
 3068        self._match(TokenType.CLUSTER_BY)
 3069        return self.expression(
 3070            exp.Cluster(
 3071                expressions=self._parse_csv(self._parse_column),
 3072            )
 3073        )
 3074
 3075    def _parse_cluster_property(self) -> exp.ClusterProperty:
 3076        return self.expression(
 3077            exp.ClusterProperty(
 3078                expressions=self._parse_wrapped_csv(self._parse_column),
 3079            )
 3080        )
 3081
 3082    def _parse_clustered_by(self) -> exp.ClusteredByProperty:
 3083        self._match_text_seq("BY")
 3084
 3085        self._match_l_paren()
 3086        expressions = self._parse_csv(self._parse_column)
 3087        self._match_r_paren()
 3088
 3089        if self._match_text_seq("SORTED", "BY"):
 3090            self._match_l_paren()
 3091            sorted_by = self._parse_csv(self._parse_ordered)
 3092            self._match_r_paren()
 3093        else:
 3094            sorted_by = None
 3095
 3096        self._match(TokenType.INTO)
 3097        buckets = self._parse_number()
 3098        self._match_text_seq("BUCKETS")
 3099
 3100        return self.expression(
 3101            exp.ClusteredByProperty(expressions=expressions, sorted_by=sorted_by, buckets=buckets)
 3102        )
 3103
 3104    def _parse_copy_property(self) -> exp.CopyGrantsProperty | None:
 3105        if not self._match_text_seq("GRANTS"):
 3106            self._retreat(self._index - 1)
 3107            return None
 3108
 3109        return self.expression(exp.CopyGrantsProperty())
 3110
 3111    def _parse_freespace(self) -> exp.FreespaceProperty:
 3112        self._match(TokenType.EQ)
 3113        return self.expression(
 3114            exp.FreespaceProperty(this=self._parse_number(), percent=self._match(TokenType.PERCENT))
 3115        )
 3116
 3117    def _parse_mergeblockratio(
 3118        self, no: bool = False, default: bool = False
 3119    ) -> exp.MergeBlockRatioProperty:
 3120        if self._match(TokenType.EQ):
 3121            return self.expression(
 3122                exp.MergeBlockRatioProperty(
 3123                    this=self._parse_number(), percent=self._match(TokenType.PERCENT)
 3124                )
 3125            )
 3126
 3127        return self.expression(exp.MergeBlockRatioProperty(no=no, default=default))
 3128
 3129    def _parse_datablocksize(
 3130        self,
 3131        default: bool | None = None,
 3132        minimum: bool | None = None,
 3133        maximum: bool | None = None,
 3134    ) -> exp.DataBlocksizeProperty:
 3135        self._match(TokenType.EQ)
 3136        size = self._parse_number()
 3137
 3138        units = None
 3139        if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")):
 3140            units = self._prev.text
 3141
 3142        return self.expression(
 3143            exp.DataBlocksizeProperty(
 3144                size=size, units=units, default=default, minimum=minimum, maximum=maximum
 3145            )
 3146        )
 3147
 3148    def _parse_blockcompression(self) -> exp.BlockCompressionProperty:
 3149        self._match(TokenType.EQ)
 3150        always = self._match_text_seq("ALWAYS")
 3151        manual = self._match_text_seq("MANUAL")
 3152        never = self._match_text_seq("NEVER")
 3153        default = self._match_text_seq("DEFAULT")
 3154
 3155        autotemp = None
 3156        if self._match_text_seq("AUTOTEMP"):
 3157            autotemp = self._parse_schema()
 3158
 3159        return self.expression(
 3160            exp.BlockCompressionProperty(
 3161                always=always, manual=manual, never=never, default=default, autotemp=autotemp
 3162            )
 3163        )
 3164
 3165    def _parse_withisolatedloading(self) -> exp.IsolatedLoadingProperty | None:
 3166        index = self._index
 3167        no = self._match_text_seq("NO")
 3168        concurrent = self._match_text_seq("CONCURRENT")
 3169
 3170        if not self._match_text_seq("ISOLATED", "LOADING"):
 3171            self._retreat(index)
 3172            return None
 3173
 3174        target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False)
 3175        return self.expression(
 3176            exp.IsolatedLoadingProperty(no=no, concurrent=concurrent, target=target)
 3177        )
 3178
 3179    def _parse_locking(self) -> exp.LockingProperty:
 3180        if self._match(TokenType.TABLE):
 3181            kind = "TABLE"
 3182        elif self._match(TokenType.VIEW):
 3183            kind = "VIEW"
 3184        elif self._match(TokenType.ROW):
 3185            kind = "ROW"
 3186        elif self._match_text_seq("DATABASE"):
 3187            kind = "DATABASE"
 3188        else:
 3189            kind = None
 3190
 3191        if kind in ("DATABASE", "TABLE", "VIEW"):
 3192            this = self._parse_table_parts()
 3193        else:
 3194            this = None
 3195
 3196        if self._match(TokenType.FOR):
 3197            for_or_in = "FOR"
 3198        elif self._match(TokenType.IN):
 3199            for_or_in = "IN"
 3200        else:
 3201            for_or_in = None
 3202
 3203        if self._match_text_seq("ACCESS"):
 3204            lock_type = "ACCESS"
 3205        elif self._match_texts(("EXCL", "EXCLUSIVE")):
 3206            lock_type = "EXCLUSIVE"
 3207        elif self._match_text_seq("SHARE"):
 3208            lock_type = "SHARE"
 3209        elif self._match_text_seq("READ"):
 3210            lock_type = "READ"
 3211        elif self._match_text_seq("WRITE"):
 3212            lock_type = "WRITE"
 3213        elif self._match_text_seq("CHECKSUM"):
 3214            lock_type = "CHECKSUM"
 3215        else:
 3216            lock_type = None
 3217
 3218        override = self._match_text_seq("OVERRIDE")
 3219
 3220        return self.expression(
 3221            exp.LockingProperty(
 3222                this=this, kind=kind, for_or_in=for_or_in, lock_type=lock_type, override=override
 3223            )
 3224        )
 3225
 3226    def _parse_partition_by(self) -> list[exp.Expr]:
 3227        if self._match(TokenType.PARTITION_BY):
 3228            return self._parse_csv(self._parse_disjunction)
 3229        return []
 3230
 3231    def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec:
 3232        def _parse_partition_bound_expr() -> exp.Expr | None:
 3233            if self._match_text_seq("MINVALUE"):
 3234                return exp.var("MINVALUE")
 3235            if self._match_text_seq("MAXVALUE"):
 3236                return exp.var("MAXVALUE")
 3237            return self._parse_bitwise()
 3238
 3239        this: exp.Expr | list[exp.Expr] | None = None
 3240        expression = None
 3241        from_expressions = None
 3242        to_expressions = None
 3243
 3244        if self._match(TokenType.IN):
 3245            this = self._parse_wrapped_csv(self._parse_bitwise)
 3246        elif self._match(TokenType.FROM):
 3247            from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
 3248            self._match_text_seq("TO")
 3249            to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr)
 3250        elif self._match_text_seq("WITH", "(", "MODULUS"):
 3251            this = self._parse_number()
 3252            self._match_text_seq(",", "REMAINDER")
 3253            expression = self._parse_number()
 3254            self._match_r_paren()
 3255        else:
 3256            self.raise_error("Failed to parse partition bound spec.")
 3257
 3258        return self.expression(
 3259            exp.PartitionBoundSpec(
 3260                this=this,
 3261                expression=expression,
 3262                from_expressions=from_expressions,
 3263                to_expressions=to_expressions,
 3264            )
 3265        )
 3266
 3267    # https://www.postgresql.org/docs/current/sql-createtable.html
 3268    def _parse_partitioned_of(self) -> exp.PartitionedOfProperty | None:
 3269        if not self._match_text_seq("OF"):
 3270            self._retreat(self._index - 1)
 3271            return None
 3272
 3273        this = self._parse_table(schema=True)
 3274
 3275        if self._match(TokenType.DEFAULT):
 3276            expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT")
 3277        elif self._match_text_seq("FOR", "VALUES"):
 3278            expression = self._parse_partition_bound_spec()
 3279        else:
 3280            self.raise_error("Expecting either DEFAULT or FOR VALUES clause.")
 3281
 3282        return self.expression(exp.PartitionedOfProperty(this=this, expression=expression))
 3283
 3284    def _parse_partitioned_by(self) -> exp.PartitionedByProperty:
 3285        self._match(TokenType.EQ)
 3286        return self.expression(
 3287            exp.PartitionedByProperty(
 3288                this=self._parse_schema() or self._parse_bracket(self._parse_field())
 3289            )
 3290        )
 3291
 3292    def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty:
 3293        if self._match_text_seq("AND", "STATISTICS"):
 3294            statistics = True
 3295        elif self._match_text_seq("AND", "NO", "STATISTICS"):
 3296            statistics = False
 3297        else:
 3298            statistics = None
 3299
 3300        return self.expression(exp.WithDataProperty(no=no, statistics=statistics))
 3301
 3302    def _parse_contains_property(self) -> exp.SqlReadWriteProperty | None:
 3303        if self._match_text_seq("SQL"):
 3304            return self.expression(exp.SqlReadWriteProperty(this="CONTAINS SQL"))
 3305        return None
 3306
 3307    def _parse_modifies_property(self) -> exp.SqlReadWriteProperty | None:
 3308        if self._match_text_seq("SQL", "DATA"):
 3309            return self.expression(exp.SqlReadWriteProperty(this="MODIFIES SQL DATA"))
 3310        return None
 3311
 3312    def _parse_no_property(self) -> exp.Expr | None:
 3313        if self._match_text_seq("PRIMARY", "INDEX"):
 3314            return exp.NoPrimaryIndexProperty()
 3315        if self._match_text_seq("SQL"):
 3316            return self.expression(exp.SqlReadWriteProperty(this="NO SQL"))
 3317        return None
 3318
 3319    def _parse_on_property(self) -> exp.Expr | None:
 3320        if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"):
 3321            return exp.OnCommitProperty()
 3322        if self._match_text_seq("COMMIT", "DELETE", "ROWS"):
 3323            return exp.OnCommitProperty(delete=True)
 3324        return self.expression(exp.OnProperty(this=self._parse_schema(self._parse_id_var())))
 3325
 3326    def _parse_reads_property(self) -> exp.SqlReadWriteProperty | None:
 3327        if self._match_text_seq("SQL", "DATA"):
 3328            return self.expression(exp.SqlReadWriteProperty(this="READS SQL DATA"))
 3329        return None
 3330
 3331    def _parse_distkey(self) -> exp.DistKeyProperty:
 3332        return self.expression(exp.DistKeyProperty(this=self._parse_wrapped(self._parse_id_var)))
 3333
 3334    def _parse_create_like(self) -> exp.LikeProperty | None:
 3335        table = self._parse_table(schema=True)
 3336
 3337        options = []
 3338        while self._match_texts(("INCLUDING", "EXCLUDING")):
 3339            this = self._prev.text.upper()
 3340
 3341            id_var = self._parse_id_var()
 3342            if not id_var:
 3343                return None
 3344
 3345            options.append(
 3346                self.expression(exp.Property(this=this, value=exp.var(id_var.this.upper())))
 3347            )
 3348
 3349        return self.expression(exp.LikeProperty(this=table, expressions=options))
 3350
 3351    def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty:
 3352        return self.expression(
 3353            exp.SortKeyProperty(this=self._parse_wrapped_id_vars(), compound=compound)
 3354        )
 3355
 3356    def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty:
 3357        self._match(TokenType.EQ)
 3358        return self.expression(
 3359            exp.CharacterSetProperty(this=self._parse_var_or_string(), default=default)
 3360        )
 3361
 3362    def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty:
 3363        self._match_text_seq("WITH", "CONNECTION")
 3364        return self.expression(
 3365            exp.RemoteWithConnectionModelProperty(this=self._parse_table_parts())
 3366        )
 3367
 3368    def _parse_returns(self) -> exp.ReturnsProperty:
 3369        value: exp.Expr | None
 3370        null = None
 3371        is_table = self._match(TokenType.TABLE)
 3372
 3373        if is_table:
 3374            if self._match(TokenType.LT):
 3375                value = self.expression(
 3376                    exp.Schema(this="TABLE", expressions=self._parse_csv(self._parse_struct_types))
 3377                )
 3378                if not self._match(TokenType.GT):
 3379                    self.raise_error("Expecting >")
 3380            else:
 3381                value = self._parse_schema(exp.var("TABLE"))
 3382        elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"):
 3383            null = True
 3384            value = None
 3385        else:
 3386            value = self._parse_types()
 3387
 3388        return self.expression(exp.ReturnsProperty(this=value, is_table=is_table, null=null))
 3389
 3390    def _parse_describe(self) -> exp.Describe:
 3391        kind = self._prev.text if self._match_set(self.CREATABLES) else None
 3392        style: str | None = (
 3393            self._prev.text.upper() if self._match_texts(self.DESCRIBE_STYLES) else None
 3394        )
 3395        if self._match(TokenType.DOT):
 3396            style = None
 3397            self._retreat(self._index - 2)
 3398
 3399        format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None
 3400
 3401        if self._match_set(self.STATEMENT_PARSERS, advance=False):
 3402            this = self._parse_statement()
 3403        else:
 3404            this = self._parse_table(schema=True)
 3405
 3406        properties = self._parse_properties()
 3407        expressions = properties.expressions if properties else None
 3408        partition = self._parse_partition()
 3409        return self.expression(
 3410            exp.Describe(
 3411                this=this,
 3412                style=style,
 3413                kind=kind,
 3414                expressions=expressions,
 3415                partition=partition,
 3416                format=format,
 3417                as_json=self._match_text_seq("AS", "JSON"),
 3418            )
 3419        )
 3420
 3421    def _parse_multitable_inserts(self, comments: list[str] | None) -> exp.MultitableInserts:
 3422        kind = self._prev.text.upper()
 3423        expressions = []
 3424
 3425        def parse_conditional_insert() -> exp.ConditionalInsert | None:
 3426            if self._match(TokenType.WHEN):
 3427                expression = self._parse_disjunction()
 3428                self._match(TokenType.THEN)
 3429            else:
 3430                expression = None
 3431
 3432            else_ = self._match(TokenType.ELSE)
 3433
 3434            if not self._match(TokenType.INTO):
 3435                return None
 3436
 3437            return self.expression(
 3438                exp.ConditionalInsert(
 3439                    this=self.expression(
 3440                        exp.Insert(
 3441                            this=self._parse_table(schema=True),
 3442                            expression=self._parse_derived_table_values(),
 3443                        )
 3444                    ),
 3445                    expression=expression,
 3446                    else_=else_,
 3447                )
 3448            )
 3449
 3450        expression = parse_conditional_insert()
 3451        while expression is not None:
 3452            expressions.append(expression)
 3453            expression = parse_conditional_insert()
 3454
 3455        return self.expression(
 3456            exp.MultitableInserts(kind=kind, expressions=expressions, source=self._parse_table()),
 3457            comments=comments,
 3458        )
 3459
 3460    def _parse_insert(self) -> exp.Insert | exp.MultitableInserts:
 3461        comments: list[str] = []
 3462        hint = self._parse_hint()
 3463        overwrite = self._match(TokenType.OVERWRITE)
 3464        ignore = self._match(TokenType.IGNORE)
 3465        local = self._match_text_seq("LOCAL")
 3466        alternative = None
 3467        is_function = None
 3468
 3469        if self._match_text_seq("DIRECTORY"):
 3470            this: exp.Expr | None = self.expression(
 3471                exp.Directory(
 3472                    this=self._parse_var_or_string(),
 3473                    local=local,
 3474                    row_format=self._parse_row_format(match_row=True),
 3475                )
 3476            )
 3477        else:
 3478            if self._match_set((TokenType.FIRST, TokenType.ALL)):
 3479                comments += ensure_list(self._prev_comments)
 3480                return self._parse_multitable_inserts(comments)
 3481
 3482            if self._match(TokenType.OR):
 3483                alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text
 3484
 3485            self._match(TokenType.INTO)
 3486            comments += ensure_list(self._prev_comments)
 3487            self._match(TokenType.TABLE)
 3488            is_function = self._match(TokenType.FUNCTION)
 3489
 3490            this = self._parse_function() if is_function else self._parse_insert_table()
 3491
 3492        returning = self._parse_returning()  # TSQL allows RETURNING before source
 3493
 3494        return self.expression(
 3495            exp.Insert(
 3496                hint=hint,
 3497                is_function=is_function,
 3498                this=this,
 3499                stored=self._match_text_seq("STORED") and self._parse_stored(),
 3500                by_name=self._match_text_seq("BY", "NAME"),
 3501                exists=self._parse_exists(),
 3502                where=self._match_pair(TokenType.REPLACE, TokenType.WHERE)
 3503                and self._parse_disjunction(),
 3504                partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(),
 3505                settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(),
 3506                default=self._match_text_seq("DEFAULT", "VALUES"),
 3507                expression=self._parse_derived_table_values() or self._parse_ddl_select(),
 3508                conflict=self._parse_on_conflict(),
 3509                returning=returning or self._parse_returning(),
 3510                overwrite=overwrite,
 3511                alternative=alternative,
 3512                ignore=ignore,
 3513                source=self._match(TokenType.TABLE) and self._parse_table(),
 3514            ),
 3515            comments=comments,
 3516        )
 3517
 3518    def _parse_insert_table(self) -> exp.Expr | None:
 3519        this = self._parse_table(schema=True, parse_partition=True)
 3520        if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False):
 3521            this.set("alias", self._parse_table_alias())
 3522        return this
 3523
 3524    def _parse_kill(self) -> exp.Kill:
 3525        kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None
 3526
 3527        return self.expression(exp.Kill(this=self._parse_primary(), kind=kind))
 3528
 3529    def _parse_on_conflict(self) -> exp.OnConflict | None:
 3530        conflict = self._match_text_seq("ON", "CONFLICT")
 3531        duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY")
 3532
 3533        if not conflict and not duplicate:
 3534            return None
 3535
 3536        conflict_keys = None
 3537        constraint = None
 3538
 3539        if conflict:
 3540            if self._match_text_seq("ON", "CONSTRAINT"):
 3541                constraint = self._parse_id_var()
 3542            elif self._match(TokenType.L_PAREN):
 3543                conflict_keys = self._parse_csv(self._parse_id_var)
 3544                self._match_r_paren()
 3545
 3546        index_predicate = self._parse_where()
 3547
 3548        action = self._parse_var_from_options(self.CONFLICT_ACTIONS)
 3549        if self._prev.token_type == TokenType.UPDATE:
 3550            self._match(TokenType.SET)
 3551            expressions = self._parse_csv(self._parse_equality)
 3552        else:
 3553            expressions = None
 3554
 3555        return self.expression(
 3556            exp.OnConflict(
 3557                duplicate=duplicate,
 3558                expressions=expressions,
 3559                action=action,
 3560                conflict_keys=conflict_keys,
 3561                index_predicate=index_predicate,
 3562                constraint=constraint,
 3563                where=self._parse_where(),
 3564            )
 3565        )
 3566
 3567    def _parse_returning(self) -> exp.Returning | None:
 3568        if not self._match(TokenType.RETURNING):
 3569            return None
 3570        return self.expression(
 3571            exp.Returning(
 3572                expressions=self._parse_csv(self._parse_expression),
 3573                into=self._match(TokenType.INTO) and self._parse_table_part(),
 3574            )
 3575        )
 3576
 3577    def _parse_row(self) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
 3578        if not self._match(TokenType.FORMAT):
 3579            return None
 3580        return self._parse_row_format()
 3581
 3582    def _parse_serde_properties(self, with_: bool = False) -> exp.SerdeProperties | None:
 3583        index = self._index
 3584        with_ = with_ or self._match_text_seq("WITH")
 3585
 3586        if not self._match(TokenType.SERDE_PROPERTIES):
 3587            self._retreat(index)
 3588            return None
 3589        return self.expression(
 3590            exp.SerdeProperties(expressions=self._parse_wrapped_properties(), with_=with_)
 3591        )
 3592
 3593    def _parse_row_format(
 3594        self, match_row: bool = False
 3595    ) -> exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty | None:
 3596        if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT):
 3597            return None
 3598
 3599        if self._match_text_seq("SERDE"):
 3600            this = self._parse_string()
 3601
 3602            serde_properties = self._parse_serde_properties()
 3603
 3604            return self.expression(
 3605                exp.RowFormatSerdeProperty(this=this, serde_properties=serde_properties)
 3606            )
 3607
 3608        self._match_text_seq("DELIMITED")
 3609
 3610        kwargs = {}
 3611
 3612        if self._match_text_seq("FIELDS", "TERMINATED", "BY"):
 3613            kwargs["fields"] = self._parse_string()
 3614            if self._match_text_seq("ESCAPED", "BY"):
 3615                kwargs["escaped"] = self._parse_string()
 3616        if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"):
 3617            kwargs["collection_items"] = self._parse_string()
 3618        if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"):
 3619            kwargs["map_keys"] = self._parse_string()
 3620        if self._match_text_seq("LINES", "TERMINATED", "BY"):
 3621            kwargs["lines"] = self._parse_string()
 3622        if self._match_text_seq("NULL", "DEFINED", "AS"):
 3623            kwargs["null"] = self._parse_string()
 3624
 3625        return self.expression(exp.RowFormatDelimitedProperty(**kwargs))  # type: ignore
 3626
 3627    def _parse_load(self) -> exp.LoadData | exp.Command:
 3628        if self._match_text_seq("DATA"):
 3629            local = self._match_text_seq("LOCAL")
 3630            self._match_text_seq("INPATH")
 3631            inpath = self._parse_string()
 3632            overwrite = self._match(TokenType.OVERWRITE)
 3633            temp: bool | None = None
 3634            if self._match(TokenType.INTO):
 3635                temp = self._match(TokenType.TEMPORARY)
 3636                self._match(TokenType.TABLE)
 3637
 3638            return self.expression(
 3639                exp.LoadData(
 3640                    this=self._parse_table(schema=True),
 3641                    local=local,
 3642                    overwrite=overwrite,
 3643                    temp=temp,
 3644                    inpath=inpath,
 3645                    files=self._match_text_seq("FROM", "FILES")
 3646                    and exp.Properties(expressions=self._parse_wrapped_properties()),
 3647                    partition=self._parse_partition(),
 3648                    input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(),
 3649                    serde=self._match_text_seq("SERDE") and self._parse_string(),
 3650                )
 3651            )
 3652        return self._parse_as_command(self._prev)
 3653
 3654    def _parse_delete(self) -> exp.Delete:
 3655        hint = self._parse_hint()
 3656
 3657        # This handles MySQL's "Multiple-Table Syntax"
 3658        # https://dev.mysql.com/doc/refman/8.0/en/delete.html
 3659        tables = None
 3660        if not self._match(TokenType.FROM, advance=False):
 3661            tables = self._parse_csv(self._parse_table) or None
 3662
 3663        returning = self._parse_returning()
 3664
 3665        return self.expression(
 3666            exp.Delete(
 3667                hint=hint,
 3668                tables=tables,
 3669                this=self._match(TokenType.FROM) and self._parse_table(joins=True),
 3670                using=self._match(TokenType.USING)
 3671                and self._parse_csv(lambda: self._parse_table(joins=True)),
 3672                cluster=self._match(TokenType.ON) and self._parse_on_property(),
 3673                where=self._parse_where(),
 3674                returning=returning or self._parse_returning(),
 3675                order=self._parse_order(),
 3676                limit=self._parse_limit(),
 3677            )
 3678        )
 3679
 3680    def _parse_update(self) -> exp.Update:
 3681        hint = self._parse_hint()
 3682        kwargs: dict[str, object] = {
 3683            "hint": hint,
 3684            "this": self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS),
 3685        }
 3686        while self._curr:
 3687            if self._match(TokenType.SET):
 3688                kwargs["expressions"] = self._parse_csv(self._parse_equality)
 3689            elif self._match(TokenType.RETURNING, advance=False):
 3690                kwargs["returning"] = self._parse_returning()
 3691            elif self._match(TokenType.FROM, advance=False):
 3692                from_ = self._parse_from(joins=True)
 3693                table = from_.this if from_ else None
 3694                if isinstance(table, exp.Subquery) and self._match(TokenType.JOIN, advance=False):
 3695                    table.set("joins", list(self._parse_joins()) or None)
 3696
 3697                kwargs["from_"] = from_
 3698            elif self._match(TokenType.WHERE, advance=False):
 3699                kwargs["where"] = self._parse_where()
 3700            elif self._match(TokenType.ORDER_BY, advance=False):
 3701                kwargs["order"] = self._parse_order()
 3702            elif self._match(TokenType.LIMIT, advance=False):
 3703                kwargs["limit"] = self._parse_limit()
 3704            else:
 3705                break
 3706
 3707        return self.expression(exp.Update(**kwargs))
 3708
 3709    def _parse_use(self) -> exp.Use:
 3710        return self.expression(
 3711            exp.Use(
 3712                kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
 3713                this=self._parse_table(schema=False),
 3714            )
 3715        )
 3716
 3717    def _parse_uncache(self) -> exp.Uncache:
 3718        if not self._match(TokenType.TABLE):
 3719            self.raise_error("Expecting TABLE after UNCACHE")
 3720
 3721        return self.expression(
 3722            exp.Uncache(exists=self._parse_exists(), this=self._parse_table(schema=True))
 3723        )
 3724
 3725    def _parse_cache(self) -> exp.Cache:
 3726        lazy = self._match_text_seq("LAZY")
 3727        self._match(TokenType.TABLE)
 3728        table = self._parse_table(schema=True)
 3729
 3730        options = []
 3731        if self._match_text_seq("OPTIONS"):
 3732            self._match_l_paren()
 3733            k = self._parse_string()
 3734            self._match(TokenType.EQ)
 3735            v = self._parse_string()
 3736            options = [k, v]
 3737            self._match_r_paren()
 3738
 3739        self._match(TokenType.ALIAS)
 3740        return self.expression(
 3741            exp.Cache(
 3742                this=table, lazy=lazy, options=options, expression=self._parse_select(nested=True)
 3743            )
 3744        )
 3745
 3746    def _parse_partition(self) -> exp.Partition | None:
 3747        if not self._match_texts(self.PARTITION_KEYWORDS):
 3748            return None
 3749
 3750        return self.expression(
 3751            exp.Partition(
 3752                subpartition=self._prev.text.upper() == "SUBPARTITION",
 3753                expressions=self._parse_wrapped_csv(self._parse_disjunction),
 3754            )
 3755        )
 3756
 3757    def _parse_value(self, values: bool = True) -> exp.Tuple | None:
 3758        def _parse_value_expression() -> exp.Expr | None:
 3759            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
 3760                return exp.var(self._prev.text.upper())
 3761            return self._parse_expression()
 3762
 3763        if self._match(TokenType.L_PAREN):
 3764            expressions = self._parse_csv(_parse_value_expression)
 3765            self._match_r_paren()
 3766            return self.expression(exp.Tuple(expressions=expressions))
 3767
 3768        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
 3769        expression = self._parse_expression()
 3770        if expression:
 3771            return self.expression(exp.Tuple(expressions=[expression]))
 3772        return None
 3773
 3774    def _parse_projections(
 3775        self,
 3776    ) -> tuple[list[exp.Expr], list[exp.Expr] | None]:
 3777        return self._parse_expressions(), None
 3778
 3779    def _parse_wrapped_select(self, table: bool = False) -> exp.Expr | None:
 3780        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
 3781            this: exp.Expr | None = self._parse_simplified_pivot(
 3782                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
 3783            )
 3784        elif self._match(TokenType.FROM):
 3785            from_ = self._parse_from(joins=True, skip_from_token=True, consume_pipe=True)
 3786            # Support parentheses for duckdb FROM-first syntax
 3787            select = self._parse_select(from_=from_)
 3788            if select:
 3789                if not select.args.get("from_"):
 3790                    select.set("from_", from_)
 3791                this = select
 3792            else:
 3793                this = exp.select("*").from_(t.cast(exp.From, from_))
 3794                this = self._parse_query_modifiers(self._parse_set_operations(this))
 3795        else:
 3796            this = (
 3797                self._parse_table(consume_pipe=True)
 3798                if table
 3799                else self._parse_select(nested=True, parse_set_operation=False)
 3800            )
 3801
 3802            # Transform exp.Values into a exp.Table to pass through parse_query_modifiers
 3803            # in case a modifier (e.g. join) is following
 3804            if table and isinstance(this, exp.Values) and this.alias:
 3805                alias = this.args["alias"].pop()
 3806                this = exp.Table(this=this, alias=alias)
 3807
 3808            this = self._parse_query_modifiers(self._parse_set_operations(this))
 3809
 3810        return this
 3811
 3812    def _parse_select(
 3813        self,
 3814        nested: bool = False,
 3815        table: bool = False,
 3816        parse_subquery_alias: bool = True,
 3817        parse_set_operation: bool = True,
 3818        consume_pipe: bool = True,
 3819        from_: exp.From | None = None,
 3820    ) -> exp.Expr | None:
 3821        query = self._parse_select_query(
 3822            nested=nested,
 3823            table=table,
 3824            parse_subquery_alias=parse_subquery_alias,
 3825            parse_set_operation=parse_set_operation,
 3826        )
 3827
 3828        if consume_pipe and self._match(TokenType.PIPE_GT, advance=False):
 3829            if not query and from_:
 3830                query = exp.select("*").from_(from_)
 3831            if isinstance(query, exp.Query):
 3832                query = self._parse_pipe_syntax_query(query)
 3833                query = query.subquery(copy=False) if query and table else query
 3834
 3835        return query
 3836
    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> exp.Expr | None:
        """Parse a query expression starting at the current token.

        The following forms are tried in order: a WITH clause followed by a
        statement, SELECT, a parenthesized query (only when `table` or `nested` is
        set), VALUES, a bare leading FROM, SUMMARIZE and DESCRIBE.

        Args:
            nested: allow a parenthesized query at this position.
            table: allow a parenthesized query and forward the flag to
                `_parse_wrapped_select`.
            parse_subquery_alias: forwarded as `parse_alias` to `_parse_subquery`
                for a parenthesized query.
            parse_set_operation: whether the result is passed through
                `_parse_set_operations` before being returned.

        Returns:
            The parsed expression, or None when none of the forms matched.
        """
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                # raise_error may return instead of raising, hence the explicit return
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            # Unwrap wrapper subqueries so the WITH is attached to the inner statement
            while isinstance(this, exp.Subquery) and this.is_wrapper:
                this = this.this

            assert this is not None
            if "with_" in this.arg_types:
                # The statement already carries its own WITH: merge its CTEs after ours
                if inner_cte := this.args.get("with_"):
                    cte.set("expressions", cte.expressions + inner_cte.expressions)
                    if inner_cte.args.get("recursive"):
                        cte.set("recursive", True)
                this.set("with_", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(joins=True, consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            # ALL / DISTINCT are only consumed as modifiers when the token after the
            # current one is not a dot
            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                matched_distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, matched_distinct = None, False

            # SELECT AS STRUCT / SELECT AS VALUE
            kind = (
                self._prev.text.upper()
                if self._match(TokenType.ALIAS) and self._match_texts(("STRUCT", "VALUE"))
                else None
            )

            distinct: exp.Expr | None = (
                self.expression(
                    exp.Distinct(
                        on=self._parse_value(values=False) if self._match(TokenType.ON) else None
                    )
                )
                if matched_distinct
                else None
            )

            # Collect any number of consecutive operation modifiers as upper-cased vars
            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)

            # Some dialects (e.g. Redshift, T-SQL) allow SELECT TOP N DISTINCT ...
            if limit and not matched_distinct and not all_:
                matched_distinct = self._match_set(self.DISTINCT_TOKENS)
                if matched_distinct:
                    distinct = self.expression(
                        exp.Distinct(
                            on=self._parse_value(values=False)
                            if self._match(TokenType.ON)
                            else None
                        )
                    )
                else:
                    all_ = self._match(TokenType.ALL)

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            projections, exclude = self._parse_projections()

            this = self.expression(
                exp.Select(
                    kind=kind,
                    hint=hint,
                    distinct=distinct,
                    expressions=projections,
                    limit=limit,
                    exclude=exclude,
                    operation_modifiers=operation_modifiers or None,
                )
            )
            # Comments attached to the SELECT token are stored on the Select node
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            # A FROM parsed before SELECT takes precedence over one that follows it
            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from_", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            comments = self._prev_comments
            this = self._parse_wrapped_select(table=table)

            if this:
                this.add_comments(comments, prepend=True)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            # A leading FROM with no SELECT is treated as SELECT * FROM ...
            this = exp.select("*").from_(from_.this, copy=False)
            this = self._parse_query_modifiers(this)
        elif self._match(TokenType.SUMMARIZE):
            # NOTE: `table` is rebound here to the result of matching the TABLE token
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize(this=this, table=table))
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this
 3978
 3979    def _parse_recursive_with_search(self) -> exp.RecursiveWithSearch | None:
 3980        self._match_text_seq("SEARCH")
 3981
 3982        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()
 3983
 3984        if not kind:
 3985            return None
 3986
 3987        self._match_text_seq("FIRST", "BY")
 3988
 3989        return self.expression(
 3990            exp.RecursiveWithSearch(
 3991                kind=kind,
 3992                this=self._parse_id_var(),
 3993                expression=self._match_text_seq("SET") and self._parse_id_var(),
 3994                using=self._match_text_seq("USING") and self._parse_id_var(),
 3995            )
 3996        )
 3997
 3998    def _parse_with(self, skip_with_token: bool = False) -> exp.With | None:
 3999        if not skip_with_token and not self._match(TokenType.WITH):
 4000            return None
 4001
 4002        comments = self._prev_comments
 4003        recursive = self._match(TokenType.RECURSIVE)
 4004
 4005        last_comments = None
 4006        expressions = []
 4007        while True:
 4008            cte = self._parse_cte()
 4009            if isinstance(cte, exp.CTE):
 4010                expressions.append(cte)
 4011                if last_comments:
 4012                    cte.add_comments(last_comments)
 4013
 4014            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
 4015                break
 4016            else:
 4017                self._match(TokenType.WITH)
 4018
 4019            last_comments = self._prev_comments
 4020
 4021        return self.expression(
 4022            exp.With(
 4023                expressions=expressions,
 4024                recursive=recursive or None,
 4025                search=self._parse_recursive_with_search(),
 4026            ),
 4027            comments=comments,
 4028        )
 4029
 4030    def _parse_cte(self) -> exp.CTE | None:
 4031        index = self._index
 4032
 4033        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
 4034        if not alias or not alias.this:
 4035            self.raise_error("Expected CTE to have alias")
 4036
 4037        key_expressions = (
 4038            self._parse_wrapped_id_vars() if self._match_text_seq("USING", "KEY") else None
 4039        )
 4040
 4041        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
 4042            self._retreat(index)
 4043            return None
 4044
 4045        comments = self._prev_comments
 4046
 4047        if self._match_text_seq("NOT", "MATERIALIZED"):
 4048            materialized = False
 4049        elif self._match_text_seq("MATERIALIZED"):
 4050            materialized = True
 4051        else:
 4052            materialized = None
 4053
 4054        cte = self.expression(
 4055            exp.CTE(
 4056                this=self._parse_wrapped(self._parse_statement),
 4057                alias=alias,
 4058                materialized=materialized,
 4059                key_expressions=key_expressions,
 4060            ),
 4061            comments=comments,
 4062        )
 4063
 4064        values = cte.this
 4065        if isinstance(values, exp.Values):
 4066            if values.alias:
 4067                cte.set("this", exp.select("*").from_(values))
 4068            else:
 4069                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))
 4070
 4071        return cte
 4072
 4073    def _parse_table_alias(
 4074        self, alias_tokens: t.Collection[TokenType] | None = None
 4075    ) -> exp.TableAlias | None:
 4076        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
 4077        # so this section tries to parse the clause version and if it fails, it treats the token
 4078        # as an identifier (alias)
 4079        if self._can_parse_limit_or_offset():
 4080            return None
 4081
 4082        any_token = self._match(TokenType.ALIAS)
 4083        alias = (
 4084            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
 4085            or self._parse_string_as_identifier()
 4086        )
 4087
 4088        index = self._index
 4089        if self._match(TokenType.L_PAREN):
 4090            columns = self._parse_csv(self._parse_function_parameter)
 4091            self._match_r_paren() if columns else self._retreat(index)
 4092        else:
 4093            columns = None
 4094
 4095        if not alias and not columns:
 4096            return None
 4097
 4098        table_alias = self.expression(exp.TableAlias(this=alias, columns=columns))
 4099
 4100        # We bubble up comments from the Identifier to the TableAlias
 4101        if isinstance(alias, exp.Identifier):
 4102            table_alias.add_comments(alias.pop_comments())
 4103
 4104        return table_alias
 4105
 4106    def _parse_subquery(
 4107        self, this: exp.Expr | None, parse_alias: bool = True
 4108    ) -> exp.Subquery | None:
 4109        if not this:
 4110            return None
 4111
 4112        return self.expression(
 4113            exp.Subquery(
 4114                this=this,
 4115                pivots=self._parse_pivots(),
 4116                alias=self._parse_table_alias() if parse_alias else None,
 4117                sample=self._parse_table_sample(),
 4118            )
 4119        )
 4120
 4121    def _implicit_unnests_to_explicit(self, this: E) -> E:
 4122        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm
 4123
 4124        refs = {_norm(this.args["from_"].this.copy(), dialect=self.dialect).alias_or_name}
 4125        for i, join in enumerate(this.args.get("joins") or []):
 4126            table = join.this
 4127            normalized_table = table.copy()
 4128            normalized_table.meta["maybe_column"] = True
 4129            normalized_table = _norm(normalized_table, dialect=self.dialect)
 4130
 4131            if isinstance(table, exp.Table) and not join.args.get("on"):
 4132                if len(normalized_table.parts) > 1 and normalized_table.parts[0].name in refs:
 4133                    table_as_column = table.to_column()
 4134                    unnest = exp.Unnest(expressions=[table_as_column])
 4135
 4136                    # Table.to_column creates a parent Alias node that we want to convert to
 4137                    # a TableAlias and attach to the Unnest, so it matches the parser's output
 4138                    if isinstance(table.args.get("alias"), exp.TableAlias):
 4139                        table_as_column.replace(table_as_column.this)
 4140                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)
 4141
 4142                    table.replace(unnest)
 4143
 4144            refs.add(normalized_table.alias_or_name)
 4145
 4146        return this
 4147
 4148    @t.overload
 4149    def _parse_query_modifiers(self, this: E) -> E: ...
 4150
 4151    @t.overload
 4152    def _parse_query_modifiers(self, this: None) -> None: ...
 4153
 4154    def _parse_query_modifiers(self, this):
 4155        if isinstance(this, self.MODIFIABLES):
 4156            for join in self._parse_joins():
 4157                this.append("joins", join)
 4158            for lateral in iter(self._parse_lateral, None):
 4159                this.append("laterals", lateral)
 4160
 4161            while True:
 4162                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
 4163                    modifier_token = self._curr
 4164                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
 4165                    key, expression = parser(self)
 4166
 4167                    if expression:
 4168                        if this.args.get(key):
 4169                            self.raise_error(
 4170                                f"Found multiple '{modifier_token.text.upper()}' clauses",
 4171                                token=modifier_token,
 4172                            )
 4173
 4174                        this.set(key, expression)
 4175                        if key == "limit":
 4176                            offset = expression.args.get("offset")
 4177                            expression.set("offset", None)
 4178
 4179                            if offset:
 4180                                offset = exp.Offset(expression=offset)
 4181                                this.set("offset", offset)
 4182
 4183                                limit_by_expressions = expression.expressions
 4184                                expression.set("expressions", None)
 4185                                offset.set("expressions", limit_by_expressions)
 4186                        continue
 4187                break
 4188
 4189        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from_"):
 4190            this = self._implicit_unnests_to_explicit(this)
 4191
 4192        return this
 4193
 4194    def _parse_hint_fallback_to_string(self) -> exp.Hint | None:
 4195        start = self._curr
 4196        while self._curr:
 4197            self._advance()
 4198
 4199        end = self._tokens[self._index - 1]
 4200        return exp.Hint(expressions=[self._find_sql(start, end)])
 4201
 4202    def _parse_hint_function_call(self) -> exp.Expr | None:
 4203        return self._parse_function_call()
 4204
 4205    def _parse_hint_body(self) -> exp.Hint | None:
 4206        start_index = self._index
 4207        should_fallback_to_string = False
 4208
 4209        hints = []
 4210        try:
 4211            for hint in iter(
 4212                lambda: self._parse_csv(
 4213                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
 4214                ),
 4215                [],
 4216            ):
 4217                hints.extend(hint)
 4218        except ParseError:
 4219            should_fallback_to_string = True
 4220
 4221        if should_fallback_to_string or self._curr:
 4222            self._retreat(start_index)
 4223            return self._parse_hint_fallback_to_string()
 4224
 4225        return self.expression(exp.Hint(expressions=hints))
 4226
 4227    def _parse_hint(self) -> exp.Hint | None:
 4228        if self._match(TokenType.HINT) and self._prev_comments:
 4229            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)
 4230
 4231        return None
 4232
 4233    def _parse_into(self) -> exp.Into | None:
 4234        if not self._match(TokenType.INTO):
 4235            return None
 4236
 4237        temp = self._match(TokenType.TEMPORARY)
 4238        unlogged = self._match_text_seq("UNLOGGED")
 4239        self._match(TokenType.TABLE)
 4240
 4241        return self.expression(
 4242            exp.Into(this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged)
 4243        )
 4244
 4245    def _parse_from(
 4246        self,
 4247        joins: bool = False,
 4248        skip_from_token: bool = False,
 4249        consume_pipe: bool = False,
 4250    ) -> exp.From | None:
 4251        if not skip_from_token and not self._match(TokenType.FROM):
 4252            return None
 4253
 4254        comments = self._prev_comments
 4255        return self.expression(
 4256            exp.From(this=self._parse_table(joins=joins, consume_pipe=consume_pipe)),
 4257            comments=comments,
 4258        )
 4259
 4260    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
 4261        return self.expression(
 4262            exp.MatchRecognizeMeasure(
 4263                window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
 4264                this=self._parse_expression(),
 4265            )
 4266        )
 4267
 4268    def _parse_match_recognize(self) -> exp.MatchRecognize | None:
 4269        if not self._match(TokenType.MATCH_RECOGNIZE):
 4270            return None
 4271
 4272        self._match_l_paren()
 4273
 4274        partition = self._parse_partition_by()
 4275        order = self._parse_order()
 4276
 4277        measures = (
 4278            self._parse_csv(self._parse_match_recognize_measure)
 4279            if self._match_text_seq("MEASURES")
 4280            else None
 4281        )
 4282
 4283        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
 4284            rows = exp.var("ONE ROW PER MATCH")
 4285        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
 4286            text = "ALL ROWS PER MATCH"
 4287            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
 4288                text += " SHOW EMPTY MATCHES"
 4289            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
 4290                text += " OMIT EMPTY MATCHES"
 4291            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
 4292                text += " WITH UNMATCHED ROWS"
 4293            rows = exp.var(text)
 4294        else:
 4295            rows = None
 4296
 4297        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
 4298            text = "AFTER MATCH SKIP"
 4299            if self._match_text_seq("PAST", "LAST", "ROW"):
 4300                text += " PAST LAST ROW"
 4301            elif self._match_text_seq("TO", "NEXT", "ROW"):
 4302                text += " TO NEXT ROW"
 4303            elif self._match_text_seq("TO", "FIRST"):
 4304                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
 4305            elif self._match_text_seq("TO", "LAST"):
 4306                text += f" TO LAST {self._advance_any().text}"  # type: ignore
 4307            after = exp.var(text)
 4308        else:
 4309            after = None
 4310
 4311        if self._match_text_seq("PATTERN"):
 4312            self._match_l_paren()
 4313
 4314            if not self._curr:
 4315                self.raise_error("Expecting )", self._curr)
 4316
 4317            paren = 1
 4318            start = self._curr
 4319
 4320            while self._curr and paren > 0:
 4321                if self._curr.token_type == TokenType.L_PAREN:
 4322                    paren += 1
 4323                if self._curr.token_type == TokenType.R_PAREN:
 4324                    paren -= 1
 4325
 4326                end = self._prev
 4327                self._advance()
 4328
 4329            if paren > 0:
 4330                self.raise_error("Expecting )", self._curr)
 4331
 4332            pattern = exp.var(self._find_sql(start, end))
 4333        else:
 4334            pattern = None
 4335
 4336        define = (
 4337            self._parse_csv(self._parse_name_as_expression)
 4338            if self._match_text_seq("DEFINE")
 4339            else None
 4340        )
 4341
 4342        self._match_r_paren()
 4343
 4344        return self.expression(
 4345            exp.MatchRecognize(
 4346                partition_by=partition,
 4347                order=order,
 4348                measures=measures,
 4349                rows=rows,
 4350                after=after,
 4351                pattern=pattern,
 4352                define=define,
 4353                alias=self._parse_table_alias(),
 4354            )
 4355        )
 4356
 4357    def _parse_lateral(self) -> exp.Lateral | None:
 4358        cross_apply: bool | None = None
 4359        if self._match_pair(TokenType.CROSS, TokenType.APPLY):
 4360            cross_apply = True
 4361        elif self._match_pair(TokenType.OUTER, TokenType.APPLY):
 4362            cross_apply = False
 4363
 4364        if cross_apply is not None:
 4365            this = self._parse_select(table=True)
 4366            view = None
 4367            outer = None
 4368        elif self._match(TokenType.LATERAL):
 4369            this = self._parse_select(table=True)
 4370            view = self._match(TokenType.VIEW)
 4371            outer = self._match(TokenType.OUTER)
 4372        else:
 4373            return None
 4374
 4375        if not this:
 4376            this = (
 4377                self._parse_unnest()
 4378                or self._parse_function()
 4379                or self._parse_id_var(any_token=False)
 4380            )
 4381
 4382            while self._match(TokenType.DOT):
 4383                this = exp.Dot(
 4384                    this=this,
 4385                    expression=self._parse_function() or self._parse_id_var(any_token=False),
 4386                )
 4387
 4388        ordinality: bool | None = None
 4389
 4390        if view:
 4391            table = self._parse_id_var(any_token=False)
 4392            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
 4393            table_alias: exp.TableAlias | None = self.expression(
 4394                exp.TableAlias(this=table, columns=columns)
 4395            )
 4396        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
 4397            # We move the alias from the lateral's child node to the lateral itself
 4398            table_alias = this.args["alias"].pop()
 4399        else:
 4400            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
 4401            table_alias = self._parse_table_alias()
 4402
 4403        return self.expression(
 4404            exp.Lateral(
 4405                this=this,
 4406                view=view,
 4407                outer=outer,
 4408                alias=table_alias,
 4409                cross_apply=cross_apply,
 4410                ordinality=ordinality,
 4411            )
 4412        )
 4413
 4414    def _parse_stream(self) -> exp.Stream | None:
 4415        index = self._index
 4416        if self._match(TokenType.STREAM):
 4417            if this := self._try_parse(self._parse_table):
 4418                return self.expression(exp.Stream(this=this))
 4419            self._retreat(index)
 4420        return None
 4421
 4422    def _parse_join_parts(
 4423        self,
 4424    ) -> tuple[Token | None, Token | None, Token | None]:
 4425        return (
 4426            self._prev if self._match_set(self.JOIN_METHODS) else None,
 4427            self._prev if self._match_set(self.JOIN_SIDES) else None,
 4428            self._prev if self._match_set(self.JOIN_KINDS) else None,
 4429        )
 4430
 4431    def _parse_using_identifiers(self) -> list[exp.Expr]:
 4432        def _parse_column_as_identifier() -> exp.Expr | None:
 4433            this = self._parse_column()
 4434            if isinstance(this, exp.Column):
 4435                return this.this
 4436            return this
 4437
 4438        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)
 4439
    def _parse_join(
        self,
        skip_join_token: bool = False,
        parse_bracket: bool = False,
        alias_tokens: t.Collection[TokenType] | None = None,
    ) -> exp.Join | None:
        """Parse one join clause starting at the current token.

        Covers comma joins, keyword joins (optional method/side/kind prefix,
        optional DIRECTED and join hint, then JOIN) and OUTER/CROSS APPLY.

        Args:
            skip_join_token: When true, a join is parsed even if neither a JOIN
                token, a STRAIGHT_JOIN kind, nor an APPLY pair was found.
            parse_bracket: Forwarded to `_parse_table` for the joined table(s).
            alias_tokens: Forwarded to `_parse_table` and `_parse_joins`.

        Returns:
            The parsed `exp.Join`, or None when no join starts here (including a
            comma that is not followed by a parseable table).
        """
        # Comma join, e.g. `FROM a, b`.
        if self._match(TokenType.COMMA):
            table = self._try_parse(lambda: self._parse_table(alias_tokens=alias_tokens))
            cross_join = self.expression(exp.Join(this=table)) if table else None

            # Only label the comma join as CROSS when the flag is set.
            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        # Remember the position so the optional prefix can be un-consumed below.
        index = self._index
        method, side, kind = self._parse_join_parts()
        directed = self._match_text_seq("DIRECTED")
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        # A STRAIGHT_JOIN kind token counts as the join keyword itself.
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            # The prefix did not lead to a join keyword: rewind and drop it. The
            # same tokens are re-examined for OUTER/CROSS APPLY just below.
            self._retreat(index)
            kind = None
            method = None
            side = None

        # NOTE(review): the third positional argument is presumably `advance=False`
        # (peek only) — confirm against `_match_pair`'s signature.
        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: dict[str, t.Any] = {
            "this": self._parse_table(parse_bracket=parse_bracket, alias_tokens=alias_tokens)
        }
        # An ARRAY-kind join may list additional comma-separated tables.
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket, alias_tokens=alias_tokens)
            )

        if method:
            kwargs["method"] = method.text.upper()
        if side:
            kwargs["side"] = side.text.upper()
        if kind:
            kwargs["kind"] = kind.text.upper()
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_disjunction()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            # No condition yet: speculatively parse joins nested under the joined
            # table and keep them only if an ON/USING for *this* join follows.
            index = self._index
            joins: list | None = list(self._parse_joins(alias_tokens=alias_tokens))

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_disjunction()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                # Speculation failed: discard the nested joins and rewind.
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        # Carry over comments from the prefix tokens and from the JOIN keyword.
        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments

        # When the flag is set, add an explicit `ON TRUE` to a join that has no
        # condition, no method, and a kind of None/INNER/OUTER.
        if (
            self.ADD_JOIN_ON_TRUE
            and not kwargs.get("on")
            and not kwargs.get("using")
            and not kwargs.get("method")
            and kwargs.get("kind") in (None, "INNER", "OUTER")
        ):
            kwargs["on"] = exp.true()

        if directed:
            kwargs["directed"] = directed

        return self.expression(exp.Join(**kwargs), comments=comments)
 4535
 4536    def _parse_opclass(self) -> exp.Expr | None:
 4537        this = self._parse_disjunction()
 4538
 4539        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
 4540            return this
 4541
 4542        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
 4543            return self.expression(exp.Opclass(this=this, expression=self._parse_table_parts()))
 4544
 4545        return this
 4546
 4547    def _parse_index_params(self) -> exp.IndexParameters:
 4548        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None
 4549
 4550        if self._match(TokenType.L_PAREN, advance=False):
 4551            columns = self._parse_wrapped_csv(self._parse_with_operator)
 4552        else:
 4553            columns = None
 4554
 4555        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
 4556        partition_by = self._parse_partition_by()
 4557        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
 4558        tablespace = (
 4559            self._parse_var(any_token=True)
 4560            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
 4561            else None
 4562        )
 4563        where = self._parse_where()
 4564
 4565        on = self._parse_field() if self._match(TokenType.ON) else None
 4566
 4567        return self.expression(
 4568            exp.IndexParameters(
 4569                using=using,
 4570                columns=columns,
 4571                include=include,
 4572                partition_by=partition_by,
 4573                where=where,
 4574                with_storage=with_storage,
 4575                tablespace=tablespace,
 4576                on=on,
 4577            )
 4578        )
 4579
 4580    def _parse_index(
 4581        self, index: exp.Expr | None = None, anonymous: bool = False
 4582    ) -> exp.Index | None:
 4583        if index or anonymous:
 4584            unique = None
 4585            primary = None
 4586            amp = None
 4587
 4588            self._match(TokenType.ON)
 4589            self._match(TokenType.TABLE)  # hive
 4590            table = self._parse_table_parts(schema=True)
 4591        else:
 4592            unique = self._match(TokenType.UNIQUE)
 4593            primary = self._match_text_seq("PRIMARY")
 4594            amp = self._match_text_seq("AMP")
 4595
 4596            if not self._match(TokenType.INDEX):
 4597                return None
 4598
 4599            index = self._parse_id_var()
 4600            table = None
 4601
 4602        params = self._parse_index_params()
 4603
 4604        return self.expression(
 4605            exp.Index(
 4606                this=index, table=table, unique=unique, primary=primary, amp=amp, params=params
 4607            )
 4608        )
 4609
 4610    def _parse_table_hints(self) -> list[exp.Expr] | None:
 4611        hints: list[exp.Expr] = []
 4612        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
 4613            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
 4614            hints.append(
 4615                self.expression(
 4616                    exp.WithTableHint(
 4617                        expressions=self._parse_csv(
 4618                            lambda: self._parse_function() or self._parse_var(any_token=True)
 4619                        )
 4620                    )
 4621                )
 4622            )
 4623            self._match_r_paren()
 4624        else:
 4625            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
 4626            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
 4627                hint = exp.IndexTableHint(this=self._prev.text.upper())
 4628
 4629                self._match_set((TokenType.INDEX, TokenType.KEY))
 4630                if self._match(TokenType.FOR):
 4631                    hint.set("target", self._advance_any() and self._prev.text.upper())
 4632
 4633                hint.set("expressions", self._parse_wrapped_id_vars())
 4634                hints.append(hint)
 4635
 4636        return hints or None
 4637
 4638    def _parse_table_part(self, schema: bool = False) -> exp.Expr | None:
 4639        return (
 4640            (not schema and self._parse_function(optional_parens=False))
 4641            or self._parse_id_var(any_token=False)
 4642            or self._parse_string_as_identifier()
 4643            or self._parse_placeholder()
 4644        )
 4645
    def _parse_table_parts_fast(self) -> exp.Table | None:
        """Fast path for plain dotted table names made only of identifier tokens.

        Consumes `ident(.ident)*` and builds an `exp.Table` directly. Bails out
        (rewinding to the starting position and returning None) when a dot is not
        followed by an identifier token, or when the name is directly followed by
        a token in `TABLE_POSTFIX_TOKENS`.

        Returns:
            The table, or None when the fast path does not apply.
        """
        index = self._index
        # Both lists are allocated lazily, only once there is something to store.
        parts: list[exp.Identifier] | None = None
        all_comments: list[str] | None = None

        while self._match_set(self.IDENTIFIER_TOKENS):
            token = self._prev
            comments = self._prev_comments

            has_dot = self._match(TokenType.DOT)
            curr_tt = self._curr.token_type

            if not has_dot:
                # End of the dotted name: give up if a postfix token follows.
                if curr_tt in self.TABLE_POSTFIX_TOKENS:
                    self._retreat(index)
                    return None
            elif curr_tt not in self.IDENTIFIER_TOKENS:
                # A dot must be followed by another identifier token.
                self._retreat(index)
                return None

            if parts is None:
                parts = []

            if comments:
                if all_comments is None:
                    all_comments = []
                # Collect the comments here and clear them on the parser; they
                # are attached to the resulting table at the end.
                all_comments.extend(comments)
                self._prev_comments = []

            parts.append(
                self.expression(
                    exp.Identifier(
                        this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
                    ),
                    token,
                )
            )

            if not has_dot:
                break

        if parts is None:
            return None

        n = len(parts)

        # Assign parts right-to-left: table, then db, then catalog.
        if n == 1:
            table: exp.Table = exp.Table(this=parts[0])
        elif n == 2:
            table = exp.Table(this=parts[1], db=parts[0])
        elif n >= 3:
            # Parts beyond the third are nested into Dot expressions under `this`.
            this: exp.Identifier | exp.Dot = parts[2]
            for i in range(3, n):
                this = exp.Dot(this=this, expression=parts[i])

            table = exp.Table(this=this, db=parts[1], catalog=parts[0])

        # NOTE(review): `parts` is non-empty here, so one of the branches above
        # always binds `table`; the `is None` case looks unreachable — confirm.
        if table is None:
            self._retreat(index)
        elif all_comments:
            table.add_comments(all_comments)
        return table
 4708
    def _parse_table_parts(
        self,
        schema: bool = False,
        is_db_reference: bool = False,
        wildcard: bool = False,
        fast: bool = False,
    ) -> exp.Table | exp.Dot | None:
        """Parse a dotted table (or database) reference into an `exp.Table`.

        Args:
            schema: Forwarded to `_parse_table_part` for every component.
            is_db_reference: Treat the parsed name as a database reference: the
                parts shift one slot (table -> db, db -> catalog) and the table
                slot is left empty.
            wildcard: Allow a `*` directly attached to the last part.
            fast: Delegate entirely to `_parse_table_parts_fast`.

        Returns:
            The parsed table, with any CHANGES, historical-data and pivot clauses
            attached; with `fast=True`, whatever the fast path returns (possibly
            None).
        """
        if fast:
            return self._parse_table_parts_fast()

        catalog: exp.Expr | str | None = None
        db: exp.Expr | str | None = None
        table: exp.Expr | str | None = self._parse_table_part(schema=schema)

        # Each dot shifts the parts one slot left (table -> db -> catalog) until
        # the catalog slot is filled.
        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot(this=table, expression=self._parse_table_part(schema=schema))
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        # Wildcard suffix: append "*" to the identifier, or use a bare "*" when
        # there is no table part.
        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(exp.Table(this=table, db=db, catalog=catalog))

        # Bubble up comments from identifier parts to the Table
        comments = []
        for part in table.parts:
            if part_comments := part.pop_comments():
                comments.extend(part_comments)
        if comments:
            table.add_comments(comments)

        # Optional trailing clauses; each is set only when present.
        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
 4779
    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Collection[TokenType] | None = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> exp.Expr | None:
        """Parse a table-like source at the current position.

        Tries, in order: the plain-name fast path, STREAM, LATERAL, UNNEST,
        derived VALUES, a subquery, and finally a (possibly bracketed or
        ROWS FROM) table name followed by its optional modifiers.

        Args:
            schema: Parse the name in schema mode and return the result of
                `_parse_schema` once the name (and partition) has been read.
            joins: Also parse trailing joins and attach them to the result.
            alias_tokens: Token types accepted as a table alias; defaults to
                `TABLE_ALIAS_TOKENS`.
            parse_bracket: Attempt a leading bracket expression as the table.
            is_db_reference: Forwarded to `_parse_table_parts`.
            parse_partition: Allow a PARTITION clause even when
                `SUPPORTS_PARTITION_SELECTION` is not set.
            consume_pipe: Forwarded to `_parse_select`.

        Returns:
            The parsed expression.
        """
        # Fast path: only attempted when no special mode is requested.
        if not schema and not is_db_reference and not consume_pipe and not joins:
            index = self._index
            table = self._parse_table_parts(fast=True)

            if table is not None:
                curr_tt = self._curr.token_type
                next_tt = self._next.token_type

                fast_terminators = self.TABLE_TERMINATORS

                # only return the table if we're sure there are no other operators
                # MATCH_CONDITION is a special case because it accepts any alias before it like LIMIT
                if curr_tt in fast_terminators and next_tt != TokenType.MATCH_CONDITION:
                    return table

                postfix_tokens = self.TABLE_POSTFIX_TOKENS

                if curr_tt not in postfix_tokens and next_tt not in postfix_tokens:
                    if alias := self._parse_table_alias(
                        alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS
                    ):
                        table.set("alias", alias)

                    if self._curr.token_type in fast_terminators:
                        return table

                # The fast path could not finish: rewind and use the general path.
                self._retreat(index)

        if stream := self._parse_stream():
            return stream

        if lateral := self._parse_lateral():
            return lateral

        if unnest := self._parse_unnest():
            return unnest

        if values := self._parse_derived_table_values():
            return values

        if subquery := self._parse_select(table=True, consume_pipe=consume_pipe):
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            if joins:
                for join in self._parse_joins():
                    subquery.append("joins", join)
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table(this=bracket)) if bracket else None

        rows_from_tables = (
            self._parse_wrapped_csv(self._parse_table)
            if self._match_text_seq("ROWS", "FROM")
            else None
        )
        rows_from = (
            self.expression(exp.Table(rows_from=rows_from_tables)) if rows_from_tables else None
        )

        only = self._match(TokenType.ONLY)

        # The dotted name is parsed only when neither a bracket nor ROWS FROM matched.
        this = t.cast(
            exp.Expr,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match(TokenType.STAR)

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        # Version and sample clauses are parsed before or after the alias
        # depending on the dialect flags; the complementary branches are below.
        if self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if self._match(TokenType.INDEXED_BY):
            this.set("indexed", self._parse_table_parts())
        elif self._match_text_seq("NOT", "INDEXED"):
            this.set("indexed", False)

        # `<table> AT <id>`: the table is converted to a column and wrapped in AtIndex.
        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex(this=this.to_column(copy=False), expression=self._parse_id_var())
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if not self.dialect.ALIAS_POST_VERSION:
            this.set("version", self._parse_version())

        if joins:
            for join in self._parse_joins(alias_tokens=alias_tokens):
                this.append("joins", join)

        # WITH ORDINALITY may be followed by an alias, which replaces any alias set earlier.
        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this
 4914
 4915    def _parse_version(self) -> exp.Version | None:
 4916        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
 4917            this = "TIMESTAMP"
 4918        elif self._match(TokenType.VERSION_SNAPSHOT):
 4919            this = "VERSION"
 4920        else:
 4921            return None
 4922
 4923        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
 4924            kind = self._prev.text.upper()
 4925            start = self._parse_bitwise()
 4926            self._match_texts(("TO", "AND"))
 4927            end = self._parse_bitwise()
 4928            expression: exp.Expr | None = self.expression(exp.Tuple(expressions=[start, end]))
 4929        elif self._match_text_seq("CONTAINED", "IN"):
 4930            kind = "CONTAINED IN"
 4931            expression = self.expression(
 4932                exp.Tuple(expressions=self._parse_wrapped_csv(self._parse_bitwise))
 4933            )
 4934        elif self._match(TokenType.ALL):
 4935            kind = "ALL"
 4936            expression = None
 4937        else:
 4938            self._match_text_seq("AS", "OF")
 4939            kind = "AS OF"
 4940            expression = self._parse_type()
 4941
 4942        return self.expression(exp.Version(this=this, expression=expression, kind=kind))
 4943
 4944    def _parse_historical_data(self) -> exp.HistoricalData | None:
 4945        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
 4946        index = self._index
 4947        historical_data = None
 4948        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
 4949            this = self._prev.text.upper()
 4950            kind = (
 4951                self._match(TokenType.L_PAREN)
 4952                and self._match_texts(self.HISTORICAL_DATA_KIND)
 4953                and self._prev.text.upper()
 4954            )
 4955            expression = self._match(TokenType.FARROW) and self._parse_bitwise()
 4956
 4957            if expression:
 4958                self._match_r_paren()
 4959                historical_data = self.expression(
 4960                    exp.HistoricalData(this=this, kind=kind, expression=expression)
 4961                )
 4962            else:
 4963                self._retreat(index)
 4964
 4965        return historical_data
 4966
 4967    def _parse_changes(self) -> exp.Changes | None:
 4968        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
 4969            return None
 4970
 4971        information = self._parse_var(any_token=True)
 4972        self._match_r_paren()
 4973
 4974        return self.expression(
 4975            exp.Changes(
 4976                information=information,
 4977                at_before=self._parse_historical_data(),
 4978                end=self._parse_historical_data(),
 4979            )
 4980        )
 4981
 4982    def _parse_unnest(self, with_alias: bool = True) -> exp.Unnest | None:
 4983        if not self._match_pair(TokenType.UNNEST, TokenType.L_PAREN, advance=False):
 4984            return None
 4985
 4986        self._advance()
 4987
 4988        expressions = self._parse_wrapped_csv(self._parse_equality)
 4989        offset: bool | exp.Expr = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
 4990
 4991        alias = self._parse_table_alias() if with_alias else None
 4992
 4993        if alias:
 4994            if self.dialect.UNNEST_COLUMN_ONLY:
 4995                if alias.args.get("columns"):
 4996                    self.raise_error("Unexpected extra column alias in unnest.")
 4997
 4998                alias.set("columns", [alias.this])
 4999                alias.set("this", None)
 5000
 5001            columns = alias.args.get("columns") or []
 5002            if offset and len(expressions) < len(columns):
 5003                offset = columns.pop()
 5004
 5005        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
 5006            self._match(TokenType.ALIAS)
 5007            offset = self._parse_id_var(
 5008                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
 5009            ) or exp.to_identifier("offset")
 5010
 5011        return self.expression(exp.Unnest(expressions=expressions, alias=alias, offset=offset))
 5012
 5013    def _parse_derived_table_values(self) -> exp.Values | None:
 5014        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
 5015        if not is_derived and not (
 5016            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
 5017            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
 5018        ):
 5019            return None
 5020
 5021        expressions = self._parse_csv(self._parse_value)
 5022        alias = self._parse_table_alias()
 5023
 5024        if is_derived:
 5025            self._match_r_paren()
 5026
 5027        return self.expression(
 5028            exp.Values(expressions=expressions, alias=alias or self._parse_table_alias())
 5029        )
 5030
 5031    def _parse_table_sample(self, as_modifier: bool = False) -> exp.TableSample | None:
 5032        if not self._match(TokenType.TABLE_SAMPLE) and not (
 5033            as_modifier and self._match_text_seq("USING", "SAMPLE")
 5034        ):
 5035            return None
 5036
 5037        bucket_numerator = None
 5038        bucket_denominator = None
 5039        bucket_field = None
 5040        percent = None
 5041        size = None
 5042        seed = None
 5043
 5044        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
 5045        matched_l_paren = self._match(TokenType.L_PAREN)
 5046
 5047        if self.TABLESAMPLE_CSV:
 5048            num = None
 5049            expressions = self._parse_csv(self._parse_primary)
 5050        else:
 5051            expressions = None
 5052            num = (
 5053                self._parse_factor()
 5054                if self._match(TokenType.NUMBER, advance=False)
 5055                else self._parse_primary() or self._parse_placeholder()
 5056            )
 5057
 5058        if self._match_text_seq("BUCKET"):
 5059            bucket_numerator = self._parse_number()
 5060            self._match_text_seq("OUT", "OF")
 5061            bucket_denominator = bucket_denominator = self._parse_number()
 5062            self._match(TokenType.ON)
 5063            bucket_field = self._parse_field()
 5064        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
 5065            percent = num
 5066        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
 5067            size = num
 5068        else:
 5069            percent = num
 5070
 5071        if matched_l_paren:
 5072            self._match_r_paren()
 5073
 5074        if self._match(TokenType.L_PAREN):
 5075            method = self._parse_var(upper=True)
 5076            seed = self._match(TokenType.COMMA) and self._parse_number()
 5077            self._match_r_paren()
 5078        elif self._match_texts(("SEED", "REPEATABLE")):
 5079            seed = self._parse_wrapped(self._parse_number)
 5080
 5081        if not method and self.DEFAULT_SAMPLING_METHOD:
 5082            method = exp.var(self.DEFAULT_SAMPLING_METHOD)
 5083
 5084        return self.expression(
 5085            exp.TableSample(
 5086                expressions=expressions,
 5087                method=method,
 5088                bucket_numerator=bucket_numerator,
 5089                bucket_denominator=bucket_denominator,
 5090                bucket_field=bucket_field,
 5091                percent=percent,
 5092                size=size,
 5093                seed=seed,
 5094            )
 5095        )
 5096
 5097    def _parse_pivots(self) -> list[exp.Pivot] | None:
 5098        if self._curr.token_type not in (TokenType.PIVOT, TokenType.UNPIVOT):
 5099            return None
 5100        return list(iter(self._parse_pivot, None)) or None
 5101
 5102    def _parse_joins(
 5103        self, alias_tokens: t.Collection[TokenType] | None = None
 5104    ) -> t.Iterator[exp.Join]:
 5105        return iter(lambda: self._parse_join(alias_tokens=alias_tokens), None)
 5106
 5107    def _parse_unpivot_columns(self) -> exp.UnpivotColumns | None:
 5108        if not self._match(TokenType.INTO):
 5109            return None
 5110
 5111        return self.expression(
 5112            exp.UnpivotColumns(
 5113                this=self._match_text_seq("NAME") and self._parse_column(),
 5114                expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
 5115            )
 5116        )
 5117
 5118    # https://duckdb.org/docs/sql/statements/pivot
 5119    def _parse_simplified_pivot(self, is_unpivot: bool | None = None) -> exp.Pivot:
 5120        def _parse_on() -> exp.Expr | None:
 5121            this = self._parse_bitwise()
 5122
 5123            if self._match(TokenType.IN):
 5124                # PIVOT ... ON col IN (row_val1, row_val2)
 5125                return self._parse_in(this)
 5126            if self._match(TokenType.ALIAS, advance=False):
 5127                # UNPIVOT ... ON (col1, col2, col3) AS row_val
 5128                return self._parse_alias(this)
 5129
 5130            return this
 5131
 5132        this = self._parse_table()
 5133        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
 5134        into = self._parse_unpivot_columns()
 5135        using = self._match(TokenType.USING) and self._parse_csv(
 5136            lambda: self._parse_alias(self._parse_column())
 5137        )
 5138        group = self._parse_group()
 5139
 5140        return self.expression(
 5141            exp.Pivot(
 5142                this=this,
 5143                expressions=expressions,
 5144                using=using,
 5145                group=group,
 5146                unpivot=is_unpivot,
 5147                into=into,
 5148            )
 5149        )
 5150
 5151    def _parse_pivot_in(self) -> exp.In:
 5152        def _parse_aliased_expression() -> exp.Expr | None:
 5153            this = self._parse_select_or_expression()
 5154
 5155            self._match(TokenType.ALIAS)
 5156            alias = self._parse_bitwise()
 5157            if alias:
 5158                if isinstance(alias, exp.Column) and not alias.db:
 5159                    alias = alias.this
 5160                return self.expression(exp.PivotAlias(this=this, alias=alias))
 5161
 5162            return this
 5163
 5164        value = self._parse_column()
 5165
 5166        if not self._match(TokenType.IN):
 5167            self.raise_error("Expecting IN")
 5168
 5169        if self._match(TokenType.L_PAREN):
 5170            if self._match(TokenType.ANY):
 5171                exprs: list[exp.Expr] = ensure_list(exp.PivotAny(this=self._parse_order()))
 5172            else:
 5173                exprs = self._parse_csv(_parse_aliased_expression)
 5174            self._match_r_paren()
 5175            return self.expression(exp.In(this=value, expressions=exprs))
 5176
 5177        return self.expression(exp.In(this=value, field=self._parse_id_var()))
 5178
 5179    def _parse_pivot_aggregation(self) -> exp.Expr | None:
 5180        func = self._parse_function()
 5181        if not func:
 5182            if self._prev.token_type == TokenType.COMMA:
 5183                return None
 5184            self.raise_error("Expecting an aggregation function in PIVOT")
 5185
 5186        return self._parse_alias(func)
 5187
 5188    def _parse_pivot(self) -> exp.Pivot | None:
 5189        index = self._index
 5190        include_nulls = None
 5191
 5192        if self._match(TokenType.PIVOT):
 5193            unpivot = False
 5194        elif self._match(TokenType.UNPIVOT):
 5195            unpivot = True
 5196
 5197            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
 5198            if self._match_text_seq("INCLUDE", "NULLS"):
 5199                include_nulls = True
 5200            elif self._match_text_seq("EXCLUDE", "NULLS"):
 5201                include_nulls = False
 5202        else:
 5203            return None
 5204
 5205        expressions = []
 5206
 5207        if not self._match(TokenType.L_PAREN):
 5208            self._retreat(index)
 5209            return None
 5210
 5211        if unpivot:
 5212            expressions = self._parse_csv(self._parse_column)
 5213        else:
 5214            expressions = self._parse_csv(self._parse_pivot_aggregation)
 5215
 5216        if not expressions:
 5217            self.raise_error("Failed to parse PIVOT's aggregation list")
 5218
 5219        if not self._match(TokenType.FOR):
 5220            self.raise_error("Expecting FOR")
 5221
 5222        fields = []
 5223        while True:
 5224            field = self._try_parse(self._parse_pivot_in)
 5225            if not field:
 5226                break
 5227            fields.append(field)
 5228
 5229        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
 5230            self._parse_bitwise
 5231        )
 5232
 5233        group = self._parse_group()
 5234
 5235        self._match_r_paren()
 5236
 5237        pivot = self.expression(
 5238            exp.Pivot(
 5239                expressions=expressions,
 5240                fields=fields,
 5241                unpivot=unpivot,
 5242                include_nulls=include_nulls,
 5243                default_on_null=default_on_null,
 5244                group=group,
 5245            )
 5246        )
 5247
 5248        if unpivot:
 5249            pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions])
 5250            for pivot_field in pivot.fields:
 5251                if isinstance(pivot_field, exp.In):
 5252                    pivot_field.set("this", _unpivot_target(pivot_field.this))
 5253
 5254        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
 5255            pivot.set("alias", self._parse_table_alias())
 5256
 5257        if not unpivot:
 5258            names = self._pivot_column_names(t.cast(list[exp.Expr], expressions))
 5259
 5260            columns: list[exp.Expr] = []
 5261            all_fields = []
 5262            for pivot_field in pivot.fields:
 5263                pivot_field_expressions = pivot_field.expressions
 5264
 5265                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
 5266                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
 5267                    continue
 5268
 5269                all_fields.append(
 5270                    [
 5271                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
 5272                        for fld in pivot_field_expressions
 5273                    ]
 5274                )
 5275
 5276            if all_fields:
 5277                if names:
 5278                    all_fields.append(names)
 5279
 5280                # Generate all possible combinations of the pivot columns
 5281                # e.g PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
 5282                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
 5283                for fld_parts_tuple in itertools.product(*all_fields):
 5284                    fld_parts = list(fld_parts_tuple)
 5285
 5286                    if names and self.PREFIXED_PIVOT_COLUMNS:
 5287                        # Move the "name" to the front of the list
 5288                        fld_parts.insert(0, fld_parts.pop(-1))
 5289
 5290                    columns.append(exp.to_identifier("_".join(fld_parts)))
 5291
 5292            pivot.set("columns", columns)
 5293            pivot.set("identify_pivot_strings", self.IDENTIFY_PIVOT_STRINGS)
 5294            pivot.set("prefixed_pivot_columns", self.PREFIXED_PIVOT_COLUMNS)
 5295            pivot.set("pivot_column_naming", self.PIVOT_COLUMN_NAMING)
 5296
 5297        return pivot
 5298
 5299    def _pivot_column_names(self, aggregations: list[exp.Expr]) -> list[str]:
 5300        return [agg.alias for agg in aggregations if agg.alias]
 5301
 5302    def _parse_prewhere(self, skip_where_token: bool = False) -> exp.PreWhere | None:
 5303        if not skip_where_token and not self._match(TokenType.PREWHERE):
 5304            return None
 5305
 5306        comments = self._prev_comments
 5307        return self.expression(
 5308            exp.PreWhere(this=self._parse_disjunction()),
 5309            comments=comments,
 5310        )
 5311
 5312    def _parse_where(self, skip_where_token: bool = False) -> exp.Where | None:
 5313        if not skip_where_token and not self._match(TokenType.WHERE):
 5314            return None
 5315
 5316        comments = self._prev_comments
 5317        return self.expression(
 5318            exp.Where(this=self._parse_disjunction()),
 5319            comments=comments,
 5320        )
 5321
    def _parse_group(self, skip_group_by_token: bool = False) -> exp.Group | None:
        """Parse a GROUP BY clause into an `exp.Group` node.

        Besides plain grouping expressions, this collects the ALL / DISTINCT flag, CUBE,
        ROLLUP, GROUPING SETS and TOTALS, each optionally preceded by WITH.

        Args:
            skip_group_by_token: When true, the GROUP BY token is not matched and the
                clause body is parsed right away.

        Returns:
            The parsed node, or None if the GROUP BY token was expected but not found.
        """
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        # Keyword arguments for exp.Group; list-valued entries are created on first access
        elements: dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        # If a query modifier token comes next, there is nothing else to parse for this clause
        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group(**elements), comments=comments)  # type: ignore

        while True:
            index = self._index

            # Plain grouping expressions; CUBE / ROLLUP are not consumed here (the callback
            # yields None for them) so that the dedicated parser below can handle them
            elements["expressions"].extend(
                self._parse_csv(
                    lambda: (
                        None
                        if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                        else self._parse_disjunction()
                    )
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if cube_or_rollup := self._parse_cube_or_rollup(with_prefix=with_prefix):
                key = "rollup" if isinstance(cube_or_rollup, exp.Rollup) else "cube"
                elements[key].append(cube_or_rollup)
            elif grouping_sets := self._parse_grouping_sets():
                elements["grouping_sets"].append(grouping_sets)
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            # At most one token was consumed since `before_with_index` (e.g. only a WITH that
            # was not followed by one of the constructs above): undo that and stop
            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            # Nothing was consumed in this iteration, so another pass would not make progress
            if index == self._index:
                break

        return self.expression(exp.Group(**elements), comments=comments)  # type: ignore
 5369
 5370    def _parse_cube_or_rollup(self, with_prefix: bool = False) -> exp.Cube | exp.Rollup | None:
 5371        if self._match(TokenType.CUBE):
 5372            kind: type[exp.Cube | exp.Rollup] = exp.Cube
 5373        elif self._match(TokenType.ROLLUP):
 5374            kind = exp.Rollup
 5375        else:
 5376            return None
 5377
 5378        return self.expression(
 5379            kind(expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_bitwise))
 5380        )
 5381
 5382    def _parse_grouping_sets(self) -> exp.GroupingSets | None:
 5383        if self._match(TokenType.GROUPING_SETS):
 5384            return self.expression(
 5385                exp.GroupingSets(expressions=self._parse_wrapped_csv(self._parse_grouping_set))
 5386            )
 5387        return None
 5388
 5389    def _parse_grouping_set(self) -> exp.Expr | None:
 5390        return self._parse_grouping_sets() or self._parse_cube_or_rollup() or self._parse_bitwise()
 5391
 5392    def _parse_having(self, skip_having_token: bool = False) -> exp.Having | None:
 5393        if not skip_having_token and not self._match(TokenType.HAVING):
 5394            return None
 5395        comments = self._prev_comments
 5396        return self.expression(
 5397            exp.Having(this=self._parse_disjunction()),
 5398            comments=comments,
 5399        )
 5400
 5401    def _parse_qualify(self) -> exp.Qualify | None:
 5402        if not self._match(TokenType.QUALIFY):
 5403            return None
 5404        return self.expression(exp.Qualify(this=self._parse_disjunction()))
 5405
 5406    def _parse_connect_with_prior(self) -> exp.Expr | None:
 5407        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
 5408            exp.Prior(this=self._parse_bitwise())
 5409        )
 5410        connect = self._parse_disjunction()
 5411        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
 5412        return connect
 5413
 5414    def _parse_connect(self, skip_start_token: bool = False) -> exp.Connect | None:
 5415        if skip_start_token:
 5416            start = None
 5417        elif self._match(TokenType.START_WITH):
 5418            start = self._parse_disjunction()
 5419        else:
 5420            return None
 5421
 5422        self._match(TokenType.CONNECT_BY)
 5423        nocycle = self._match_text_seq("NOCYCLE")
 5424        connect = self._parse_connect_with_prior()
 5425
 5426        if not start and self._match(TokenType.START_WITH):
 5427            start = self._parse_disjunction()
 5428
 5429        return self.expression(exp.Connect(start=start, connect=connect, nocycle=nocycle))
 5430
 5431    def _parse_name_as_expression(self) -> exp.Expr | None:
 5432        this = self._parse_id_var(any_token=True)
 5433        if self._match(TokenType.ALIAS):
 5434            this = self.expression(exp.Alias(alias=this, this=self._parse_disjunction()))
 5435        return this
 5436
 5437    def _parse_interpolate(self) -> list[exp.Expr] | None:
 5438        if self._match_text_seq("INTERPOLATE"):
 5439            return self._parse_wrapped_csv(self._parse_name_as_expression)
 5440        return None
 5441
 5442    def _parse_order(
 5443        self, this: exp.Expr | None = None, skip_order_token: bool = False
 5444    ) -> exp.Expr | None:
 5445        siblings = None
 5446        if not skip_order_token and not self._match(TokenType.ORDER_BY):
 5447            if not self._match(TokenType.ORDER_SIBLINGS_BY):
 5448                return this
 5449
 5450            siblings = True
 5451
 5452        comments = self._prev_comments
 5453        return self.expression(
 5454            exp.Order(
 5455                this=this,
 5456                expressions=self._parse_csv(self._parse_ordered),
 5457                siblings=siblings,
 5458            ),
 5459            comments=comments,
 5460        )
 5461
 5462    def _parse_sort(self, exp_class: type[E], token: TokenType) -> E | None:
 5463        if not self._match(token):
 5464            return None
 5465        return self.expression(exp_class(expressions=self._parse_csv(self._parse_ordered)))
 5466
 5467    def _parse_ordered(
 5468        self, parse_method: t.Callable[[], exp.Expr | None] | None = None
 5469    ) -> exp.Ordered | None:
 5470        this = parse_method() if parse_method else self._parse_disjunction()
 5471        if not this:
 5472            return None
 5473
 5474        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
 5475            this = exp.var("ALL")
 5476
 5477        asc = self._match(TokenType.ASC)
 5478        desc: bool | None = True if self._match(TokenType.DESC) else (False if asc else None)
 5479
 5480        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
 5481        is_nulls_last = self._match_text_seq("NULLS", "LAST")
 5482
 5483        nulls_first = is_nulls_first or False
 5484        explicitly_null_ordered = is_nulls_first or is_nulls_last
 5485
 5486        if (
 5487            not explicitly_null_ordered
 5488            and (
 5489                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
 5490                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
 5491            )
 5492            and self.dialect.NULL_ORDERING != "nulls_are_last"
 5493        ):
 5494            nulls_first = True
 5495
 5496        if self._match_text_seq("WITH", "FILL"):
 5497            with_fill = self.expression(
 5498                exp.WithFill(
 5499                    from_=self._match(TokenType.FROM) and self._parse_bitwise(),
 5500                    to=self._match_text_seq("TO") and self._parse_bitwise(),
 5501                    step=self._match_text_seq("STEP") and self._parse_bitwise(),
 5502                    interpolate=self._parse_interpolate(),
 5503                )
 5504            )
 5505        else:
 5506            with_fill = None
 5507
 5508        return self.expression(
 5509            exp.Ordered(this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill)
 5510        )
 5511
 5512    def _parse_limit_options(self) -> exp.LimitOptions | None:
 5513        percent = self._match_set((TokenType.PERCENT, TokenType.MOD))
 5514        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
 5515        self._match_text_seq("ONLY")
 5516        with_ties = self._match_text_seq("WITH", "TIES")
 5517
 5518        if not (percent or rows or with_ties):
 5519            return None
 5520
 5521        return self.expression(exp.LimitOptions(percent=percent, rows=rows, with_ties=with_ties))
 5522
    def _parse_limit(
        self,
        this: exp.Expr | None = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> exp.Expr | None:
        """Parse a LIMIT, TOP or FETCH clause.

        Args:
            this: The expression being limited; stored as `this` of the `exp.Limit` node and
                returned unchanged when no limiting clause is found.
            top: When true, the TOP token is matched instead of LIMIT.
            skip_limit_token: When true, the LIMIT / TOP token is not matched and the count
                is parsed right away.

        Returns:
            An `exp.Limit` for LIMIT / TOP, an `exp.Fetch` for FETCH, or `this` otherwise
            (including `LIMIT ALL` in dialects with `SUPPORTS_LIMIT_ALL`).
        """
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                # TOP accepts either a bare number or a parenthesized term / query
                limit_paren = self._match(TokenType.L_PAREN)
                expression = (
                    self._parse_term() or self._parse_select()
                    if limit_paren
                    else self._parse_number()
                )

                if limit_paren:
                    self._match_r_paren()

            else:
                if self.dialect.SUPPORTS_LIMIT_ALL and self._match(TokenType.ALL):
                    return this

                # Parsing LIMIT x% (i.e x PERCENT) as a term leads to an error, since
                # we try to build an exp.Mod expr. For that matter, we backtrack and instead
                # consume the factor plus parse the percentage separately
                index = self._index
                expression = self._try_parse(self._parse_term)
                if isinstance(expression, exp.Mod):
                    self._retreat(index)
                    expression = self._parse_factor()
                elif not expression:
                    expression = self._parse_factor()
            limit_options = self._parse_limit_options()

            # `LIMIT <offset>, <count>`: the value parsed so far is the offset
            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit(
                    this=this,
                    expression=expression,
                    offset=offset,
                    limit_options=limit_options,
                    expressions=self._parse_limit_by(),
                ),
                comments=comments,
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            # The direction defaults to FIRST when neither FIRST nor NEXT is given
            direction = (
                self._prev.text.upper()
                if self._match_set((TokenType.FIRST, TokenType.NEXT))
                else "FIRST"
            )

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch(
                    direction=direction, count=count, limit_options=self._parse_limit_options()
                )
            )

        return this
 5593
 5594    def _parse_offset(self, this: exp.Expr | None = None) -> exp.Expr | None:
 5595        if not self._match(TokenType.OFFSET):
 5596            return this
 5597
 5598        count = self._parse_term()
 5599        self._match_set((TokenType.ROW, TokenType.ROWS))
 5600
 5601        return self.expression(
 5602            exp.Offset(this=this, expression=count, expressions=self._parse_limit_by())
 5603        )
 5604
 5605    def _can_parse_limit_or_offset(self) -> bool:
 5606        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
 5607            return False
 5608
 5609        index = self._index
 5610        result = bool(
 5611            self._try_parse(self._parse_limit, retreat=True)
 5612            or self._try_parse(self._parse_offset, retreat=True)
 5613        )
 5614        self._retreat(index)
 5615
 5616        # MATCH_CONDITION (...) is a special construct that should not be consumed by limit/offset
 5617        if self._next.token_type == TokenType.MATCH_CONDITION:
 5618            result = False
 5619
 5620        return result
 5621
 5622    def _can_parse_named_window(self) -> bool:
 5623        # `WINDOW` is in ID_VAR_TOKENS so it could be mistakenly consumed as an implicit alias.
 5624        # Refuse only when the following tokens look like a named-window clause: `WINDOW <id> AS (`.
 5625        if not self._match(TokenType.WINDOW, advance=False):
 5626            return False
 5627
 5628        name = self._tokens[self._index + 1] if self._index + 1 < len(self._tokens) else None
 5629        if name is None or name.token_type not in self.ID_VAR_TOKENS:
 5630            return False
 5631
 5632        alias_tok = self._tokens[self._index + 2] if self._index + 2 < len(self._tokens) else None
 5633        if alias_tok is None or alias_tok.token_type != TokenType.ALIAS:
 5634            return False
 5635
 5636        body = self._tokens[self._index + 3] if self._index + 3 < len(self._tokens) else None
 5637        return body is not None and body.token_type == TokenType.L_PAREN
 5638
 5639    def _parse_limit_by(self) -> list[exp.Expr] | None:
 5640        return self._parse_csv(self._parse_bitwise) if self._match_text_seq("BY") else None
 5641
 5642    def _parse_locks(self) -> list[exp.Lock]:
 5643        locks = []
 5644        while True:
 5645            update, key = None, None
 5646            if self._match_text_seq("FOR", "UPDATE"):
 5647                update = True
 5648            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
 5649                "LOCK", "IN", "SHARE", "MODE"
 5650            ):
 5651                update = False
 5652            elif self._match_text_seq("FOR", "KEY", "SHARE"):
 5653                update, key = False, True
 5654            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
 5655                update, key = True, True
 5656            else:
 5657                break
 5658
 5659            expressions = None
 5660            if self._match_text_seq("OF"):
 5661                expressions = self._parse_csv(lambda: self._parse_table(schema=True))
 5662
 5663            wait: bool | exp.Expr | None = None
 5664            if self._match_text_seq("NOWAIT"):
 5665                wait = True
 5666            elif self._match_text_seq("WAIT"):
 5667                wait = self._parse_primary()
 5668            elif self._match_text_seq("SKIP", "LOCKED"):
 5669                wait = False
 5670
 5671            locks.append(
 5672                self.expression(
 5673                    exp.Lock(update=update, expressions=expressions, wait=wait, key=key)
 5674                )
 5675            )
 5676
 5677        return locks
 5678
 5679    def parse_set_operation(
 5680        self, this: exp.Expr | None, consume_pipe: bool = False
 5681    ) -> exp.Expr | None:
 5682        start = self._index
 5683        _, side_token, kind_token = self._parse_join_parts()
 5684
 5685        side = side_token.text if side_token else None
 5686        kind = kind_token.text if kind_token else None
 5687
 5688        if not self._match_set(self.SET_OPERATIONS):
 5689            self._retreat(start)
 5690            return None
 5691
 5692        token_type = self._prev.token_type
 5693
 5694        if token_type == TokenType.UNION:
 5695            operation: type[exp.SetOperation] = exp.Union
 5696        elif token_type == TokenType.EXCEPT:
 5697            operation = exp.Except
 5698        else:
 5699            operation = exp.Intersect
 5700
 5701        comments = self._prev.comments
 5702
 5703        if self._match(TokenType.DISTINCT):
 5704            distinct: bool | None = True
 5705        elif self._match(TokenType.ALL):
 5706            distinct = False
 5707        else:
 5708            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
 5709            if distinct is None:
 5710                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")
 5711
 5712        by_name = (
 5713            self._match_text_seq("BY", "NAME")
 5714            or self._match_text_seq("STRICT", "CORRESPONDING")
 5715            or None
 5716        )
 5717        if self._match_text_seq("CORRESPONDING"):
 5718            by_name = True
 5719            if not side and not kind:
 5720                kind = "INNER"
 5721
 5722        on_column_list = None
 5723        if by_name and self._match_texts(("ON", "BY")):
 5724            on_column_list = self._parse_wrapped_csv(self._parse_column)
 5725
 5726        expression = self._parse_select(
 5727            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
 5728        )
 5729
 5730        return self.expression(
 5731            operation(
 5732                this=this,
 5733                distinct=distinct,
 5734                by_name=by_name,
 5735                expression=expression,
 5736                side=side,
 5737                kind=kind,
 5738                on=on_column_list,
 5739            ),
 5740            comments=comments,
 5741        )
 5742
 5743    def _parse_set_operations(self, this: exp.Expr | None) -> exp.Expr | None:
 5744        while this:
 5745            setop = self.parse_set_operation(this)
 5746            if not setop:
 5747                break
 5748            this = setop
 5749
 5750        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
 5751            expression = this.expression
 5752
 5753            if expression:
 5754                for arg in self.SET_OP_MODIFIERS:
 5755                    expr = expression.args.get(arg)
 5756                    if expr:
 5757                        this.set(arg, expr.pop())
 5758
 5759        return this
 5760
 5761    def _parse_expression(self) -> exp.Expr | None:
 5762        return self._parse_alias(self._parse_assignment())
 5763
 5764    def _parse_assignment(self) -> exp.Expr | None:
 5765        this = self._parse_disjunction()
 5766        if not this and self._next.token_type in self.ASSIGNMENT:
 5767            # This allows us to parse <non-identifier token> := <expr>
 5768            this = exp.column(
 5769                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
 5770            )
 5771
 5772        while self._match_set(self.ASSIGNMENT):
 5773            if isinstance(this, exp.Column) and len(this.parts) == 1:
 5774                this = this.this
 5775
 5776            comments = self._prev_comments
 5777            this = self.expression(
 5778                self.ASSIGNMENT[self._prev.token_type](
 5779                    this=this, expression=self._parse_assignment()
 5780                ),
 5781                comments=comments,
 5782            )
 5783
 5784        return this
 5785
 5786    def _parse_disjunction(self) -> exp.Expr | None:
 5787        this = self._parse_conjunction()
 5788        while self._match_set(self.DISJUNCTION):
 5789            comments = self._prev_comments
 5790            this = self.expression(
 5791                self.DISJUNCTION[self._prev.token_type](
 5792                    this=this, expression=self._parse_conjunction()
 5793                ),
 5794                comments=comments,
 5795            )
 5796        return this
 5797
 5798    def _parse_conjunction(self) -> exp.Expr | None:
 5799        this = self._parse_equality()
 5800        while self._match_set(self.CONJUNCTION):
 5801            comments = self._prev_comments
 5802            this = self.expression(
 5803                self.CONJUNCTION[self._prev.token_type](
 5804                    this=this, expression=self._parse_equality()
 5805                ),
 5806                comments=comments,
 5807            )
 5808        return this
 5809
 5810    def _parse_equality(self) -> exp.Expr | None:
 5811        this = self._parse_comparison()
 5812        while self._match_set(self.EQUALITY):
 5813            comments = self._prev_comments
 5814            this = self.expression(
 5815                self.EQUALITY[self._prev.token_type](
 5816                    this=this, expression=self._parse_comparison()
 5817                ),
 5818                comments=comments,
 5819            )
 5820        return this
 5821
 5822    def _parse_comparison(self) -> exp.Expr | None:
 5823        this = self._parse_range()
 5824        while self._match_set(self.COMPARISON):
 5825            comments = self._prev_comments
 5826            this = self.expression(
 5827                self.COMPARISON[self._prev.token_type](this=this, expression=self._parse_range()),
 5828                comments=comments,
 5829            )
 5830        return this
 5831
 5832    def _parse_range(self, this: exp.Expr | None = None) -> exp.Expr | None:
 5833        this = this or self._parse_bitwise()
 5834        negate = self._match(TokenType.NOT)
 5835
 5836        if self._match_set(self.RANGE_PARSERS):
 5837            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
 5838            if not expression:
 5839                return this
 5840
 5841            this = expression
 5842        elif self._match(TokenType.ISNULL) or (negate and self._match(TokenType.NULL)):
 5843            this = self.expression(exp.Is(this=this, expression=exp.Null()))
 5844
 5845        # Postgres supports ISNULL and NOTNULL for conditions.
 5846        # https://blog.andreiavram.ro/postgresql-null-composite-type/
 5847        if self._match(TokenType.NOTNULL):
 5848            this = self.expression(exp.Is(this=this, expression=exp.Null()))
 5849            this = self.expression(exp.Not(this=this))
 5850
 5851        if negate:
 5852            this = self._negate_range(this)
 5853
 5854        if self._match(TokenType.IS):
 5855            this = self._parse_is(this)
 5856
 5857        return this
 5858
 5859    def _negate_range(self, this: exp.Expr | None = None) -> exp.Expr | None:
 5860        if not this:
 5861            return this
 5862
 5863        expression = this.this if isinstance(this, exp.Escape) else this
 5864        if isinstance(expression, (exp.Like, exp.ILike)):
 5865            expression.set("negate", True)
 5866            return this
 5867
 5868        return self.expression(exp.Not(this=this))
 5869
 5870    def _parse_is(self, this: exp.Expr | None) -> exp.Expr | None:
 5871        index = self._index - 1
 5872        negate = self._match(TokenType.NOT)
 5873
 5874        if self._match_text_seq("DISTINCT", "FROM"):
 5875            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
 5876            return self.expression(klass(this=this, expression=self._parse_bitwise()))
 5877
 5878        if self._match(TokenType.JSON):
 5879            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()
 5880
 5881            if self._match_text_seq("WITH"):
 5882                _with = True
 5883            elif self._match_text_seq("WITHOUT"):
 5884                _with = False
 5885            else:
 5886                _with = None
 5887
 5888            unique = self._match(TokenType.UNIQUE)
 5889            self._match_text_seq("KEYS")
 5890            expression: exp.Expr | None = self.expression(
 5891                exp.JSON(this=kind, with_=_with, unique=unique)
 5892            )
 5893        else:
 5894            expression = self._parse_null() or self._parse_bitwise()
 5895            if not expression:
 5896                self._retreat(index)
 5897                return None
 5898
 5899        this = self.expression(exp.Is(this=this, expression=expression))
 5900        this = self.expression(exp.Not(this=this)) if negate else this
 5901        return self._parse_column_ops(this)
 5902
 5903    def _parse_in(self, this: exp.Expr | None, alias: bool = False) -> exp.In:
 5904        unnest = self._parse_unnest(with_alias=False)
 5905        if unnest:
 5906            this = self.expression(exp.In(this=this, unnest=unnest))
 5907        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
 5908            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
 5909            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))
 5910
 5911            if len(expressions) == 1 and isinstance(query := expressions[0], exp.Query):
 5912                this = self.expression(
 5913                    exp.In(this=this, query=self._parse_query_modifiers(query).subquery(copy=False))
 5914                )
 5915            else:
 5916                this = self.expression(exp.In(this=this, expressions=expressions))
 5917
 5918            if matched_l_paren:
 5919                self._match_r_paren(this)
 5920            elif not self._match(TokenType.R_BRACKET, expression=this):
 5921                self.raise_error("Expecting ]")
 5922        else:
 5923            this = self.expression(exp.In(this=this, field=self._parse_column()))
 5924
 5925        return this
 5926
 5927    def _parse_between(self, this: exp.Expr | None) -> exp.Between:
 5928        symmetric = None
 5929        if self._match_text_seq("SYMMETRIC"):
 5930            symmetric = True
 5931        elif self._match_text_seq("ASYMMETRIC"):
 5932            symmetric = False
 5933
 5934        low = self._parse_bitwise()
 5935        self._match(TokenType.AND)
 5936        high = self._parse_bitwise()
 5937
 5938        return self.expression(exp.Between(this=this, low=low, high=high, symmetric=symmetric))
 5939
 5940    def _parse_escape(self, this: exp.Expr | None) -> exp.Expr | None:
 5941        if not self._match(TokenType.ESCAPE):
 5942            return this
 5943        return self.expression(
 5944            exp.Escape(this=this, expression=self._parse_string() or self._parse_null())
 5945        )
 5946
    def _parse_interval_span(self, this: exp.Expr) -> exp.Interval:
        """Build an `exp.Interval` from an already parsed value and the unit tokens after it.

        Args:
            this: The interval's value, e.g. a number or a string literal.

        Returns:
            An `exp.Interval` whose unit is a single unit, an `exp.IntervalSpan`
            (`<unit> TO <unit>`) or None.
        """
        # handle day-time format interval span with omitted units:
        #   INTERVAL '<number days> hh[:][mm[:ss[.ff]]]' <maybe `unit TO unit`>
        interval_span_units_omitted = None
        if (
            this
            and this.is_string
            and self.SUPPORTS_OMITTED_INTERVAL_SPAN_UNIT
            and exp.INTERVAL_DAY_TIME_RE.match(this.name)
        ):
            index = self._index

            # Var "TO" Var
            first_unit = self._parse_var(any_token=True, upper=True)
            second_unit = None
            if first_unit and self._match_text_seq("TO"):
                second_unit = self._parse_var(any_token=True, upper=True)

            # The units count as omitted unless a complete `<unit> TO <unit>` pair follows
            interval_span_units_omitted = not (first_unit and second_unit)

            # The above was only a lookahead, so restore the position
            self._retreat(index)

        if interval_span_units_omitted:
            unit = None
        else:
            # The unit may be a function call; otherwise accept a VAR token or a valid unit name
            unit = self._parse_function()
            if not unit and (
                self._curr.token_type == TokenType.VAR
                or self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS
            ):
                unit = self._parse_var(any_token=True, upper=True)

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            # Exactly one match: split the string into its value and (uppercased) unit
            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var(this=parts[0][1].upper()))

        # `<unit> TO <unit>` spans, only for parsers with INTERVAL_SPANS enabled
        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan(
                    this=unit,
                    expression=self._parse_function()
                    or self._parse_var(any_token=True, upper=True),
                )
            )

        return self.expression(exp.Interval(this=this, unit=unit))
 6004
 6005    def _parse_interval(self, require_interval: bool = True) -> exp.Add | exp.Interval | None:
 6006        index = self._index
 6007
 6008        if not self._match(TokenType.INTERVAL) and require_interval:
 6009            return None
 6010
 6011        if self._match(TokenType.STRING, advance=False):
 6012            this = self._parse_primary()
 6013        else:
 6014            this = self._parse_term()
 6015
 6016        if not this or (
 6017            isinstance(this, exp.Column)
 6018            and not this.table
 6019            and not this.this.quoted
 6020            and self._curr
 6021            and self._curr.text.upper() not in self.dialect.VALID_INTERVAL_UNITS
 6022        ):
 6023            self._retreat(index)
 6024            return None
 6025
 6026        interval = self._parse_interval_span(this)
 6027
 6028        index = self._index
 6029        self._match(TokenType.PLUS)
 6030
 6031        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
 6032        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
 6033            return self.expression(exp.Add(this=interval, expression=self._parse_interval(False)))
 6034
 6035        self._retreat(index)
 6036        return interval
 6037
 6038    def _parse_bitwise(self) -> exp.Expr | None:
 6039        this = self._parse_term()
 6040
 6041        while True:
 6042            if self._match_set(self.BITWISE):
 6043                this = self.expression(
 6044                    self.BITWISE[self._prev.token_type](this=this, expression=self._parse_term())
 6045                )
 6046            elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE):
 6047                this = self.expression(
 6048                    exp.DPipe(
 6049                        this=this,
 6050                        expression=self._parse_term(),
 6051                        safe=not self.dialect.STRICT_STRING_CONCAT,
 6052                    )
 6053                )
 6054            elif self._match(TokenType.DQMARK):
 6055                this = self.expression(
 6056                    exp.Coalesce(this=this, expressions=ensure_list(self._parse_term()))
 6057                )
 6058            elif self._match_pair(TokenType.LT, TokenType.LT):
 6059                this = self.expression(
 6060                    exp.BitwiseLeftShift(this=this, expression=self._parse_term())
 6061                )
 6062            elif self._match_pair(TokenType.GT, TokenType.GT):
 6063                this = self.expression(
 6064                    exp.BitwiseRightShift(this=this, expression=self._parse_term())
 6065                )
 6066            else:
 6067                break
 6068
 6069        return this
 6070
 6071    def _parse_term(self) -> exp.Expr | None:
 6072        this = self._parse_factor()
 6073
 6074        while self._match_set(self.TERM):
 6075            klass = self.TERM[self._prev.token_type]
 6076            comments = self._prev_comments
 6077            expression = self._parse_factor()
 6078
 6079            this = self.expression(klass(this=this, expression=expression), comments=comments)
 6080
 6081            if isinstance(this, exp.Collate):
 6082                expr = this.expression
 6083
 6084                # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise
 6085                # fallback to Identifier / Var
 6086                if isinstance(expr, exp.Column) and len(expr.parts) == 1:
 6087                    ident = expr.this
 6088                    if isinstance(ident, exp.Identifier):
 6089                        this.set("expression", ident if ident.quoted else exp.var(ident.name))
 6090
 6091        return this
 6092
 6093    def _parse_factor(self) -> exp.Expr | None:
 6094        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
 6095        this = self._parse_at_time_zone(parse_method())
 6096
 6097        while self._match_set(self.FACTOR):
 6098            klass = self.FACTOR[self._prev.token_type]
 6099            comments = self._prev_comments
 6100            expression = parse_method()
 6101
 6102            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
 6103                self._retreat(self._index - 1)
 6104                return this
 6105
 6106            this = self.expression(klass(this=this, expression=expression), comments=comments)
 6107
 6108            if isinstance(this, exp.Div):
 6109                this.set("typed", self.dialect.TYPED_DIVISION)
 6110                this.set("safe", self.dialect.SAFE_DIVISION)
 6111
 6112        return this
 6113
 6114    def _parse_exponent(self) -> exp.Expr | None:
 6115        this = self._parse_unary()
 6116        while self._match_set(self.EXPONENT):
 6117            comments = self._prev_comments
 6118            this = self.expression(
 6119                self.EXPONENT[self._prev.token_type](this=this, expression=self._parse_unary()),
 6120                comments=comments,
 6121            )
 6122        return this
 6123
 6124    def _parse_unary(self) -> exp.Expr | None:
 6125        if self._match_set(self.UNARY_PARSERS):
 6126            return self.UNARY_PARSERS[self._prev.token_type](self)
 6127        return self._parse_type()
 6128
 6129    def _parse_type(
 6130        self, parse_interval: bool = True, fallback_to_identifier: bool = False
 6131    ) -> exp.Expr | None:
 6132        if not fallback_to_identifier and (atom := self._parse_atom()) is not None:
 6133            return atom
 6134
 6135        if interval := parse_interval and self._parse_interval():
 6136            return self._parse_column_ops(interval)
 6137
 6138        index = self._index
 6139        data_type = self._parse_types(check_func=True, allow_identifiers=False)
 6140
 6141        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
 6142        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
 6143        if isinstance(data_type, exp.Cast):
 6144            # This constructor can contain ops directly after it, for instance struct unnesting:
 6145            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
 6146            return self._parse_column_ops(data_type)
 6147
 6148        if data_type:
 6149            index2 = self._index
 6150            this = self._parse_primary()
 6151
 6152            if isinstance(this, exp.Literal):
 6153                literal = this.name
 6154                this = self._parse_column_ops(this)
 6155
 6156                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
 6157                if parser:
 6158                    return parser(self, this, data_type)
 6159
 6160                if (
 6161                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
 6162                    and data_type.is_type(exp.DType.TIMESTAMP)
 6163                    and TIME_ZONE_RE.search(literal)
 6164                ):
 6165                    data_type = exp.DType.TIMESTAMPTZ.into_expr()
 6166
 6167                return self.expression(exp.Cast(this=this, to=data_type))
 6168
 6169            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
 6170            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
 6171            #
 6172            # If the index difference here is greater than 1, that means the parser itself must have
 6173            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
 6174            #
 6175            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
 6176            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
 6177            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
 6178            # DECIMAL(38, 0)) in order to facilitate the data type's transpilation.
 6179            #
 6180            # In these cases, we don't really want to return the converted type, but instead retreat
 6181            # and try to parse a Column or Identifier in the section below.
 6182            if data_type.expressions and index2 - index > 1:
 6183                self._retreat(index2)
 6184                return self._parse_column_ops(data_type)
 6185
 6186            self._retreat(index)
 6187
 6188        if fallback_to_identifier:
 6189            return self._parse_id_var()
 6190
 6191        return self._parse_column()
 6192
 6193    def _parse_type_size(self) -> exp.DataTypeParam | None:
 6194        this = self._parse_type()
 6195        if not this:
 6196            return None
 6197
 6198        if isinstance(this, exp.Column) and not this.table:
 6199            this = exp.var(this.name.upper())
 6200
 6201        return self.expression(
 6202            exp.DataTypeParam(this=this, expression=self._parse_var(any_token=True))
 6203        )
 6204
 6205    def _parse_user_defined_type(self, identifier: exp.Identifier) -> exp.Expr | None:
 6206        type_name = identifier.name
 6207
 6208        while self._match(TokenType.DOT):
 6209            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"
 6210
 6211        return exp.DataType.from_str(type_name, dialect=self.dialect, udt=True)
 6212
    def _parse_types(
        self,
        check_func: bool = False,
        schema: bool = False,
        allow_identifiers: bool = True,
        with_collation: bool = False,
    ) -> exp.Expr | None:
        """Parse a data type starting at the current token.

        Args:
            check_func: when True, a parenthesized type such as `TYPE(...)` is only
                accepted if a string literal follows it; otherwise the parser
                retreats to the starting position and None is returned. The flag is
                also forwarded to nested type parses.
            schema: forwarded to nested type parses. When True, bracketed values
                after the type are always kept as part of an array data type.
            allow_identifiers: when True and the current token is not a type token,
                try to interpret a VAR identifier as a type name.
            with_collation: when True, a trailing COLLATE clause is stored on the
                resulting DataType under the "collate" arg.

        Returns:
            The parsed type expression (a DataType, PseudoType, ObjectIdentifier, or
            a Cast when inline constructor values were parsed), or None if the
            tokens do not form a type.
        """
        # Position to retreat to if the tokens turn out not to form a type
        index = self._index
        this: exp.Expr | None = None

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                # Re-tokenize the identifier's text to find out whether it names a type
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                # Note: the walrus rebinds type_token even when the check fails
                if tokens and (type_token := tokens[0].token_type) in self.TYPE_TOKENS:
                    # Text that spans several tokens is delegated to DataType.from_str
                    if len(tokens) > 1:
                        return exp.DataType.from_str(identifier.name, dialect=self.dialect)
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    # Not a type: step back one token (the identifier) and give up
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType(this=self._prev.text.upper()))

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier(this=self._prev.text.upper()))

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DType.MAP,
                expressions=[key_type, value_type],
                nested=True,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        # Set once a `TYPE(...)` form has been parsed; see the check_func handling below
        maybe_func = False

        # Parenthesized arguments: how they are parsed depends on the kind of type
        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                # NULLABLE(<type>) collapses into the inner type, flagged as nullable
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif type_token == TokenType.JSON:
                # ClickHouse JSON type supports arguments: JSON(col Type, SKIP col, param=value)
                # https://clickhouse.com/docs/sql-reference/data-types/newjson
                expressions = self._parse_csv(self._parse_json_type_arg)
            elif is_aggregate:
                # First argument is a function (or a bare name), the rest are types
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    # NOTE(review): unlike the other failure paths in this method, this
                    # one returns without retreating to `index` - confirm this is intended
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions = self._parse_vector_expressions(expressions)

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: list[exp.Expr] | None = None

        # Angle-bracket form of nested types: <TYPE> '<' ... '>'
        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func,
                        schema=schema,
                        allow_identifiers=allow_identifiers,
                        with_collation=True,
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            # Optional inline constructor values right after the closing '>'
            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_disjunction)
                if not values and is_struct:
                    # No values for a struct: step back over the opening bracket/paren
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            # A matched time zone clause means the type cannot be a function call
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = exp.DType.TIMETZ if type_token in self.TIMES else exp.DType.TIMESTAMPTZ
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DType.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            # INTERVAL <unit> [TO <unit>] used as a data type
            if self._curr.text.upper() in self.dialect.VALID_INTERVAL_UNITS:
                unit = self._parse_var(upper=True)
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType(this=self.expression(exp.Interval(unit=unit))))
            else:
                this = self.expression(exp.DataType(this=exp.DType.INTERVAL))
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DType.NULL)

        if maybe_func and check_func:
            # Accept `TYPE(...)` as a type only if a string literal follows it;
            # otherwise undo everything and report that no type was parsed
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            # The string was only peeked at, so hand it back
            self._retreat(index2)

        if not this:
            # No specialized node was built above: derive the DataType from the type token
            assert type_token is not None
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.name} to unsigned.")

                type_token = unsigned_type_token or type_token

            # NULLABLE without parentheses can be a column (Presto/Trino)
            if type_token == TokenType.NULLABLE and not expressions:
                self._retreat(index)
                return None

            this = exp.DataType(
                this=exp.DType[type_token.name],
                expressions=expressions,
                nested=nested,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DType.LIST, expressions=[this], nested=True)

        # From here on `index` marks the position just before any array suffix
        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        # Each iteration wraps `this` in one more level of ARRAY
        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_disjunction) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS
                    or datatype_token == TokenType.ARRAY
                    or not self._match(TokenType.R_BRACKET, advance=False)
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DType.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        # Apply the converter registered for this type in TYPE_CONVERTERS, if any
        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DType):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        if with_collation and isinstance(this, exp.DataType) and self._match(TokenType.COLLATE):
            this.set("collate", self._parse_identifier() or self._parse_column())

        return this
 6467
 6468    def _parse_json_type_arg(self) -> exp.Expr | None:
 6469        """Parse a single argument to ClickHouse's JSON type."""
 6470
 6471        # SKIP col or SKIP REGEXP 'pattern'
 6472        if self._match_text_seq("SKIP"):
 6473            regexp = self._match(TokenType.RLIKE)
 6474            arg = self._parse_column()
 6475            if isinstance(arg, exp.Column):
 6476                arg = arg.to_dot()
 6477            return self.expression(exp.SkipJSONColumn(regexp=regexp, expression=arg))
 6478
 6479        param_or_col = self._parse_column()
 6480        if not isinstance(param_or_col, exp.Column):
 6481            return None
 6482
 6483        # Parameter: name=value (e.g., max_dynamic_paths=2)
 6484        if len(param_or_col.parts) == 1 and self._match(TokenType.EQ):
 6485            param = param_or_col.name
 6486            value = self._parse_primary()
 6487            return self.expression(exp.EQ(this=exp.var(param), expression=value))
 6488
 6489        # Column type hint: col_name Type
 6490        col = param_or_col.to_dot()
 6491        kind = self._parse_types(check_func=False, allow_identifiers=False)
 6492        return self.expression(exp.ColumnDef(this=col, kind=kind))
 6493
 6494    def _parse_vector_expressions(self, expressions: list[exp.Expr]) -> list[exp.Expr]:
 6495        return [exp.DataType.from_str(expressions[0].name, dialect=self.dialect), *expressions[1:]]
 6496
 6497    def _parse_struct_types(self, type_required: bool = False) -> exp.Expr | None:
 6498        index = self._index
 6499
 6500        if (
 6501            self._curr
 6502            and self._next
 6503            and self._curr.token_type in self.TYPE_TOKENS
 6504            and self._next.token_type in self.TYPE_TOKENS
 6505        ):
 6506            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
 6507            # type token. Without this, the list will be parsed as a type and we'll eventually crash
 6508            this = self._parse_id_var()
 6509        else:
 6510            this = (
 6511                self._parse_type(parse_interval=False, fallback_to_identifier=True)
 6512                or self._parse_id_var()
 6513            )
 6514
 6515        self._match(TokenType.COLON)
 6516
 6517        if (
 6518            type_required
 6519            and not isinstance(this, exp.DataType)
 6520            and not self._match_set(self.TYPE_TOKENS, advance=False)
 6521        ):
 6522            self._retreat(index)
 6523            return self._parse_types()
 6524
 6525        return self._parse_column_def(this)
 6526
 6527    def _parse_at_time_zone(self, this: exp.Expr | None) -> exp.Expr | None:
 6528        if not self._match_text_seq("AT", "TIME", "ZONE"):
 6529            return this
 6530        return self._parse_at_time_zone(
 6531            self.expression(exp.AtTimeZone(this=this, zone=self._parse_unary()))
 6532        )
 6533
 6534    def _parse_atom(self) -> exp.Expr | None:
 6535        if (
 6536            self._curr.token_type in self.IDENTIFIER_TOKENS
 6537            and (column := self._parse_column()) is not None
 6538        ):
 6539            return column
 6540
 6541        token = self._curr
 6542        token_type = token.token_type
 6543
 6544        if not (primary_parser := self.PRIMARY_PARSERS.get(token_type)):
 6545            return None
 6546
 6547        next_type = self._next.token_type
 6548
 6549        if (
 6550            next_type in self.COLUMN_OPERATORS
 6551            or next_type in self.COLUMN_POSTFIX_TOKENS
 6552            or (token_type == TokenType.STRING and next_type == TokenType.STRING)
 6553        ):
 6554            return None
 6555
 6556        self._advance()
 6557        return primary_parser(self, token)
 6558
 6559    def _parse_column(self) -> exp.Expr | None:
 6560        column: exp.Expr | None = self._parse_column_parts_fast()
 6561        if column is None:
 6562            this = self._parse_column_reference()
 6563            if not this:
 6564                this = self._parse_bracket(this)
 6565            column = self._parse_column_ops(this) if this else this
 6566
 6567        if column:
 6568            if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS:
 6569                column.set("join_mark", self._match(TokenType.JOIN_MARKER))
 6570            if self.COLON_IS_VARIANT_EXTRACT:
 6571                column = self._parse_colon_as_variant_extract(column)
 6572
 6573        return column
 6574
    def _parse_column_parts_fast(self) -> exp.Column | exp.Dot | None:
        """Fast path for simple column and dot references (a, a.b, ...).

        Greedily consumes VAR/IDENTIFIER tokens separated by DOTs, then checks
        that nothing complex follows. If it does, retreats and returns None so
        the slow path can handle it. For >4 parts, wraps in exp.Dot nodes.
        """
        # Position to restore whenever the fast path has to bail out
        index = self._index
        parts: list[exp.Identifier] | None = None
        all_comments: list[str] | None = None

        while self._match_set(self.IDENTIFIER_TOKENS):
            token = self._prev
            comments = self._prev_comments

            # Only checked for the first part: a name registered in
            # NO_PAREN_FUNCTION_PARSERS is not handled by the fast path
            if parts is None and token.text.upper() in self.NO_PAREN_FUNCTION_PARSERS:
                self._retreat(index)
                return None

            has_dot = self._match(TokenType.DOT)
            curr_tt = self._curr.token_type

            if not has_dot:
                # Last part: bail out if a column operator or postfix token follows
                if curr_tt in self.COLUMN_OPERATORS or curr_tt in self.COLUMN_POSTFIX_TOKENS:
                    self._retreat(index)
                    return None
            elif curr_tt not in self.IDENTIFIER_TOKENS:
                # A dot must be followed by another identifier token
                self._retreat(index)
                return None

            if parts is None:
                parts = []

            if comments:
                # Gather comments from every part so they end up on the final node
                if all_comments is None:
                    all_comments = []
                all_comments.extend(comments)
                # presumably keeps self.expression() below from also attaching
                # them to the Identifier - TODO confirm
                self._prev_comments = []

            parts.append(
                self.expression(
                    exp.Identifier(
                        this=token.text, quoted=token.token_type == TokenType.IDENTIFIER
                    ),
                    token,
                )
            )

            if not has_dot:
                break

        # Nothing was consumed, so there is nothing to retreat from
        if parts is None:
            return None

        n = len(parts)

        # The last part is the column name; earlier parts are table, db and catalog
        if n == 1:
            column: exp.Column | exp.Dot = exp.Column(this=parts[0])
        elif n == 2:
            column = exp.Column(this=parts[1], table=parts[0])
        elif n == 3:
            column = exp.Column(this=parts[2], table=parts[1], db=parts[0])
        else:
            # The first four parts form the Column; each extra part adds a Dot
            column = exp.Column(this=parts[3], table=parts[2], db=parts[1], catalog=parts[0])

            for i in range(4, n):
                column = exp.Dot(this=column, expression=parts[i])

        if all_comments:
            column.add_comments(all_comments)

        return column
 6647
 6648    def _parse_column_reference(self) -> exp.Expr | None:
 6649        this = self._parse_field()
 6650        if (
 6651            not this
 6652            and self._match(TokenType.VALUES, advance=False)
 6653            and self.VALUES_FOLLOWED_BY_PAREN
 6654            and (not self._next or self._next.token_type != TokenType.L_PAREN)
 6655        ):
 6656            this = self._parse_id_var()
 6657
 6658        if isinstance(this, exp.Identifier):
 6659            # We bubble up comments from the Identifier to the Column
 6660            this = self.expression(exp.Column(this=this), comments=this.pop_comments())
 6661
 6662        return this
 6663
 6664    def _build_json_extract(
 6665        self,
 6666        this: exp.Expr | None,
 6667        path_parts: list[exp.JSONPathPart],
 6668        escape: bool | None,
 6669    ) -> tuple[exp.Expr | None, list[exp.JSONPathPart]]:
 6670        if len(path_parts) > 1:
 6671            this = self.expression(
 6672                exp.JSONExtract(
 6673                    this=this,
 6674                    expression=exp.JSONPath(expressions=path_parts, escape=escape),
 6675                    variant_extract=True,
 6676                    requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION,
 6677                )
 6678            )
 6679            path_parts = [exp.JSONPathRoot()]
 6680
 6681        return this, path_parts
 6682
    def _parse_colon_as_variant_extract(self, this: exp.Expr | None) -> exp.Expr | None:
        """Parse COLON-introduced path access that follows `this`.

        Each COLON is followed by an optional key and then any sequence of
        `.key`, `[...]` and `::type` suffixes. Keys and literal subscripts are
        collected as JSON path parts; the collected path is turned into a
        JSONExtract (see `_build_json_extract`) whenever a non-literal bracket
        or a cast interrupts it, and once more at the end.

        Returns:
            The resulting expression, or `this` unchanged if no path part was
            collected.
        """
        path_parts: list[exp.JSONPathPart] = [exp.JSONPathRoot()]
        # Becomes True once a quoted key or a string subscript has been seen
        escape = None

        while self._match(TokenType.COLON):
            key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,))

            if key:
                if isinstance(key, exp.Identifier) and key.quoted:
                    escape = True
                path_parts.append(exp.JSONPathKey(this=key.name))

            # Consume every suffix that follows the key
            while True:
                if self._match(TokenType.DOT):
                    next_key = self._parse_id_var(any_token=True, tokens=(TokenType.SELECT,))

                    if next_key:
                        if isinstance(next_key, exp.Identifier) and next_key.quoted:
                            escape = True
                        path_parts.append(exp.JSONPathKey(this=next_key.name))
                elif self._match(TokenType.L_BRACKET):
                    bracket_expr = self._parse_bracket_key_value()

                    if not self._match(TokenType.R_BRACKET):
                        self.raise_error("Expected ]")

                    if bracket_expr:
                        # Strings, stars and numbers extend the JSON path itself
                        if bracket_expr.is_string:
                            path_parts.append(exp.JSONPathKey(this=bracket_expr.name))
                            escape = True
                        elif bracket_expr.is_star:
                            path_parts.append(exp.JSONPathSubscript(this=exp.JSONPathWildcard()))
                        elif bracket_expr.is_number:
                            path_parts.append(exp.JSONPathSubscript(this=bracket_expr.to_py()))
                        else:
                            # Any other expression: flush the path collected so far
                            # and apply the subscript as a Bracket on the result
                            this, path_parts = self._build_json_extract(this, path_parts, escape)
                            escape = None

                            this = self.expression(
                                exp.Bracket(
                                    this=this, expressions=[bracket_expr], json_access=True
                                ),
                            )

                elif self._match(TokenType.DCOLON):
                    # Flush the path collected so far so the cast applies to the extract
                    this, path_parts = self._build_json_extract(this, path_parts, escape)
                    escape = None

                    cast_type = self._parse_types()
                    if cast_type:
                        this = self.expression(exp.Cast(this=this, to=cast_type))
                    else:
                        self.raise_error("Expected type after '::'")
                else:
                    break

        # Flush whatever path parts are still pending
        this, _ = self._build_json_extract(this, path_parts, escape)

        return this
 6742
    def _parse_dcolon(self) -> exp.Expr | None:
        """Parse the right-hand side of a cast column operator by delegating to `_parse_types`.

        `_parse_column_ops` calls this for operators found in `CAST_COLUMN_OPERATORS`.
        """
        return self._parse_types()
 6745
    def _parse_column_ops(self, this: exp.Expr | None) -> exp.Expr | None:
        """Apply the brackets and column operators that trail `this`.

        Leading brackets are consumed first. Then, for as long as the current token type
        is a key of `COLUMN_OPERATORS`, the operator is consumed, its right-hand side is
        parsed and both sides are combined into a new `this`.

        Args:
            this: The expression parsed so far.

        Returns:
            The expression after all trailing operators and brackets were applied.
        """
        while self._curr.token_type in self.BRACKETS:
            this = self._parse_bracket(this)

        column_operators = self.COLUMN_OPERATORS
        cast_column_operators = self.CAST_COLUMN_OPERATORS
        while self._curr:
            op_token = self._curr.token_type

            if op_token not in column_operators:
                break
            # `op` may be falsy for a registered operator; that case is handled by the
            # Column / Dot construction further below instead of a handler call
            op = column_operators[op_token]
            self._advance()

            if op_token in cast_column_operators:
                # Cast operators take a type on their right-hand side
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bitwise()
                # A column operand directly followed by a dot gets its own operators
                # resolved before the handler of the current operator runs
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                # Shift every part one level up: the old column name becomes the table,
                # the old table becomes the db and the old db becomes the catalog
                this = self.expression(
                    exp.Column(
                        this=field,
                        table=this.this,
                        db=this.args.get("table"),
                        catalog=this.args.get("db"),
                    ),
                    comments=this.comments,
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot(this=this, expression=field.this))
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot(this=this, expression=field))

            # Comments attached to the right-hand side are moved onto the combined node
            if field and field.comments:
                t.cast(exp.Expr, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return this
 6805
    def _parse_paren(self) -> exp.Expr | None:
        """Parse a parenthesized construct starting at `(`.

        Depending on what is found between the parentheses the result is an empty
        `exp.Tuple`, a subquery, an `exp.Tuple` of several expressions or an `exp.Paren`
        around a single expression.

        Returns:
            The parsed expression, or None when the current token is not `(`.
        """
        if not self._match(TokenType.L_PAREN):
            return None

        # Comments that were attached to the opening parenthesis
        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = seq_get(expressions, 0)

        if not this and self._match(TokenType.R_PAREN, advance=False):
            # Nothing was parsed and `)` follows right away: `()`
            this = self.expression(exp.Tuple())
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, (exp.Subquery, exp.Values)):
            # Set operations and query modifiers are parsed before wrapping in a subquery
            this = self._parse_subquery(
                this=self._parse_query_modifiers(self._parse_set_operations(this)),
                parse_alias=False,
            )
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            # Several expressions, or the last consumed token was a comma
            this = self.expression(exp.Tuple(expressions=expressions))
        else:
            this = self.expression(exp.Paren(this=this))

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)

        # A parenthesized aggregate function is additionally passed to _parse_window
        if isinstance(this, exp.Paren) and isinstance(this.this, exp.AggFunc):
            return self._parse_window(this)

        return this
 6843
 6844    def _parse_primary(self) -> exp.Expr | None:
 6845        if self._match_set(self.PRIMARY_PARSERS):
 6846            token_type = self._prev.token_type
 6847            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)
 6848
 6849            if token_type == TokenType.STRING:
 6850                expressions = [primary]
 6851                while self._match(TokenType.STRING):
 6852                    expressions.append(exp.Literal.string(self._prev.text))
 6853
 6854                if len(expressions) > 1:
 6855                    return self.expression(
 6856                        exp.Concat(expressions=expressions, coalesce=self.dialect.CONCAT_COALESCE)
 6857                    )
 6858
 6859            return primary
 6860
 6861        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
 6862            return exp.Literal.number(f"0.{self._prev.text}")
 6863
 6864        return self._parse_paren()
 6865
 6866    def _parse_field(
 6867        self,
 6868        any_token: bool = False,
 6869        tokens: t.Collection[TokenType] | None = None,
 6870        anonymous_func: bool = False,
 6871    ) -> exp.Expr | None:
 6872        if anonymous_func:
 6873            field = (
 6874                self._parse_function(anonymous=anonymous_func, any_token=any_token)
 6875                or self._parse_primary()
 6876            )
 6877        else:
 6878            field = self._parse_primary() or self._parse_function(
 6879                anonymous=anonymous_func, any_token=any_token
 6880            )
 6881        return field or self._parse_id_var(any_token=any_token, tokens=tokens)
 6882
 6883    def _parse_function(
 6884        self,
 6885        functions: dict[str, t.Callable] | None = None,
 6886        anonymous: bool = False,
 6887        optional_parens: bool = True,
 6888        any_token: bool = False,
 6889    ) -> exp.Expr | None:
 6890        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
 6891        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
 6892        fn_syntax = False
 6893        if (
 6894            self._match(TokenType.L_BRACE, advance=False)
 6895            and self._next
 6896            and self._next.text.upper() == "FN"
 6897        ):
 6898            self._advance(2)
 6899            fn_syntax = True
 6900
 6901        func = self._parse_function_call(
 6902            functions=functions,
 6903            anonymous=anonymous,
 6904            optional_parens=optional_parens,
 6905            any_token=any_token,
 6906        )
 6907
 6908        if fn_syntax:
 6909            self._match(TokenType.R_BRACE)
 6910
 6911        return func
 6912
 6913    def _parse_function_args(self, alias: bool = False) -> list[exp.Expr]:
 6914        return self._parse_csv(lambda: self._parse_lambda(alias=alias))
 6915
    def _parse_function_call(
        self,
        functions: dict[str, t.Callable] | None = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> exp.Expr | None:
        """Parse a function call that starts at the current token.

        Args:
            functions: Mapping from upper-cased function name to builder; `self.FUNCTIONS`
                is used when this is None.
            anonymous: When set, entries of `FUNCTION_PARSERS` and `functions` are ignored
                and the call is built as `exp.Anonymous`.
            optional_parens: Whether functions that are written without parentheses
                (`NO_PAREN_FUNCTION_PARSERS`, `NO_PAREN_FUNCTIONS`) may be parsed.
            any_token: When set, every token type outside `RESERVED_TOKENS` is accepted as
                a function name; otherwise the token type has to be in `FUNC_TOKENS`.

        Returns:
            The parsed expression, or None when no function call starts here.
        """
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this: str | exp.Expr = self._curr.text
        upper = self._curr.text.upper()

        # Names that directly follow a dot are never treated as no-paren functions
        after_dot = prev.token_type == TokenType.DOT
        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if (
            optional_parens
            and parser
            and token_type not in self.INVALID_FUNC_NAME_TOKENS
            and not after_dot
        ):
            self._advance()
            return self._parse_window(parser(self))

        if self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS and not after_dot:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type]())

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        # Consume the function name and the opening parenthesis
        self._advance(2)

        # From here on `parser` refers to the FUNCTION_PARSERS entry, if there is one
        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            result = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in self.SUBQUERY_TOKENS:
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate(this=expr), comments=comments)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            # Aliased arguments are parsed for unknown functions and for the functions
            # listed in FUNCTIONS_WITH_ALIASED_ARGS
            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_function_args(alias)

            post_func_comments = self._curr.comments if self._curr else None
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                # mypyc compiled functions don't have __code__, so we use
                # try/except to check if func_builder accepts 'dialect'.
                try:
                    func = func_builder(args)
                except TypeError:
                    func = func_builder(args, dialect=self.dialect)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    # Keep the name exactly as it was written in the SQL text
                    func.meta["name"] = this

                result = func
            else:
                # A function name given as an IDENTIFIER token is kept as a quoted identifier
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                result = self.expression(exp.Anonymous(this=this, expressions=args))

            result = result.update_positions(token)

        if isinstance(result, exp.Expr):
            result.add_comments(comments)

        # The closing parenthesis is matched through `_match` when a FUNCTION_PARSERS entry
        # exists for the name and through `_match_r_paren` otherwise
        if parser:
            self._match(TokenType.R_PAREN, expression=result)
        else:
            self._match_r_paren(result)
        return self._parse_window(result)
 7032
    def _to_prop_eq(self, expression: exp.Expr, index: int) -> exp.Expr:
        """Return `expression` unchanged.

        `_kv_to_prop_eq` calls this for every argument that is not an instance of
        `KEY_VALUE_DEFINITIONS`, passing the argument and its position in the list.
        """
        return expression
 7035
 7036    def _kv_to_prop_eq(
 7037        self, expressions: list[exp.Expr], parse_map: bool = False
 7038    ) -> list[exp.Expr]:
 7039        transformed = []
 7040
 7041        for index, e in enumerate(expressions):
 7042            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
 7043                if isinstance(e, exp.Alias):
 7044                    e = self.expression(exp.PropertyEQ(this=e.args.get("alias"), expression=e.this))
 7045
 7046                if not isinstance(e, exp.PropertyEQ):
 7047                    e = self.expression(
 7048                        exp.PropertyEQ(
 7049                            this=e.this if parse_map else exp.to_identifier(e.this.name),
 7050                            expression=e.expression,
 7051                        )
 7052                    )
 7053
 7054                if isinstance(e.this, exp.Column):
 7055                    e.this.replace(e.this.this)
 7056            else:
 7057                e = self._to_prop_eq(e, index)
 7058
 7059            transformed.append(e)
 7060
 7061        return transformed
 7062
 7063    def _parse_function_properties(self) -> exp.Properties | None:
 7064        # Skip the generic `key = value` fallback in _parse_property since this
 7065        # runs post-AS where a function body like `name = expr` can be misread
 7066        # as a property.
 7067        properties = []
 7068        while True:
 7069            if self._match_texts(self.PROPERTY_PARSERS):
 7070                prop = self.PROPERTY_PARSERS[self._prev.text.upper()](self)
 7071            elif self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS):
 7072                prop = self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True)
 7073            else:
 7074                break
 7075            for p in ensure_list(prop):
 7076                properties.append(p)
 7077
 7078        return self.expression(exp.Properties(expressions=properties)) if properties else None
 7079
    def _parse_user_defined_function_expression(self) -> exp.Expr | None:
        """Parse the expression of a user-defined function by delegating to `_parse_statement`."""
        return self._parse_statement()
 7082
 7083    def _parse_function_parameter(self) -> exp.Expr | None:
 7084        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)
 7085
 7086    def _parse_user_defined_function(self, kind: TokenType | None = None) -> exp.Expr | None:
 7087        this = self._parse_table_parts(schema=True)
 7088
 7089        if not self._match(TokenType.L_PAREN):
 7090            return this
 7091
 7092        expressions = self._parse_csv(self._parse_function_parameter)
 7093        self._match_r_paren()
 7094        return self.expression(
 7095            exp.UserDefinedFunction(this=this, expressions=expressions, wrapped=True)
 7096        )
 7097
 7098    def _parse_macro_overloads(
 7099        self,
 7100        this: exp.UserDefinedFunction,
 7101        first_body: exp.Expr,
 7102        first_is_table: bool = False,
 7103    ) -> exp.MacroOverloads:
 7104        overloads = [
 7105            self.expression(
 7106                exp.MacroOverload(
 7107                    this=first_body,
 7108                    expressions=this.expressions or None,
 7109                    is_table=first_is_table,
 7110                )
 7111            )
 7112        ]
 7113        this.set("expressions", None)
 7114        this.set("wrapped", False)
 7115
 7116        while self._match(TokenType.COMMA):
 7117            if not self._match(TokenType.L_PAREN):
 7118                break
 7119
 7120            params = self._parse_csv(self._parse_function_parameter)
 7121            self._match_r_paren()
 7122
 7123            if not self._match(TokenType.ALIAS):
 7124                break
 7125
 7126            is_table = self._match(TokenType.TABLE)
 7127            body = self._parse_expression()
 7128            macro = exp.MacroOverload(this=body, expressions=params, is_table=is_table)
 7129            overloads.append(self.expression(macro))
 7130
 7131        return self.expression(exp.MacroOverloads(expressions=overloads))
 7132
 7133    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
 7134        literal = self._parse_primary()
 7135        if literal:
 7136            return self.expression(exp.Introducer(this=token.text, expression=literal), token)
 7137
 7138        return self._identifier_expression(token)
 7139
 7140    def _parse_session_parameter(self) -> exp.SessionParameter:
 7141        kind = None
 7142        this = self._parse_id_var() or self._parse_primary()
 7143
 7144        if this and self._match(TokenType.DOT):
 7145            kind = this.name
 7146            this = self._parse_var() or self._parse_primary()
 7147
 7148        return self.expression(exp.SessionParameter(this=this, kind=kind))
 7149
    def _parse_lambda_arg(self) -> exp.Expr | None:
        """Parse a single lambda argument by delegating to `_parse_id_var`."""
        return self._parse_id_var()
 7152
    def _parse_lambda(self, alias: bool = False) -> exp.Expr | None:
        """Parse a lambda or, failing that, a regular (possibly modified) argument.

        A lambda is recognized when an argument list, either parenthesized or a single
        argument, is followed by a token from `LAMBDAS`. If that attempt fails the parser
        retreats to where it started and parses `DISTINCT <expressions>` or a select /
        expression, followed by optional null-handling, HAVING MAX, ORDER and LIMIT parts.

        Args:
            alias: Forwarded to `_parse_select_or_expression`.

        Returns:
            The parsed expression.
        """
        next_token_type = self._next.token_type

        # Fast path: simple atom (column, literal, null, bool) followed by , or )
        if (
            next_token_type in self.LAMBDA_ARG_TERMINATORS
            and (atom := self._parse_atom()) is not None
        ):
            return atom

        # Position to return to when the tokens turn out not to form a lambda
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                list[t.Optional[exp.Expr]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
            elif self._match_set(self.LAMBDAS):
                return self.LAMBDAS[self._prev.token_type](self, expressions)
            else:
                self._retreat(index)
        elif self.TYPED_LAMBDA_ARGS or next_token_type in self.LAMBDAS:
            # Single argument that is not wrapped in parentheses
            expressions = [self._parse_lambda_arg()]

            if self._match_set(self.LAMBDAS):
                return self.LAMBDAS[self._prev.token_type](self, expressions)

            self._retreat(index)

        this: exp.Expr | None

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct(expressions=self._parse_csv(self._parse_disjunction))
            )
        else:
            self._match(TokenType.ALL)  # ALL is the default/no-op aggregate modifier (SQL-92)
            this = self._parse_select_or_expression(alias=alias)

        # The trailing parts are applied from the innermost call outwards
        return self._parse_limit(
            self._parse_respect_or_ignore_nulls(
                self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
            )
        )
 7199
 7200    def _parse_schema(self, this: exp.Expr | None = None) -> exp.Expr | None:
 7201        index = self._index
 7202        if not self._match(TokenType.L_PAREN):
 7203            return this
 7204
 7205        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
 7206        # expr can be of both types
 7207        if self._match_set(self.SELECT_START_TOKENS):
 7208            self._retreat(index)
 7209            return this
 7210        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
 7211        self._match_r_paren()
 7212        return self.expression(exp.Schema(this=this, expressions=args))
 7213
 7214    def _parse_field_def(self) -> exp.Expr | None:
 7215        return self._parse_column_def(self._parse_field(any_token=True))
 7216
    def _parse_column_def(
        self, this: exp.Expr | None, computed_column: bool = True
    ) -> exp.Expr | None:
        """Parse what follows a column name: type, computed expression, constraints, position.

        Args:
            this: The already parsed column name.
            computed_column: When False, an ALIAS token in front of the type is skipped.

        Returns:
            `this` (unwrapped if it was a column) when neither a type nor a constraint was
            found; otherwise an `exp.ColumnDef`.
        """
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef(this=this, ordinality=True))

        constraints: list[exp.Expr] = []

        # Computed column introduced by an ALIAS token (only when no type was parsed) or by
        # the texts ALIAS / MATERIALIZED
        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            # The keyword arguments below consume tokens in the order they are written
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_disjunction(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                data_type=exp.Var(this="AUTO")
                if self._match_text_seq("AUTO")
                else self._parse_types(),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint(kind=constraint_kind)))
        elif not kind and self._match_set({TokenType.IN, TokenType.OUT}, advance=False):
            # IN / OUT markers come before the type, which is parsed afterwards
            in_out_constraint = self.expression(
                exp.InOutColumnConstraint(
                    input_=self._match(TokenType.IN), output=self._match(TokenType.OUT)
                )
            )
            constraints.append(in_out_constraint)
            kind = self._parse_types()
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or self._next.token_type == TokenType.L_PAREN
            )
        ):
            # A type followed by an ALIAS token; when WRAPPED_TRANSFORM_COLUMN_CONSTRAINT is
            # set, the token after ALIAS additionally has to be `(`
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint(
                        kind=exp.ComputedColumnConstraint(
                            this=self._parse_disjunction(),
                            # True only when the matched keyword is STORED
                            persisted=self._match_texts(("STORED", "VIRTUAL"))
                            and self._prev.text.upper() == "STORED",
                        )
                    )
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        position = None
        if self._match_texts(("FIRST", "AFTER")):
            pos = self._prev.text
            position = self.expression(exp.ColumnPosition(this=self._parse_column(), position=pos))

        return self.expression(
            exp.ColumnDef(this=this, kind=kind, constraints=constraints, position=position)
        )
 7293
 7294    def _parse_auto_increment(
 7295        self,
 7296    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
 7297        start = None
 7298        increment = None
 7299        order = None
 7300
 7301        if self._match(TokenType.L_PAREN, advance=False):
 7302            args = self._parse_wrapped_csv(self._parse_bitwise)
 7303            start = seq_get(args, 0)
 7304            increment = seq_get(args, 1)
 7305        elif self._match_text_seq("START"):
 7306            start = self._parse_bitwise()
 7307            self._match_text_seq("INCREMENT")
 7308            increment = self._parse_bitwise()
 7309            if self._match_text_seq("ORDER"):
 7310                order = True
 7311            elif self._match_text_seq("NOORDER"):
 7312                order = False
 7313
 7314        if start and increment:
 7315            return exp.GeneratedAsIdentityColumnConstraint(
 7316                start=start, increment=increment, this=False, order=order
 7317            )
 7318
 7319        return exp.AutoIncrementColumnConstraint()
 7320
 7321    def _parse_check_constraint(self) -> exp.CheckColumnConstraint | None:
 7322        if not self._match(TokenType.L_PAREN, advance=False):
 7323            return None
 7324
 7325        return self.expression(
 7326            exp.CheckColumnConstraint(
 7327                this=self._parse_wrapped(self._parse_assignment),
 7328                enforced=self._match_text_seq("ENFORCED"),
 7329            )
 7330        )
 7331
 7332    def _parse_auto_property(self) -> exp.AutoRefreshProperty | None:
 7333        if not self._match_text_seq("REFRESH"):
 7334            self._retreat(self._index - 1)
 7335            return None
 7336        return self.expression(exp.AutoRefreshProperty(this=self._parse_var(upper=True)))
 7337
 7338    def _parse_compress(self) -> exp.CompressColumnConstraint:
 7339        if self._match(TokenType.L_PAREN, advance=False):
 7340            return self.expression(
 7341                exp.CompressColumnConstraint(this=self._parse_wrapped_csv(self._parse_bitwise))
 7342            )
 7343
 7344        return self.expression(exp.CompressColumnConstraint(this=self._parse_bitwise()))
 7345
    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        """Parse `[BY DEFAULT [ON NULL] | ALWAYS] [AS] ...` of a generated column.

        After the optional ALIAS token either `ROW [START | END] [HIDDEN]` or
        `[IDENTITY] [(<options>)]` is parsed.

        Returns:
            An `exp.GeneratedAsRowColumnConstraint` for the `ROW` form, otherwise the
            `exp.GeneratedAsIdentityColumnConstraint` with the parsed options set on it.
        """
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint(this=False, on_null=on_null)
            )
        else:
            # ALWAYS is optional; `this=True` is used whether or not it is present
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint(this=True))

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint(start=start, hidden=hidden))

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            # Each option is optional, but they are only recognized in this order
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                # Without the IDENTITY keyword the parentheses hold an expression
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                # Positional form: (<start>, <increment>)
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this
 7398
 7399    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
 7400        self._match_text_seq("LENGTH")
 7401        return self.expression(exp.InlineLengthColumnConstraint(this=self._parse_bitwise()))
 7402
 7403    def _parse_not_constraint(self) -> exp.Expr | None:
 7404        if self._match_text_seq("NULL"):
 7405            return self.expression(exp.NotNullColumnConstraint())
 7406        if self._match_text_seq("CASESPECIFIC"):
 7407            return self.expression(exp.CaseSpecificColumnConstraint(not_=True))
 7408        if self._match_text_seq("FOR", "REPLICATION"):
 7409            return self.expression(exp.NotForReplicationColumnConstraint())
 7410
 7411        # Unconsume the `NOT` token
 7412        self._retreat(self._index - 1)
 7413        return None
 7414
 7415    def _parse_column_constraint(self) -> exp.Expr | None:
 7416        this = self._parse_id_var() if self._match(TokenType.CONSTRAINT) else None
 7417
 7418        procedure_option_follows = (
 7419            self._match(TokenType.WITH, advance=False)
 7420            and self._next
 7421            and self._next.text.upper() in self.PROCEDURE_OPTIONS
 7422        )
 7423
 7424        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
 7425            constraint = self.CONSTRAINT_PARSERS[self._prev.text.upper()](self)
 7426            if not constraint:
 7427                self._retreat(self._index - 1)
 7428                return None
 7429
 7430            return self.expression(exp.ColumnConstraint(this=this, kind=constraint))
 7431
 7432        return this
 7433
 7434    def _parse_constraint(self) -> exp.Expr | None:
 7435        if not self._match(TokenType.CONSTRAINT):
 7436            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)
 7437
 7438        return self.expression(
 7439            exp.Constraint(this=self._parse_id_var(), expressions=self._parse_unnamed_constraints())
 7440        )
 7441
 7442    def _parse_unnamed_constraints(self) -> list[exp.Expr]:
 7443        constraints = []
 7444        while True:
 7445            constraint = self._parse_unnamed_constraint() or self._parse_function()
 7446            if not constraint:
 7447                break
 7448            constraints.append(constraint)
 7449
 7450        return constraints
 7451
 7452    def _parse_unnamed_constraint(
 7453        self, constraints: t.Collection[str] | None = None
 7454    ) -> exp.Expr | None:
 7455        index = self._index
 7456
 7457        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
 7458            constraints or self.CONSTRAINT_PARSERS
 7459        ):
 7460            return None
 7461
 7462        constraint_key = self._prev.text.upper()
 7463        if constraint_key not in self.CONSTRAINT_PARSERS:
 7464            self.raise_error(f"No parser found for schema constraint {constraint_key}.")
 7465
 7466        result = self.CONSTRAINT_PARSERS[constraint_key](self)
 7467        if not result:
 7468            self._retreat(index)
 7469
 7470        return result
 7471
 7472    def _parse_unique_key(self) -> exp.Expr | None:
 7473        if (
 7474            self._curr
 7475            and self._curr.token_type != TokenType.IDENTIFIER
 7476            and self._curr.text.upper() in self.CONSTRAINT_PARSERS
 7477        ):
 7478            return None
 7479        return self._parse_id_var(any_token=False)
 7480
 7481    def _parse_unique(self) -> exp.UniqueColumnConstraint:
 7482        self._match_texts(("KEY", "INDEX"))
 7483        return self.expression(
 7484            exp.UniqueColumnConstraint(
 7485                nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
 7486                this=self._parse_schema(self._parse_unique_key()),
 7487                index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
 7488                on_conflict=self._parse_on_conflict(),
 7489                options=self._parse_key_constraint_options(),
 7490            )
 7491        )
 7492
 7493    def _parse_key_constraint_options(self) -> list[str]:
 7494        options = []
 7495        while True:
 7496            if not self._curr:
 7497                break
 7498
 7499            if self._match(TokenType.ON):
 7500                action = None
 7501                on = self._advance_any() and self._prev.text
 7502
 7503                if self._match_text_seq("NO", "ACTION"):
 7504                    action = "NO ACTION"
 7505                elif self._match_text_seq("CASCADE"):
 7506                    action = "CASCADE"
 7507                elif self._match_text_seq("RESTRICT"):
 7508                    action = "RESTRICT"
 7509                elif self._match_pair(TokenType.SET, TokenType.NULL):
 7510                    action = "SET NULL"
 7511                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
 7512                    action = "SET DEFAULT"
 7513                else:
 7514                    self.raise_error("Invalid key constraint")
 7515
 7516                options.append(f"ON {on} {action}")
 7517            else:
 7518                var = self._parse_var_from_options(
 7519                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
 7520                )
 7521                if not var:
 7522                    break
 7523                options.append(var.name)
 7524
 7525        return options
 7526
 7527    def _parse_references(self, match: bool = True) -> exp.Reference | None:
 7528        if match and not self._match(TokenType.REFERENCES):
 7529            return None
 7530
 7531        expressions: list | None = None
 7532        this = self._parse_table(schema=True)
 7533        options = self._parse_key_constraint_options()
 7534        return self.expression(exp.Reference(this=this, expressions=expressions, options=options))
 7535
 7536    def _parse_foreign_key(self) -> exp.ForeignKey:
 7537        expressions = (
 7538            self._parse_wrapped_id_vars()
 7539            if not self._match(TokenType.REFERENCES, advance=False)
 7540            else None
 7541        )
 7542        reference = self._parse_references()
 7543        on_options = {}
 7544
 7545        while self._match(TokenType.ON):
 7546            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
 7547                self.raise_error("Expected DELETE or UPDATE")
 7548
 7549            kind = self._prev.text.lower()
 7550
 7551            if self._match_text_seq("NO", "ACTION"):
 7552                action = "NO ACTION"
 7553            elif self._match(TokenType.SET):
 7554                self._match_set((TokenType.NULL, TokenType.DEFAULT))
 7555                action = "SET " + self._prev.text.upper()
 7556            else:
 7557                self._advance()
 7558                action = self._prev.text.upper()
 7559
 7560            on_options[kind] = action
 7561
 7562        return self.expression(
 7563            exp.ForeignKey(
 7564                expressions=expressions,
 7565                reference=reference,
 7566                options=self._parse_key_constraint_options(),
 7567                **on_options,
 7568            )
 7569        )
 7570
 7571    def _parse_primary_key_part(self) -> exp.Expr | None:
 7572        return self._parse_field()
 7573
 7574    def _parse_period_for_system_time(self) -> exp.PeriodForSystemTimeConstraint | None:
 7575        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
 7576            self._retreat(self._index - 1)
 7577            return None
 7578
 7579        id_vars = self._parse_wrapped_id_vars()
 7580        return self.expression(
 7581            exp.PeriodForSystemTimeConstraint(
 7582                this=seq_get(id_vars, 0), expression=seq_get(id_vars, 1)
 7583            )
 7584        )
 7585
 7586    def _parse_primary_key(
 7587        self,
 7588        wrapped_optional: bool = False,
 7589        in_props: bool = False,
 7590        named_primary_key: bool = False,
 7591    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
 7592        desc = (
 7593            self._prev.token_type == TokenType.DESC
 7594            if self._match_set((TokenType.ASC, TokenType.DESC))
 7595            else None
 7596        )
 7597
 7598        this = None
 7599        if (
 7600            named_primary_key
 7601            and self._curr.text.upper() not in self.CONSTRAINT_PARSERS
 7602            and self._next
 7603            and self._next.token_type == TokenType.L_PAREN
 7604        ):
 7605            this = self._parse_id_var()
 7606
 7607        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
 7608            return self.expression(
 7609                exp.PrimaryKeyColumnConstraint(
 7610                    desc=desc, options=self._parse_key_constraint_options()
 7611                )
 7612            )
 7613
 7614        expressions = self._parse_wrapped_csv(
 7615            self._parse_primary_key_part, optional=wrapped_optional
 7616        )
 7617
 7618        return self.expression(
 7619            exp.PrimaryKey(
 7620                this=this,
 7621                expressions=expressions,
 7622                include=self._parse_index_params(),
 7623                options=self._parse_key_constraint_options(),
 7624            )
 7625        )
 7626
 7627    def _parse_bracket_key_value(self, is_map: bool = False) -> exp.Expr | None:
 7628        return self._parse_slice(self._parse_alias(self._parse_disjunction(), explicit=True))
 7629
 7630    def _parse_odbc_datetime_literal(self) -> exp.Expr:
 7631        """
 7632        Parses a datetime column in ODBC format. We parse the column into the corresponding
 7633        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
 7634        same as we did for `DATE('yyyy-mm-dd')`.
 7635
 7636        Reference:
 7637        https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
 7638        """
 7639        self._match(TokenType.VAR)
 7640        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
 7641        expression = self.expression(exp_class(this=self._parse_string()))
 7642        if not self._match(TokenType.R_BRACE):
 7643            self.raise_error("Expected }")
 7644        return expression
 7645
 7646    def _parse_bracket(self, this: exp.Expr | None = None) -> exp.Expr | None:
 7647        if not self._match_set(self.BRACKETS):
 7648            return this
 7649
 7650        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
 7651            map_token = seq_get(self._tokens, self._index - 2)
 7652            parse_map = map_token is not None and map_token.text.upper() == "MAP"
 7653        else:
 7654            parse_map = False
 7655
 7656        bracket_kind = self._prev.token_type
 7657        if (
 7658            bracket_kind == TokenType.L_BRACE
 7659            and self._curr
 7660            and self._curr.token_type == TokenType.VAR
 7661            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
 7662        ):
 7663            return self._parse_odbc_datetime_literal()
 7664
 7665        expressions = self._parse_csv(
 7666            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
 7667        )
 7668
 7669        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
 7670            self.raise_error("Expected ]")
 7671        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
 7672            self.raise_error("Expected }")
 7673
 7674        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
 7675        if bracket_kind == TokenType.L_BRACE:
 7676            this = self.expression(
 7677                exp.Struct(
 7678                    expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map)
 7679                )
 7680            )
 7681        elif not this:
 7682            this = build_array_constructor(
 7683                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
 7684            )
 7685        else:
 7686            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
 7687            if constructor_type:
 7688                return build_array_constructor(
 7689                    constructor_type,
 7690                    args=expressions,
 7691                    bracket_kind=bracket_kind,
 7692                    dialect=self.dialect,
 7693                )
 7694
 7695            expressions = apply_index_offset(
 7696                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
 7697            )
 7698            this = self.expression(
 7699                exp.Bracket(this=this, expressions=expressions), comments=this.pop_comments()
 7700            )
 7701
 7702        self._add_comments(this)
 7703        return self._parse_bracket(this)
 7704
 7705    def _parse_slice(self, this: exp.Expr | None) -> exp.Expr | None:
 7706        if not self._match(TokenType.COLON):
 7707            return this
 7708
 7709        if self._match_pair(TokenType.DASH, TokenType.COLON, advance=False):
 7710            self._advance()
 7711            end: exp.Expr | None = -exp.Literal.number("1")
 7712        else:
 7713            end = self._parse_assignment()
 7714        step = self._parse_unary() if self._match(TokenType.COLON) else None
 7715        return self.expression(exp.Slice(this=this, expression=end, step=step))
 7716
 7717    def _parse_case(self) -> exp.Expr | None:
 7718        if self._match(TokenType.DOT, advance=False):
 7719            # Avoid raising on valid expressions like case.*, supported by, e.g., spark & snowflake
 7720            self._retreat(self._index - 1)
 7721            return None
 7722
 7723        ifs = []
 7724        default = None
 7725
 7726        comments = self._prev_comments
 7727        expression = self._parse_disjunction()
 7728
 7729        while self._match(TokenType.WHEN):
 7730            this = self._parse_disjunction()
 7731            self._match(TokenType.THEN)
 7732            then = self._parse_disjunction()
 7733            ifs.append(self.expression(exp.If(this=this, true=then)))
 7734
 7735        if self._match(TokenType.ELSE):
 7736            default = self._parse_disjunction()
 7737
 7738        if not self._match(TokenType.END):
 7739            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
 7740                default = exp.column("interval")
 7741            else:
 7742                self.raise_error("Expected END after CASE", self._prev)
 7743
 7744        return self.expression(
 7745            exp.Case(this=expression, ifs=ifs, default=default), comments=comments
 7746        )
 7747
 7748    def _parse_if(self) -> exp.Expr | None:
 7749        if self._match(TokenType.L_PAREN):
 7750            args = self._parse_csv(
 7751                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
 7752            )
 7753            this = self.validate_expression(exp.If.from_arg_list(args), args)
 7754            self._match_r_paren()
 7755        else:
 7756            index = self._index - 1
 7757
 7758            if self.NO_PAREN_IF_COMMANDS and index == 0:
 7759                return self._parse_as_command(self._prev)
 7760
 7761            condition = self._parse_disjunction()
 7762
 7763            if not condition:
 7764                self._retreat(index)
 7765                return None
 7766
 7767            self._match(TokenType.THEN)
 7768            true = self._parse_disjunction()
 7769            false = self._parse_disjunction() if self._match(TokenType.ELSE) else None
 7770            self._match(TokenType.END)
 7771            this = self.expression(exp.If(this=condition, true=true, false=false))
 7772
 7773        return this
 7774
 7775    def _parse_next_value_for(self) -> exp.Expr | None:
 7776        if not self._match_text_seq("VALUE", "FOR"):
 7777            self._retreat(self._index - 1)
 7778            return None
 7779
 7780        return self.expression(
 7781            exp.NextValueFor(
 7782                this=self._parse_column(),
 7783                order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
 7784            )
 7785        )
 7786
 7787    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
 7788        this = self._parse_function() or self._parse_var_or_string(upper=True)
 7789
 7790        if self._match(TokenType.FROM):
 7791            return self.expression(exp.Extract(this=this, expression=self._parse_bitwise()))
 7792
 7793        if not self._match(TokenType.COMMA):
 7794            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)
 7795
 7796        return self.expression(exp.Extract(this=this, expression=self._parse_bitwise()))
 7797
 7798    def _parse_gap_fill(self) -> exp.GapFill:
 7799        self._match(TokenType.TABLE)
 7800        this = self._parse_table()
 7801
 7802        self._match(TokenType.COMMA)
 7803        args = [this, *self._parse_csv(self._parse_lambda)]
 7804
 7805        gap_fill = exp.GapFill.from_arg_list(args)
 7806        return self.validate_expression(gap_fill, args)
 7807
 7808    def _parse_char(self) -> exp.Chr:
 7809        return self.expression(
 7810            exp.Chr(
 7811                expressions=self._parse_csv(self._parse_assignment),
 7812                charset=self._match(TokenType.USING) and self._parse_charset_name(),
 7813            )
 7814        )
 7815
 7816    def _parse_charset_name(self) -> exp.Expr | None:
 7817        """
 7818        Parse a charset name after USING or CHARACTER SET. Dialects that need to preserve quoting
 7819        for specific name shapes override this.
 7820        """
 7821        return self._parse_var(
 7822            tokens={TokenType.BINARY, TokenType.IDENTIFIER},
 7823        )
 7824
 7825    def _parse_cast(self, strict: bool, safe: bool | None = None) -> exp.Expr:
 7826        this = self._parse_assignment()
 7827
 7828        if not self._match(TokenType.ALIAS):
 7829            if self._match(TokenType.COMMA):
 7830                return self.expression(exp.CastToStrType(this=this, to=self._parse_string()))
 7831
 7832            self.raise_error("Expected AS after CAST")
 7833
 7834        fmt = None
 7835        to = self._parse_types(with_collation=True)
 7836
 7837        default = None
 7838        if self._match(TokenType.DEFAULT):
 7839            default = self._parse_bitwise()
 7840            self._match_text_seq("ON", "CONVERSION", "ERROR")
 7841
 7842        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
 7843            fmt_string = self._parse_wrapped(self._parse_string, optional=True)
 7844            fmt = self._parse_at_time_zone(fmt_string)
 7845
 7846            if not to:
 7847                to = exp.DType.UNKNOWN.into_expr()
 7848            if to.this in exp.DataType.TEMPORAL_TYPES:
 7849                this = self.expression(
 7850                    (exp.StrToDate if to.this == exp.DType.DATE else exp.StrToTime)(
 7851                        this=this,
 7852                        format=exp.Literal.string(
 7853                            format_time(
 7854                                fmt_string.this if fmt_string else "",
 7855                                self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
 7856                                self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
 7857                            )
 7858                        ),
 7859                        safe=safe,
 7860                    )
 7861                )
 7862
 7863                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
 7864                    this.set("zone", fmt.args["zone"])
 7865                return this
 7866        elif not to:
 7867            self.raise_error("Expected TYPE after CAST")
 7868        elif isinstance(to, exp.Identifier):
 7869            to = exp.DataType.from_str(to.name, dialect=self.dialect, udt=True)
 7870        elif to.this == exp.DType.CHAR and self._match(TokenType.CHARACTER_SET):
 7871            to = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_var_or_string())
 7872
 7873        return self.build_cast(
 7874            strict=strict,
 7875            this=this,
 7876            to=to,
 7877            format=fmt,
 7878            safe=safe,
 7879            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
 7880            default=default,
 7881        )
 7882
 7883    def _parse_string_agg(self) -> exp.GroupConcat:
 7884        if self._match(TokenType.DISTINCT):
 7885            args: list[exp.Expr | None] = [
 7886                self.expression(exp.Distinct(expressions=[self._parse_disjunction()]))
 7887            ]
 7888            if self._match(TokenType.COMMA):
 7889                args.extend(self._parse_csv(self._parse_disjunction))
 7890        else:
 7891            args = self._parse_csv(self._parse_disjunction)  # type: ignore
 7892
 7893        if self._match_text_seq("ON", "OVERFLOW"):
 7894            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
 7895            if self._match_text_seq("ERROR"):
 7896                on_overflow: exp.Expr | None = exp.var("ERROR")
 7897            else:
 7898                self._match_text_seq("TRUNCATE")
 7899                on_overflow = self.expression(
 7900                    exp.OverflowTruncateBehavior(
 7901                        this=self._parse_string(),
 7902                        with_count=(
 7903                            self._match_text_seq("WITH", "COUNT")
 7904                            or not self._match_text_seq("WITHOUT", "COUNT")
 7905                        ),
 7906                    )
 7907                )
 7908        else:
 7909            on_overflow = None
 7910
 7911        index = self._index
 7912        if not self._match(TokenType.R_PAREN) and args:
 7913            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
 7914            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
 7915            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
 7916            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
 7917            return self.expression(exp.GroupConcat(this=args[0], separator=seq_get(args, 1)))
 7918
 7919        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
 7920        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
 7921        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
 7922        if not self._match_text_seq("WITHIN", "GROUP"):
 7923            self._retreat(index)
 7924            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)
 7925
 7926        # The corresponding match_r_paren will be called in parse_function (caller)
 7927        self._match_l_paren()
 7928
 7929        return self.expression(
 7930            exp.GroupConcat(
 7931                this=self._parse_order(this=seq_get(args, 0)),
 7932                separator=seq_get(args, 1),
 7933                on_overflow=on_overflow,
 7934            )
 7935        )
 7936
 7937    def _parse_convert(self, strict: bool, safe: bool | None = None) -> exp.Expr | None:
 7938        this = self._parse_bitwise()
 7939
 7940        if self._match(TokenType.USING):
 7941            to: exp.Expr | None = exp.DType.CHARACTER_SET.into_expr(kind=self._parse_charset_name())
 7942        elif self._match(TokenType.COMMA):
 7943            to = self._parse_types()
 7944        else:
 7945            to = None
 7946
 7947        return self.build_cast(strict=strict, this=this, to=to, safe=safe)
 7948
 7949    def _parse_xml_element(self) -> exp.XMLElement:
 7950        if self._match_text_seq("EVALNAME"):
 7951            evalname = True
 7952            this = self._parse_bitwise()
 7953        else:
 7954            evalname = None
 7955            self._match_text_seq("NAME")
 7956            this = self._parse_id_var()
 7957
 7958        return self.expression(
 7959            exp.XMLElement(
 7960                this=this,
 7961                expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_bitwise),
 7962                evalname=evalname,
 7963            )
 7964        )
 7965
 7966    def _parse_xml_table(self) -> exp.XMLTable:
 7967        namespaces = None
 7968        passing = None
 7969        columns = None
 7970
 7971        if self._match_text_seq("XMLNAMESPACES", "("):
 7972            namespaces = self._parse_xml_namespace()
 7973            self._match_text_seq(")", ",")
 7974
 7975        this = self._parse_string()
 7976
 7977        if self._match_text_seq("PASSING"):
 7978            # The BY VALUE keywords are optional and are provided for semantic clarity
 7979            self._match_text_seq("BY", "VALUE")
 7980            passing = self._parse_csv(self._parse_column)
 7981
 7982        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")
 7983
 7984        if self._match_text_seq("COLUMNS"):
 7985            columns = self._parse_csv(self._parse_field_def)
 7986
 7987        return self.expression(
 7988            exp.XMLTable(
 7989                this=this, namespaces=namespaces, passing=passing, columns=columns, by_ref=by_ref
 7990            )
 7991        )
 7992
 7993    def _parse_xml_namespace(self) -> list[exp.XMLNamespace]:
 7994        namespaces = []
 7995
 7996        while True:
 7997            if self._match(TokenType.DEFAULT):
 7998                uri = self._parse_string()
 7999            else:
 8000                uri = self._parse_alias(self._parse_string())
 8001            namespaces.append(self.expression(exp.XMLNamespace(this=uri)))
 8002            if not self._match(TokenType.COMMA):
 8003                break
 8004
 8005        return namespaces
 8006
 8007    def _parse_decode(self) -> exp.Decode | exp.DecodeCase | None:
 8008        args = self._parse_csv(self._parse_disjunction)
 8009
 8010        if len(args) < 3:
 8011            return self.expression(exp.Decode(this=seq_get(args, 0), charset=seq_get(args, 1)))
 8012
 8013        return self.expression(exp.DecodeCase(expressions=args))
 8014
 8015    def _parse_json_key_value(self) -> exp.JSONKeyValue | None:
 8016        self._match_text_seq("KEY")
 8017        key = self._parse_column()
 8018        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
 8019        self._match_text_seq("VALUE")
 8020        value = self._parse_bitwise()
 8021
 8022        if not key and not value:
 8023            return None
 8024        return self.expression(exp.JSONKeyValue(this=key, expression=value))
 8025
 8026    def _parse_format_json(self, this: exp.Expr | None) -> exp.Expr | None:
 8027        if not this or not self._match_text_seq("FORMAT", "JSON"):
 8028            return this
 8029
 8030        return self.expression(exp.FormatJson(this=this))
 8031
 8032    def _parse_on_condition(self) -> exp.OnCondition | None:
 8033        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
 8034        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
 8035            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
 8036            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
 8037        else:
 8038            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
 8039            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
 8040
 8041        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)
 8042
 8043        if not empty and not error and not null:
 8044            return None
 8045
 8046        return self.expression(exp.OnCondition(empty=empty, error=error, null=null))
 8047
 8048    def _parse_on_handling(self, on: str, *values: str) -> str | None | exp.Expr | None:
 8049        # Parses the "X ON Y" or "DEFAULT <expr> ON Y syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
 8050        for value in values:
 8051            if self._match_text_seq(value, "ON", on):
 8052                return f"{value} ON {on}"
 8053
 8054        index = self._index
 8055        if self._match(TokenType.DEFAULT):
 8056            default_value = self._parse_bitwise()
 8057            if self._match_text_seq("ON", on):
 8058                return default_value
 8059
 8060            self._retreat(index)
 8061
 8062        return None
 8063
 8064    @t.overload
 8065    def _parse_json_object(self, agg: t.Literal[False]) -> exp.JSONObject: ...
 8066
 8067    @t.overload
 8068    def _parse_json_object(self, agg: t.Literal[True]) -> exp.JSONObjectAgg: ...
 8069
 8070    def _parse_json_object(self, agg=False):
 8071        star = self._parse_star()
 8072        expressions = (
 8073            [star]
 8074            if star
 8075            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
 8076        )
 8077        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")
 8078
 8079        unique_keys = None
 8080        if self._match_text_seq("WITH", "UNIQUE"):
 8081            unique_keys = True
 8082        elif self._match_text_seq("WITHOUT", "UNIQUE"):
 8083            unique_keys = False
 8084
 8085        self._match_text_seq("KEYS")
 8086
 8087        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
 8088            self._parse_type()
 8089        )
 8090        encoding = self._match_text_seq("ENCODING") and self._parse_var()
 8091
 8092        return self.expression(
 8093            (exp.JSONObjectAgg if agg else exp.JSONObject)(
 8094                expressions=expressions,
 8095                null_handling=null_handling,
 8096                unique_keys=unique_keys,
 8097                return_type=return_type,
 8098                encoding=encoding,
 8099            )
 8100        )
 8101
 8102    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
 8103    def _parse_json_column_def(self) -> exp.JSONColumnDef:
 8104        if not self._match_text_seq("NESTED"):
 8105            this = self._parse_id_var()
 8106            ordinality = self._match_pair(TokenType.FOR, TokenType.ORDINALITY)
 8107            kind = self._parse_types(allow_identifiers=False)
 8108            nested = None
 8109        else:
 8110            this = None
 8111            ordinality = None
 8112            kind = None
 8113            nested = True
 8114
 8115        format_json = self._match_text_seq("FORMAT", "JSON")
 8116        path = self._match_text_seq("PATH") and self._parse_string()
 8117        nested_schema = nested and self._parse_json_schema()
 8118
 8119        return self.expression(
 8120            exp.JSONColumnDef(
 8121                this=this,
 8122                kind=kind,
 8123                path=path,
 8124                nested_schema=nested_schema,
 8125                ordinality=ordinality,
 8126                format_json=format_json,
 8127            )
 8128        )
 8129
 8130    def _parse_json_schema(self) -> exp.JSONSchema:
 8131        self._match_text_seq("COLUMNS")
 8132        return self.expression(
 8133            exp.JSONSchema(
 8134                expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True)
 8135            )
 8136        )
 8137
 8138    def _parse_json_table(self) -> exp.JSONTable:
 8139        this = self._parse_format_json(self._parse_bitwise())
 8140        path = self._match(TokenType.COMMA) and self._parse_string()
 8141        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
 8142        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
 8143        schema = self._parse_json_schema()
 8144
 8145        return exp.JSONTable(
 8146            this=this,
 8147            schema=schema,
 8148            path=path,
 8149            error_handling=error_handling,
 8150            empty_handling=empty_handling,
 8151        )
 8152
 8153    def _parse_match_against(self) -> exp.MatchAgainst:
 8154        if self._match_text_seq("TABLE"):
 8155            # parse SingleStore MATCH(TABLE ...) syntax
 8156            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
 8157            expressions = []
 8158            table = self._parse_table()
 8159            if table:
 8160                expressions = [table]
 8161        else:
 8162            expressions = self._parse_csv(self._parse_column)
 8163
 8164        self._match_text_seq(")", "AGAINST", "(")
 8165
 8166        this = self._parse_string()
 8167
 8168        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
 8169            modifier = "IN NATURAL LANGUAGE MODE"
 8170            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
 8171                modifier = f"{modifier} WITH QUERY EXPANSION"
 8172        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
 8173            modifier = "IN BOOLEAN MODE"
 8174        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
 8175            modifier = "WITH QUERY EXPANSION"
 8176        else:
 8177            modifier = None
 8178
 8179        return self.expression(
 8180            exp.MatchAgainst(this=this, expressions=expressions, modifier=modifier)
 8181        )
 8182
 8183    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
 8184    def _parse_open_json(self) -> exp.OpenJSON:
 8185        this = self._parse_bitwise()
 8186        path = self._match(TokenType.COMMA) and self._parse_string()
 8187
 8188        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
 8189            this = self._parse_field(any_token=True)
 8190            kind = self._parse_types()
 8191            path = self._parse_string()
 8192            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)
 8193
 8194            return self.expression(
 8195                exp.OpenJSONColumnDef(this=this, kind=kind, path=path, as_json=as_json)
 8196            )
 8197
 8198        expressions = None
 8199        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
 8200            self._match_l_paren()
 8201            expressions = self._parse_csv(_parse_open_json_column_def)
 8202
 8203        return self.expression(exp.OpenJSON(this=this, path=path, expressions=expressions))
 8204
 8205    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
 8206        args = self._parse_csv(self._parse_bitwise)
 8207
 8208        if self._match(TokenType.IN):
 8209            return self.expression(
 8210                exp.StrPosition(this=self._parse_bitwise(), substr=seq_get(args, 0))
 8211            )
 8212
 8213        if haystack_first:
 8214            haystack = seq_get(args, 0)
 8215            needle = seq_get(args, 1)
 8216        else:
 8217            haystack = seq_get(args, 1)
 8218            needle = seq_get(args, 0)
 8219
 8220        return self.expression(
 8221            exp.StrPosition(this=haystack, substr=needle, position=seq_get(args, 2))
 8222        )
 8223
 8224    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
 8225        args = self._parse_csv(self._parse_table)
 8226        return exp.JoinHint(this=func_name.upper(), expressions=args)
 8227
 8228    def _parse_substring(self) -> exp.Substring:
 8229        # Postgres supports the form: substring(string [from int] [for int])
 8230        # (despite being undocumented, the reverse order also works)
 8231        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6
 8232
 8233        args = t.cast(list[t.Optional[exp.Expr]], self._parse_csv(self._parse_bitwise))
 8234
 8235        start, length = None, None
 8236
 8237        while self._curr:
 8238            if self._match(TokenType.FROM):
 8239                start = self._parse_bitwise()
 8240            elif self._match(TokenType.FOR):
 8241                if not start:
 8242                    start = exp.Literal.number(1)
 8243                length = self._parse_bitwise()
 8244            else:
 8245                break
 8246
 8247        if start:
 8248            args.append(start)
 8249        if length:
 8250            args.append(length)
 8251
 8252        return self.validate_expression(exp.Substring.from_arg_list(args), args)
 8253
 8254    def _parse_trim(self) -> exp.Trim:
 8255        # https://www.w3resource.com/sql/character-functions/trim.php
 8256        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html
 8257
 8258        position = None
 8259        collation = None
 8260        expression = None
 8261
 8262        if self._match_texts(self.TRIM_TYPES):
 8263            position = self._prev.text.upper()
 8264
 8265        this = self._parse_bitwise()
 8266        if self._match_set((TokenType.FROM, TokenType.COMMA)):
 8267            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
 8268            expression = self._parse_bitwise()
 8269
 8270            if invert_order:
 8271                this, expression = expression, this
 8272
 8273        if self._match(TokenType.COLLATE):
 8274            collation = self._parse_bitwise()
 8275
 8276        return self.expression(
 8277            exp.Trim(this=this, position=position, expression=expression, collation=collation)
 8278        )
 8279
 8280    def _parse_window_clause(self) -> list[exp.Expr] | None:
 8281        return self._parse_csv(self._parse_named_window) if self._match(TokenType.WINDOW) else None
 8282
 8283    def _parse_named_window(self) -> exp.Expr | None:
 8284        return self._parse_window(self._parse_id_var(), alias=True)
 8285
 8286    def _parse_respect_or_ignore_nulls(self, this: exp.Expr | None) -> exp.Expr | None:
 8287        if self._curr.token_type == TokenType.VAR:
 8288            if self._match_text_seq("IGNORE", "NULLS"):
 8289                return self.expression(exp.IgnoreNulls(this=this))
 8290            if self._match_text_seq("RESPECT", "NULLS"):
 8291                return self.expression(exp.RespectNulls(this=this))
 8292        return this
 8293
 8294    def _parse_having_max(self, this: exp.Expr | None) -> exp.Expr | None:
 8295        if self._match(TokenType.HAVING):
 8296            self._match_texts(("MAX", "MIN"))
 8297            max = self._prev.text.upper() != "MIN"
 8298            return self.expression(
 8299                exp.HavingMax(this=this, expression=self._parse_column(), max=max)
 8300            )
 8301
 8302        return this
 8303
    def _parse_window(self, this: exp.Expr | None, alias: bool = False) -> exp.Expr | None:
        """Parse the modifiers that may follow an expression, up to and including a window.

        In order, this handles an optional `WITHIN GROUP (...)`, an optional
        `FILTER (...)`, an optional `IGNORE | RESPECT NULLS`, and finally a window
        introduced by one of WINDOW_BEFORE_PAREN_TOKENS (or, when `alias` is set,
        a named window definition).

        Args:
            this: The expression parsed so far.
            alias: When True, a named window definition is parsed: the ALIAS token is
                optional, no introducing token is required and `over` is left as None.

        Returns:
            `this` (possibly wrapped by the modifiers above) when no window follows,
            otherwise an `exp.Window`.
        """
        func = this
        # Comments are captured up front so that they can be moved onto the Window node
        comments = func.comments if isinstance(func, exp.Expr) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup(this=this, expression=order))

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            # The WHERE keyword is optional here, so it is consumed manually
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter(this=this, expression=self._parse_where(skip_where_token=True))
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        #   (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        #   and Snowflake chose to do the same for familiarity
        #   https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            # Hoist a nested IGNORE/RESPECT NULLS node so that it wraps the aggregate
            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__(this=this))

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            # No window follows: return what has been parsed so far
            return this
        else:
            over = self._prev.text.upper()

        # The comments captured above are attached to the Window node instead
        if comments and isinstance(func, exp.Expr):
            func.pop_comments()

        # Without a parenthesis, the window is a reference to another window by name
        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window(this=this, alias=self._parse_id_var(False), over=over), comments=comments
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        # `first` is True for FIRST, False for LAST and None when neither is present
        first: bool | None = True if self._match(TokenType.FIRST) else None
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        # `kind` holds the text of the matched frame keyword, or a falsy value if none matched
        kind = (
            self._match_set((TokenType.ROWS, TokenType.RANGE)) or self._match_text_seq("GROUPS")
        ) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()

            # An empty dict stands in for a missing end bound, so the .get() calls below yield None
            end = self._parse_window_spec() if self._match(TokenType.AND) else {}
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec(
                    kind=kind,
                    start=start["value"],
                    start_side=start["side"],
                    end=end.get("value"),
                    end_side=end.get("side"),
                    exclude=exclude,
                )
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window(
                this=this,
                partition_by=partition,
                order=order,
                spec=spec,
                alias=window_alias,
                over=over,
                first=first,
            ),
            comments=comments,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
 8416
 8417    def _parse_partition_and_order(
 8418        self,
 8419    ) -> tuple[list[exp.Expr], exp.Expr | None]:
 8420        return self._parse_partition_by(), self._parse_order()
 8421
 8422    def _parse_window_spec(self) -> dict[str, str | exp.Expr | None]:
 8423        self._match(TokenType.BETWEEN)
 8424
 8425        return {
 8426            "value": (
 8427                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
 8428                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
 8429                or self._parse_bitwise()
 8430            ),
 8431            "side": self._prev.text if self._match_texts(self.WINDOW_SIDES) else None,
 8432        }
 8433
    def _parse_alias(self, this: exp.Expr | None, explicit: bool = False) -> exp.Expr | None:
        """Parse an optional alias (or parenthesized alias list) for `this`.

        Args:
            this: The expression being aliased.
            explicit: When True, an alias is only parsed if the ALIAS token is present.

        Returns:
            An `exp.Aliases` for the parenthesized form, an `exp.Alias` when a single
            alias was parsed, otherwise `this` unchanged.
        """
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        # WINDOW is in ID_VAR_TOKENS, so it can be consumed as an implicit alias. Detect the
        # named-window clause shape (`WINDOW <ident> AS (...)`) and avoid swallowing it.
        if self._can_parse_named_window():
            return this

        # `any_token` doubles as the "ALIAS token was present" flag and as the `any_token`
        # argument of _parse_id_var below
        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases(
                    this=this, expressions=self._parse_csv(lambda: self._parse_id_var(any_token))
                ),
                comments=comments,
            )
            self._match_r_paren(aliases)
            return aliases

        # String literals are only accepted as aliases when STRING_ALIASES is set
        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            # NOTE(review): this extends the list obtained from self._prev_comments in place
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias(this=this, alias=alias), comments=comments)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this
 8476
 8477    def _parse_id_var(
 8478        self,
 8479        any_token: bool = True,
 8480        tokens: t.Collection[TokenType] | None = None,
 8481    ) -> exp.Expr | None:
 8482        expression = self._parse_identifier()
 8483        if not expression and (
 8484            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
 8485        ):
 8486            quoted = self._prev.token_type == TokenType.STRING
 8487            expression = self._identifier_expression(quoted=quoted)
 8488
 8489        return expression
 8490
 8491    def _parse_string(self) -> exp.Expr | None:
 8492        if self._match_set(self.STRING_PARSERS):
 8493            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
 8494        return self._parse_placeholder()
 8495
 8496    def _parse_string_as_identifier(self) -> exp.Identifier | None:
 8497        if not self._match(TokenType.STRING):
 8498            return None
 8499        output = exp.to_identifier(self._prev.text, quoted=True)
 8500        output.update_positions(self._prev)
 8501        return output
 8502
 8503    def _parse_number(self) -> exp.Expr | None:
 8504        if self._match_set(self.NUMERIC_PARSERS):
 8505            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
 8506        return self._parse_placeholder()
 8507
 8508    def _parse_identifier(self) -> exp.Expr | None:
 8509        if self._match(TokenType.IDENTIFIER):
 8510            return self._identifier_expression(quoted=True)
 8511        return self._parse_placeholder()
 8512
 8513    def _parse_var(
 8514        self,
 8515        any_token: bool = False,
 8516        tokens: t.Collection[TokenType] | None = None,
 8517        upper: bool = False,
 8518    ) -> exp.Expr | None:
 8519        if (
 8520            (any_token and self._advance_any())
 8521            or self._match(TokenType.VAR)
 8522            or (self._match_set(tokens) if tokens else False)
 8523        ):
 8524            return self.expression(
 8525                exp.Var(this=self._prev.text.upper() if upper else self._prev.text)
 8526            )
 8527        return self._parse_placeholder()
 8528
 8529    def _advance_any(self, ignore_reserved: bool = False) -> Token | None:
 8530        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
 8531            self._advance()
 8532            return self._prev
 8533        return None
 8534
 8535    def _parse_var_or_string(self, upper: bool = False) -> exp.Expr | None:
 8536        return self._parse_string() or self._parse_var(any_token=True, upper=upper)
 8537
 8538    def _parse_primary_or_var(self) -> exp.Expr | None:
 8539        return self._parse_primary() or self._parse_var(any_token=True)
 8540
 8541    def _parse_null(self) -> exp.Expr | None:
 8542        if self._match_set((TokenType.NULL, TokenType.UNKNOWN)):
 8543            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
 8544        return self._parse_placeholder()
 8545
 8546    def _parse_boolean(self) -> exp.Expr | None:
 8547        if self._match(TokenType.TRUE):
 8548            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
 8549        if self._match(TokenType.FALSE):
 8550            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
 8551        return self._parse_placeholder()
 8552
 8553    def _parse_star(self) -> exp.Expr | None:
 8554        if self._match(TokenType.STAR):
 8555            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
 8556        return self._parse_placeholder()
 8557
 8558    def _parse_parameter(self) -> exp.Parameter:
 8559        this = self._parse_identifier() or self._parse_primary_or_var()
 8560        return self.expression(exp.Parameter(this=this))
 8561
 8562    def _parse_placeholder(self) -> exp.Expr | None:
 8563        if self._match_set(self.PLACEHOLDER_PARSERS):
 8564            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
 8565            if placeholder:
 8566                return placeholder
 8567            self._advance(-1)
 8568        return None
 8569
 8570    def _parse_star_op(self, *keywords: str) -> list[exp.Expr] | None:
 8571        if not self._match_texts(keywords):
 8572            return None
 8573        if self._match(TokenType.L_PAREN, advance=False):
 8574            return self._parse_wrapped_csv(self._parse_expression)
 8575
 8576        expression = self._parse_alias(self._parse_disjunction(), explicit=True)
 8577        return [expression] if expression else None
 8578
 8579    def _parse_csv(
 8580        self, parse_method: t.Callable[[], T | None], sep: TokenType = TokenType.COMMA
 8581    ) -> list[T]:
 8582        parse_result = parse_method()
 8583        items = [parse_result] if parse_result is not None else []
 8584
 8585        while self._match(sep):
 8586            if isinstance(parse_result, exp.Expr):
 8587                self._add_comments(parse_result)
 8588            parse_result = parse_method()
 8589            if parse_result is not None:
 8590                items.append(parse_result)
 8591
 8592        return items
 8593
 8594    def _parse_wrapped_id_vars(self, optional: bool = False) -> list[exp.Expr]:
 8595        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
 8596
 8597    def _parse_wrapped_csv(
 8598        self,
 8599        parse_method: t.Callable[[], T | None],
 8600        sep: TokenType = TokenType.COMMA,
 8601        optional: bool = False,
 8602    ) -> list[T]:
 8603        return self._parse_wrapped(
 8604            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
 8605        )
 8606
 8607    def _parse_wrapped(self, parse_method: t.Callable[[], T], optional: bool = False) -> T:
 8608        wrapped = self._match(TokenType.L_PAREN)
 8609        if not wrapped and not optional:
 8610            self.raise_error("Expecting (")
 8611        parse_result = parse_method()
 8612        if wrapped:
 8613            self._match_r_paren()
 8614        return parse_result
 8615
 8616    def _parse_expressions(self) -> list[exp.Expr]:
 8617        return self._parse_csv(self._parse_expression)
 8618
 8619    def _parse_select_or_expression(self, alias: bool = False) -> exp.Expr | None:
 8620        return (
 8621            self._parse_set_operations(
 8622                self._parse_alias(self._parse_assignment(), explicit=True)
 8623                if alias
 8624                else self._parse_assignment()
 8625            )
 8626            or self._parse_select()
 8627        )
 8628
 8629    def _parse_ddl_select(self) -> exp.Expr | None:
 8630        return self._parse_query_modifiers(
 8631            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
 8632        )
 8633
 8634    def _parse_transaction(self) -> exp.Transaction | exp.Command:
 8635        this = None
 8636        if self._match_texts(self.TRANSACTION_KIND):
 8637            this = self._prev.text
 8638
 8639        self._match_texts(("TRANSACTION", "WORK"))
 8640
 8641        modes = []
 8642        while True:
 8643            mode = []
 8644            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
 8645                mode.append(self._prev.text)
 8646
 8647            if mode:
 8648                modes.append(" ".join(mode))
 8649            if not self._match(TokenType.COMMA):
 8650                break
 8651
 8652        return self.expression(exp.Transaction(this=this, modes=modes))
 8653
 8654    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
 8655        chain = None
 8656        savepoint = None
 8657        is_rollback = self._prev.token_type == TokenType.ROLLBACK
 8658
 8659        self._match_texts(("TRANSACTION", "WORK"))
 8660
 8661        if self._match_text_seq("TO"):
 8662            self._match_text_seq("SAVEPOINT")
 8663            savepoint = self._parse_id_var()
 8664
 8665        if self._match(TokenType.AND):
 8666            chain = not self._match_text_seq("NO")
 8667            self._match_text_seq("CHAIN")
 8668
 8669        if is_rollback:
 8670            return self.expression(exp.Rollback(savepoint=savepoint))
 8671
 8672        return self.expression(exp.Commit(chain=chain))
 8673
 8674    def _parse_refresh(self) -> exp.Refresh | exp.Command:
 8675        if self._match(TokenType.TABLE):
 8676            kind = "TABLE"
 8677        elif self._match_text_seq("MATERIALIZED", "VIEW"):
 8678            kind = "MATERIALIZED VIEW"
 8679        else:
 8680            kind = ""
 8681
 8682        this = self._parse_string() or self._parse_table()
 8683        if not kind and not isinstance(this, exp.Literal):
 8684            return self._parse_as_command(self._prev)
 8685
 8686        return self.expression(exp.Refresh(this=this, kind=kind))
 8687
 8688    def _parse_column_def_with_exists(self):
 8689        start = self._index
 8690        self._match(TokenType.COLUMN)
 8691
 8692        exists_column = self._parse_exists(not_=True)
 8693        expression = self._parse_field_def()
 8694
 8695        if not isinstance(expression, exp.ColumnDef):
 8696            self._retreat(start)
 8697            return None
 8698
 8699        expression.set("exists", exists_column)
 8700
 8701        return expression
 8702
 8703    def _parse_add_column(self) -> exp.ColumnDef | None:
 8704        if not self._prev.text.upper() == "ADD":
 8705            return None
 8706
 8707        return self._parse_column_def_with_exists()
 8708
 8709    def _parse_drop_column(self) -> exp.Drop | exp.Command | None:
 8710        drop = self._parse_drop() if self._match(TokenType.DROP) else None
 8711        if drop and not isinstance(drop, exp.Command):
 8712            drop.set("kind", drop.args.get("kind", "COLUMN"))
 8713        return drop
 8714
 8715    def _parse_alter_drop_action(self) -> exp.Expr | None:
 8716        return self._parse_drop_column()
 8717
 8718    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
 8719    def _parse_drop_partition(self, exists: bool | None = None) -> exp.DropPartition:
 8720        return self.expression(
 8721            exp.DropPartition(expressions=self._parse_csv(self._parse_partition), exists=exists)
 8722        )
 8723
    def _parse_alter_table_add(self) -> list[exp.Expr]:
        """Parse the actions of an ADD alteration: constraints, columns or partitions.

        Returns:
            The list of parsed expressions.
        """

        def _parse_add_alteration() -> exp.Expr | None:
            # Parses one comma-separated item, each optionally prefixed with its own ADD
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint(expressions=self._parse_csv(self._parse_constraint))
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            # Lookahead only: the PARTITION token itself is consumed by _parse_field below
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition(
                        exists=exists,
                        this=self._parse_field(any_token=True),
                        location=self._match_text_seq("LOCATION", advance=False)
                        and self._parse_property(),
                    )
                )

            return None

        # Unless a constraint follows, try the schema / column-list form. This applies when
        # the dialect does not require ADD before each column, or when COLUMNS is present.
        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)
 8762
    def _parse_alter_table_alter(self) -> exp.Expr | None:
        """Parse an ALTER action, which defaults to the `ALTER [COLUMN] <column> ...` form.

        Returns:
            The result of a matching ALTER_ALTER_PARSERS entry if one applies,
            otherwise an `exp.AlterColumn` describing the change to the column.
        """
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, drop=True))
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn(this=column, default=self._parse_disjunction()))
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn(this=column, comment=self._parse_string()))
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, drop=True, allow_null=True))
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(exp.AlterColumn(this=column, allow_null=False))

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="VISIBLE"))
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn(this=column, visible="INVISIBLE"))

        # Otherwise this is a type change: [SET DATA] [TYPE] <type> [COLLATE ...] [USING ...]
        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn(
                this=column,
                dtype=self._parse_types(),
                collate=self._match(TokenType.COLLATE) and self._parse_term(),
                using=self._match(TokenType.USING) and self._parse_disjunction(),
            )
        )
 8798
 8799    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
 8800        if self._match_texts(("ALL", "EVEN", "AUTO")):
 8801            return self.expression(exp.AlterDistStyle(this=exp.var(self._prev.text.upper())))
 8802
 8803        self._match_text_seq("KEY", "DISTKEY")
 8804        return self.expression(exp.AlterDistStyle(this=self._parse_column()))
 8805
 8806    def _parse_alter_sortkey(self, compound: bool | None = None) -> exp.AlterSortKey:
 8807        if compound:
 8808            self._match_text_seq("SORTKEY")
 8809
 8810        if self._match(TokenType.L_PAREN, advance=False):
 8811            return self.expression(
 8812                exp.AlterSortKey(expressions=self._parse_wrapped_id_vars(), compound=compound)
 8813            )
 8814
 8815        self._match_texts(("AUTO", "NONE"))
 8816        return self.expression(
 8817            exp.AlterSortKey(this=exp.var(self._prev.text.upper()), compound=compound)
 8818        )
 8819
 8820    def _parse_alter_table_drop(self) -> list[exp.Expr]:
 8821        index = self._index - 1
 8822
 8823        partition_exists = self._parse_exists()
 8824        if self._match(TokenType.PARTITION, advance=False):
 8825            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))
 8826
 8827        self._retreat(index)
 8828        return self._parse_csv(self._parse_alter_drop_action)
 8829
 8830    def _parse_alter_table_rename(self) -> exp.AlterRename | exp.RenameColumn | None:
 8831        if self._match(TokenType.COLUMN) or (
 8832            not self.ALTER_RENAME_REQUIRES_COLUMN and not self._match_text_seq("TO", advance=False)
 8833        ):
 8834            exists = self._parse_exists()
 8835            old_column = self._parse_column()
 8836            to = self._match_text_seq("TO")
 8837            new_column = self._parse_column()
 8838
 8839            if old_column is None or not to or new_column is None:
 8840                return None
 8841
 8842            return self.expression(exp.RenameColumn(this=old_column, to=new_column, exists=exists))
 8843
 8844        self._match_text_seq("TO")
 8845        return self.expression(exp.AlterRename(this=self._parse_table(schema=True)))
 8846
    def _parse_alter_table_set(self) -> exp.AlterSet:
        """Parse a SET action into an `exp.AlterSet`.

        The first matching keyword decides which arg of the result is filled in. When
        none matches, an optional SERDE followed by (optionally wrapped) properties is
        parsed instead.
        """
        alter_set = self.expression(exp.AlterSet())

        # The parenthesis is only peeked at here; _parse_wrapped_csv consumes it
        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            # FILESTREAM_ON is not consumed, so it becomes part of the parsed assignment
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set
 8882
 8883    def _parse_alter_session(self) -> exp.AlterSession:
 8884        """Parse ALTER SESSION SET/UNSET statements."""
 8885        if self._match(TokenType.SET):
 8886            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
 8887            return self.expression(exp.AlterSession(expressions=expressions, unset=False))
 8888
 8889        self._match_text_seq("UNSET")
 8890        expressions = self._parse_csv(
 8891            lambda: self.expression(exp.SetItem(this=self._parse_id_var(any_token=True)))
 8892        )
 8893        return self.expression(exp.AlterSession(expressions=expressions, unset=True))
 8894
    def _parse_alter(self) -> exp.Alter | exp.Command:
        """Parse an ALTER statement.

        Returns:
            An `exp.Alter` when the statement was fully understood, otherwise the
            whole statement as a generic `exp.Command`.
        """
        # Kept so that the statement can be re-parsed as a command from its first token
        start = self._prev

        iceberg = self._match_text_seq("ICEBERG")

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)
        # ICEBERG is only accepted in front of TABLE
        if iceberg and alter_token.token_type != TokenType.TABLE:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            # No target is parsed and no token is consumed here, so the parser lookup
            # below uses the text of the previously consumed token
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True, parse_partition=self.ALTER_TABLE_PARTITIONS)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

            # Consume the token naming the action, so that it is available as self._prev below
            if self._next:
                self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)
            cascade = self.dialect.ALTER_TABLE_SUPPORTS_CASCADE and self._match_text_seq("CASCADE")

            # Only build an Alter if every token was consumed and at least one action was parsed
            if not self._curr and actions:
                return self.expression(
                    exp.Alter(
                        this=this,
                        kind=alter_token.text.upper(),
                        exists=exists,
                        actions=actions,
                        only=only,
                        options=options,
                        cluster=cluster,
                        not_valid=not_valid,
                        check=check,
                        cascade=cascade,
                        iceberg=iceberg,
                    )
                )

        return self._parse_as_command(start)
 8946
 8947    def _parse_analyze(self) -> exp.Analyze | exp.Command:
 8948        start = self._prev
 8949        # https://duckdb.org/docs/sql/statements/analyze
 8950        if not self._curr:
 8951            return self.expression(exp.Analyze())
 8952
 8953        options = []
 8954        while self._match_texts(self.ANALYZE_STYLES):
 8955            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
 8956                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
 8957            else:
 8958                options.append(self._prev.text.upper())
 8959
 8960        this: exp.Expr | None = None
 8961        inner_expression: exp.Expr | None = None
 8962
 8963        kind = self._curr.text.upper() if self._curr else None
 8964
 8965        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
 8966            this = self._parse_table_parts()
 8967        elif self._match_text_seq("TABLES"):
 8968            if self._match_set((TokenType.FROM, TokenType.IN)):
 8969                kind = f"{kind} {self._prev.text.upper()}"
 8970                this = self._parse_table(schema=True, is_db_reference=True)
 8971        elif self._match_text_seq("DATABASE"):
 8972            this = self._parse_table(schema=True, is_db_reference=True)
 8973        elif self._match_text_seq("CLUSTER"):
 8974            this = self._parse_table()
 8975        # Try matching inner expr keywords before fallback to parse table.
 8976        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
 8977            kind = None
 8978            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
 8979        else:
 8980            # Empty kind  https://prestodb.io/docs/current/sql/analyze.html
 8981            kind = None
 8982            this = self._parse_table_parts()
 8983
 8984        partition = self._try_parse(self._parse_partition)
 8985        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
 8986            return self._parse_as_command(start)
 8987
 8988        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
 8989        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
 8990            "WITH", "ASYNC", "MODE"
 8991        ):
 8992            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
 8993        else:
 8994            mode = None
 8995
 8996        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
 8997            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
 8998
 8999        properties = self._parse_properties()
 9000        return self.expression(
 9001            exp.Analyze(
 9002                kind=kind,
 9003                this=this,
 9004                mode=mode,
 9005                partition=partition,
 9006                properties=properties,
 9007                expression=inner_expression,
 9008                options=options,
 9009            )
 9010        )
 9011
 9012    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
 9013    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
 9014        this = None
 9015        kind = self._prev.text.upper()
 9016        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
 9017        expressions = []
 9018
 9019        if not self._match_text_seq("STATISTICS"):
 9020            self.raise_error("Expecting token STATISTICS")
 9021
 9022        if self._match_text_seq("NOSCAN"):
 9023            this = "NOSCAN"
 9024        elif self._match(TokenType.FOR):
 9025            if self._match_text_seq("ALL", "COLUMNS"):
 9026                this = "FOR ALL COLUMNS"
 9027            if self._match_texts("COLUMNS"):
 9028                this = "FOR COLUMNS"
 9029                expressions = self._parse_csv(self._parse_column_reference)
 9030        elif self._match_text_seq("SAMPLE"):
 9031            sample = self._parse_number()
 9032            expressions = [
 9033                self.expression(
 9034                    exp.AnalyzeSample(
 9035                        sample=sample,
 9036                        kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
 9037                    )
 9038                )
 9039            ]
 9040
 9041        return self.expression(
 9042            exp.AnalyzeStatistics(kind=kind, option=option, this=this, expressions=expressions)
 9043        )
 9044
 9045    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
 9046    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
 9047        kind = None
 9048        this = None
 9049        expression: exp.Expr | None = None
 9050        if self._match_text_seq("REF", "UPDATE"):
 9051            kind = "REF"
 9052            this = "UPDATE"
 9053            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
 9054                this = "UPDATE SET DANGLING TO NULL"
 9055        elif self._match_text_seq("STRUCTURE"):
 9056            kind = "STRUCTURE"
 9057            if self._match_text_seq("CASCADE", "FAST"):
 9058                this = "CASCADE FAST"
 9059            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
 9060                ("ONLINE", "OFFLINE")
 9061            ):
 9062                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
 9063                expression = self._parse_into()
 9064
 9065        return self.expression(exp.AnalyzeValidate(kind=kind, this=this, expression=expression))
 9066
 9067    def _parse_analyze_columns(self) -> exp.AnalyzeColumns | None:
 9068        this = self._prev.text.upper()
 9069        if self._match_text_seq("COLUMNS"):
 9070            return self.expression(exp.AnalyzeColumns(this=f"{this} {self._prev.text.upper()}"))
 9071        return None
 9072
 9073    def _parse_analyze_delete(self) -> exp.AnalyzeDelete | None:
 9074        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
 9075        if self._match_text_seq("STATISTICS"):
 9076            return self.expression(exp.AnalyzeDelete(kind=kind))
 9077        return None
 9078
 9079    def _parse_analyze_list(self) -> exp.AnalyzeListChainedRows | None:
 9080        if self._match_text_seq("CHAINED", "ROWS"):
 9081            return self.expression(exp.AnalyzeListChainedRows(expression=self._parse_into()))
 9082        return None
 9083
 9084    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
 9085    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
 9086        this = self._prev.text.upper()
 9087        expression: exp.Expr | None = None
 9088        expressions = []
 9089        update_options = None
 9090
 9091        if self._match_text_seq("HISTOGRAM", "ON"):
 9092            expressions = self._parse_csv(self._parse_column_reference)
 9093            with_expressions = []
 9094            while self._match(TokenType.WITH):
 9095                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
 9096                if self._match_texts(("SYNC", "ASYNC")):
 9097                    if self._match_text_seq("MODE", advance=False):
 9098                        with_expressions.append(f"{self._prev.text.upper()} MODE")
 9099                        self._advance()
 9100                else:
 9101                    buckets = self._parse_number()
 9102                    if self._match_text_seq("BUCKETS"):
 9103                        with_expressions.append(f"{buckets} BUCKETS")
 9104            if with_expressions:
 9105                expression = self.expression(exp.AnalyzeWith(expressions=with_expressions))
 9106
 9107            if self._match_texts(("MANUAL", "AUTO")) and self._match(
 9108                TokenType.UPDATE, advance=False
 9109            ):
 9110                update_options = self._prev.text.upper()
 9111                self._advance()
 9112            elif self._match_text_seq("USING", "DATA"):
 9113                expression = self.expression(exp.UsingData(this=self._parse_string()))
 9114
 9115        return self.expression(
 9116            exp.AnalyzeHistogram(
 9117                this=this,
 9118                expressions=expressions,
 9119                expression=expression,
 9120                update_options=update_options,
 9121            )
 9122        )
 9123
 9124    def _parse_merge(self) -> exp.Merge:
 9125        self._match(TokenType.INTO)
 9126        target = self._parse_table()
 9127
 9128        if target and self._match(TokenType.ALIAS, advance=False):
 9129            target.set("alias", self._parse_table_alias())
 9130
 9131        self._match(TokenType.USING)
 9132        using = self._parse_table()
 9133
 9134        return self.expression(
 9135            exp.Merge(
 9136                this=target,
 9137                using=using,
 9138                on=self._match(TokenType.ON) and self._parse_disjunction(),
 9139                using_cond=self._match(TokenType.USING) and self._parse_using_identifiers(),
 9140                whens=self._parse_when_matched(),
 9141                returning=self._parse_returning(),
 9142            )
 9143        )
 9144
 9145    def _parse_when_matched(self) -> exp.Whens:
 9146        whens = []
 9147
 9148        while self._match(TokenType.WHEN):
 9149            matched = not self._match(TokenType.NOT)
 9150            self._match_text_seq("MATCHED")
 9151            source = (
 9152                False
 9153                if self._match_text_seq("BY", "TARGET")
 9154                else self._match_text_seq("BY", "SOURCE")
 9155            )
 9156            condition = self._parse_disjunction() if self._match(TokenType.AND) else None
 9157
 9158            self._match(TokenType.THEN)
 9159
 9160            if self._match(TokenType.INSERT):
 9161                this = self._parse_star()
 9162                if this:
 9163                    then: exp.Expr | None = self.expression(exp.Insert(this=this))
 9164                else:
 9165                    then = self.expression(
 9166                        exp.Insert(
 9167                            this=exp.var("ROW")
 9168                            if self._match_text_seq("ROW")
 9169                            else self._parse_value(values=False),
 9170                            expression=self._match_text_seq("VALUES") and self._parse_value(),
 9171                            where=self._parse_where(),
 9172                        )
 9173                    )
 9174            elif self._match(TokenType.UPDATE):
 9175                expressions = self._parse_star()
 9176                if expressions:
 9177                    then = self.expression(exp.Update(expressions=expressions))
 9178                else:
 9179                    then = self.expression(
 9180                        exp.Update(
 9181                            expressions=self._match(TokenType.SET)
 9182                            and self._parse_csv(self._parse_equality),
 9183                            where=self._parse_where(),
 9184                        )
 9185                    )
 9186            elif self._match(TokenType.DELETE):
 9187                then = self.expression(exp.Var(this=self._prev.text))
 9188            else:
 9189                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)
 9190
 9191            whens.append(
 9192                self.expression(
 9193                    exp.When(matched=matched, source=source, condition=condition, then=then)
 9194                )
 9195            )
 9196        return self.expression(exp.Whens(expressions=whens))
 9197
 9198    def _parse_show(self) -> exp.Expr | None:
 9199        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
 9200        if parser:
 9201            return parser(self)
 9202        return self._parse_as_command(self._prev)
 9203
 9204    def _parse_set_item_assignment(self, kind: str | None = None) -> exp.Expr | None:
 9205        index = self._index
 9206
 9207        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
 9208            return self._parse_set_transaction(global_=kind == "GLOBAL")
 9209
 9210        left = self._parse_primary() or self._parse_column()
 9211        assignment_delimiter = self._match_texts(self.SET_ASSIGNMENT_DELIMITERS)
 9212
 9213        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
 9214            self._retreat(index)
 9215            return None
 9216
 9217        right = self._parse_statement() or self._parse_id_var()
 9218        if isinstance(right, (exp.Column, exp.Identifier)):
 9219            right = exp.var(right.name)
 9220
 9221        this = self.expression(exp.EQ(this=left, expression=right))
 9222        return self.expression(exp.SetItem(this=this, kind=kind))
 9223
 9224    def _parse_set_transaction(self, global_: bool = False) -> exp.Expr:
 9225        self._match_text_seq("TRANSACTION")
 9226        characteristics = self._parse_csv(
 9227            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
 9228        )
 9229        return self.expression(
 9230            exp.SetItem(expressions=characteristics, kind="TRANSACTION", global_=global_)
 9231        )
 9232
 9233    def _parse_set_item(self) -> exp.Expr | None:
 9234        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
 9235        return parser(self) if parser else self._parse_set_item_assignment(kind=None)
 9236
 9237    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
 9238        index = self._index
 9239        set_ = self.expression(
 9240            exp.Set(expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag)
 9241        )
 9242
 9243        if self._curr:
 9244            self._retreat(index)
 9245            return self._parse_as_command(self._prev)
 9246
 9247        return set_
 9248
 9249    def _parse_var_from_options(
 9250        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
 9251    ) -> exp.Var | None:
 9252        start = self._curr
 9253        if not start:
 9254            return None
 9255
 9256        option = start.text.upper()
 9257        continuations = options.get(option)
 9258
 9259        index = self._index
 9260        self._advance()
 9261        for keywords in continuations or []:
 9262            if isinstance(keywords, str):
 9263                keywords = (keywords,)
 9264
 9265            if self._match_text_seq(*keywords):
 9266                option = f"{option} {' '.join(keywords)}"
 9267                break
 9268        else:
 9269            if continuations or continuations is None:
 9270                if raise_unmatched:
 9271                    self.raise_error(f"Unknown option {option}")
 9272
 9273                self._retreat(index)
 9274                return None
 9275
 9276        return exp.var(option)
 9277
 9278    def _parse_as_command(self, start: Token) -> exp.Command:
 9279        while self._curr:
 9280            self._advance()
 9281        text = self._find_sql(start, self._prev)
 9282        size = len(start.text)
 9283        self._warn_unsupported()
 9284        return exp.Command(this=text[:size], expression=text[size:])
 9285
 9286    def _parse_dict_property(self, this: str) -> exp.DictProperty:
 9287        settings = []
 9288
 9289        self._match_l_paren()
 9290        kind = self._parse_id_var()
 9291
 9292        if self._match(TokenType.L_PAREN):
 9293            while True:
 9294                key = self._parse_id_var()
 9295                value = self._parse_function() or self._parse_primary_or_var()
 9296                if not key and value is None:
 9297                    break
 9298                settings.append(self.expression(exp.DictSubProperty(this=key, value=value)))
 9299            self._match(TokenType.R_PAREN)
 9300
 9301        self._match_r_paren()
 9302
 9303        return self.expression(
 9304            exp.DictProperty(this=this, kind=kind.this if kind else None, settings=settings)
 9305        )
 9306
 9307    def _parse_dict_range(self, this: str) -> exp.DictRange:
 9308        self._match_l_paren()
 9309        has_min = self._match_text_seq("MIN")
 9310        if has_min:
 9311            min = self._parse_var() or self._parse_primary()
 9312            self._match_text_seq("MAX")
 9313            max = self._parse_var() or self._parse_primary()
 9314        else:
 9315            max = self._parse_var() or self._parse_primary()
 9316            min = exp.Literal.number(0)
 9317        self._match_r_paren()
 9318        return self.expression(exp.DictRange(this=this, min=min, max=max))
 9319
 9320    def _parse_comprehension(self, this: exp.Expr | None) -> exp.Comprehension | None:
 9321        index = self._index
 9322        expression = self._parse_column()
 9323        position = self._match(TokenType.COMMA) and self._parse_column()
 9324
 9325        if not self._match(TokenType.IN):
 9326            self._retreat(index - 1)
 9327            return None
 9328        iterator = self._parse_column()
 9329        condition = self._parse_disjunction() if self._match_text_seq("IF") else None
 9330        return self.expression(
 9331            exp.Comprehension(
 9332                this=this,
 9333                expression=expression,
 9334                position=position,
 9335                iterator=iterator,
 9336                condition=condition,
 9337            )
 9338        )
 9339
 9340    def _parse_heredoc(self) -> exp.Heredoc | None:
 9341        if self._match(TokenType.HEREDOC_STRING):
 9342            return self.expression(exp.Heredoc(this=self._prev.text))
 9343
 9344        if not self._match_text_seq("$"):
 9345            return None
 9346
 9347        tags = ["$"]
 9348        tag_text = None
 9349
 9350        if self._is_connected():
 9351            self._advance()
 9352            tags.append(self._prev.text.upper())
 9353        else:
 9354            self.raise_error("No closing $ found")
 9355
 9356        if tags[-1] != "$":
 9357            if self._is_connected() and self._match_text_seq("$"):
 9358                tag_text = tags[-1]
 9359                tags.append("$")
 9360            else:
 9361                self.raise_error("No closing $ found")
 9362
 9363        heredoc_start = self._curr
 9364
 9365        while self._curr:
 9366            if self._match_text_seq(*tags, advance=False):
 9367                this = self._find_sql(heredoc_start, self._prev)
 9368                self._advance(len(tags))
 9369                return self.expression(exp.Heredoc(this=this, tag=tag_text))
 9370
 9371            self._advance()
 9372
 9373        self.raise_error(f"No closing {''.join(tags)} found")
 9374        return None
 9375
 9376    def _find_parser(self, parsers: dict[str, t.Callable], trie: dict) -> t.Callable | None:
 9377        if not self._curr:
 9378            return None
 9379
 9380        index = self._index
 9381        this = []
 9382        while True:
 9383            # The current token might be multiple words
 9384            curr = self._curr.text.upper()
 9385            key = curr.split(" ")
 9386            this.append(curr)
 9387
 9388            self._advance()
 9389            result, trie = in_trie(trie, key)
 9390            if result == TrieResult.FAILED:
 9391                break
 9392
 9393            if result == TrieResult.EXISTS:
 9394                subparser = parsers[" ".join(this)]
 9395                return subparser
 9396
 9397        self._retreat(index)
 9398        return None
 9399
 9400    def _match_l_paren(self, expression: exp.Expr | None = None) -> None:
 9401        if not self._match(TokenType.L_PAREN, expression=expression):
 9402            self.raise_error("Expecting (")
 9403
 9404    def _match_r_paren(self, expression: exp.Expr | None = None) -> None:
 9405        if not self._match(TokenType.R_PAREN, expression=expression):
 9406            self.raise_error("Expecting )")
 9407
 9408    def _replace_lambda(
 9409        self, node: exp.Expr | None, expressions: list[exp.Expr]
 9410    ) -> exp.Expr | None:
 9411        if not node:
 9412            return node
 9413
 9414        lambda_types = {e.name: e.args.get("to") or False for e in expressions}
 9415
 9416        for column in node.find_all(exp.Column):
 9417            typ = lambda_types.get(column.parts[0].name)
 9418            if typ is not None:
 9419                dot_or_id = column.to_dot() if column.table else column.this
 9420
 9421                if typ:
 9422                    dot_or_id = self.expression(exp.Cast(this=dot_or_id, to=typ))
 9423
 9424                parent = column.parent
 9425
 9426                while isinstance(parent, exp.Dot):
 9427                    if not isinstance(parent.parent, exp.Dot):
 9428                        parent.replace(dot_or_id)
 9429                        break
 9430                    parent = parent.parent
 9431                else:
 9432                    if column is node:
 9433                        node = dot_or_id
 9434                    else:
 9435                        column.replace(dot_or_id)
 9436        return node
 9437
 9438    def _parse_truncate_table(self) -> exp.TruncateTable | None | exp.Expr:
 9439        start = self._prev
 9440
 9441        # Not to be confused with TRUNCATE(number, decimals) function call
 9442        if self._match(TokenType.L_PAREN):
 9443            self._retreat(self._index - 2)
 9444            return self._parse_function()
 9445
 9446        # Clickhouse supports TRUNCATE DATABASE as well
 9447        is_database = self._match(TokenType.DATABASE)
 9448
 9449        self._match(TokenType.TABLE)
 9450
 9451        exists = self._parse_exists(not_=False)
 9452
 9453        expressions = self._parse_csv(
 9454            lambda: self._parse_table(schema=True, is_db_reference=is_database)
 9455        )
 9456
 9457        cluster = self._parse_on_property() if self._match(TokenType.ON) else None
 9458
 9459        if self._match_text_seq("RESTART", "IDENTITY"):
 9460            identity = "RESTART"
 9461        elif self._match_text_seq("CONTINUE", "IDENTITY"):
 9462            identity = "CONTINUE"
 9463        else:
 9464            identity = None
 9465
 9466        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
 9467            option = self._prev.text
 9468        else:
 9469            option = None
 9470
 9471        partition = self._parse_partition()
 9472
 9473        # Fallback case
 9474        if self._curr:
 9475            return self._parse_as_command(start)
 9476
 9477        return self.expression(
 9478            exp.TruncateTable(
 9479                expressions=expressions,
 9480                is_database=is_database,
 9481                exists=exists,
 9482                cluster=cluster,
 9483                identity=identity,
 9484                option=option,
 9485                partition=partition,
 9486            )
 9487        )
 9488
 9489    def _parse_with_operator(self) -> exp.Expr | None:
 9490        this = self._parse_ordered(self._parse_opclass)
 9491
 9492        if not self._match(TokenType.WITH):
 9493            return this
 9494
 9495        op = self._parse_var(any_token=True, tokens=self.RESERVED_TOKENS)
 9496
 9497        return self.expression(exp.WithOperator(this=this, op=op))
 9498
 9499    def _parse_wrapped_options(self) -> list[exp.Expr]:
 9500        self._match(TokenType.EQ)
 9501        self._match(TokenType.L_PAREN)
 9502
 9503        opts: list[exp.Expr] = []
 9504        option: exp.Expr | list[exp.Expr] | None
 9505        while self._curr and not self._match(TokenType.R_PAREN):
 9506            if self._match_text_seq("FORMAT_NAME", "="):
 9507                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
 9508                option = self._parse_format_name()
 9509            else:
 9510                option = self._parse_property()
 9511
 9512            if option is None:
 9513                self.raise_error("Unable to parse option")
 9514                break
 9515
 9516            opts.extend(ensure_list(option))
 9517
 9518        return opts
 9519
 9520    def _parse_copy_parameters(self) -> list[exp.CopyParameter]:
 9521        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None
 9522
 9523        options = []
 9524        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
 9525            option = self._parse_var(any_token=True)
 9526            prev = self._prev.text.upper()
 9527
 9528            # Different dialects might separate options and values by white space, "=" and "AS"
 9529            self._match(TokenType.EQ)
 9530            self._match(TokenType.ALIAS)
 9531
 9532            param = self.expression(exp.CopyParameter(this=option))
 9533
 9534            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
 9535                TokenType.L_PAREN, advance=False
 9536            ):
 9537                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
 9538                param.set("expressions", self._parse_wrapped_options())
 9539            elif prev == "FILE_FORMAT":
 9540                # T-SQL's external file format case
 9541                param.set("expression", self._parse_field())
 9542            elif (
 9543                prev == "FORMAT"
 9544                and self._prev.token_type == TokenType.ALIAS
 9545                and self._match_texts(("AVRO", "JSON"))
 9546            ):
 9547                param.set("this", exp.var(f"FORMAT AS {self._prev.text.upper()}"))
 9548                param.set("expression", self._parse_field())
 9549            else:
 9550                param.set("expression", self._parse_unquoted_field() or self._parse_bracket())
 9551
 9552            options.append(param)
 9553
 9554            if sep:
 9555                self._match(sep)
 9556
 9557        return options
 9558
 9559    def _parse_credentials(self) -> exp.Credentials | None:
 9560        expr = self.expression(exp.Credentials())
 9561
 9562        if self._match_text_seq("STORAGE_INTEGRATION", "="):
 9563            expr.set("storage", self._parse_field())
 9564        if self._match_text_seq("CREDENTIALS"):
 9565            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
 9566            creds = (
 9567                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
 9568            )
 9569            expr.set("credentials", creds)
 9570        if self._match_text_seq("ENCRYPTION"):
 9571            expr.set("encryption", self._parse_wrapped_options())
 9572        if self._match_text_seq("IAM_ROLE"):
 9573            expr.set(
 9574                "iam_role",
 9575                exp.var(self._prev.text) if self._match(TokenType.DEFAULT) else self._parse_field(),
 9576            )
 9577        if self._match_text_seq("REGION"):
 9578            expr.set("region", self._parse_field())
 9579
 9580        return expr
 9581
 9582    def _parse_file_location(self) -> exp.Expr | None:
 9583        return self._parse_field()
 9584
 9585    def _parse_copy(self) -> exp.Copy | exp.Command:
 9586        start = self._prev
 9587
 9588        self._match(TokenType.INTO)
 9589
 9590        this = (
 9591            self._parse_select(nested=True, parse_subquery_alias=False)
 9592            if self._match(TokenType.L_PAREN, advance=False)
 9593            else self._parse_table(schema=True)
 9594        )
 9595
 9596        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")
 9597
 9598        files = self._parse_csv(self._parse_file_location)
 9599        if self._match(TokenType.EQ, advance=False):
 9600            # Backtrack one token since we've consumed the lhs of a parameter assignment here.
 9601            # This can happen for Snowflake dialect. Instead, we'd like to parse the parameter
 9602            # list via `_parse_wrapped(..)` below.
 9603            self._advance(-1)
 9604            files = []
 9605
 9606        credentials = self._parse_credentials()
 9607
 9608        self._match_text_seq("WITH")
 9609
 9610        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)
 9611
 9612        # Fallback case
 9613        if self._curr:
 9614            return self._parse_as_command(start)
 9615
 9616        return self.expression(
 9617            exp.Copy(this=this, kind=kind, credentials=credentials, files=files, params=params)
 9618        )
 9619
 9620    def _parse_normalize(self) -> exp.Normalize:
 9621        return self.expression(
 9622            exp.Normalize(
 9623                this=self._parse_bitwise(), form=self._match(TokenType.COMMA) and self._parse_var()
 9624            )
 9625        )
 9626
 9627    def _parse_ceil_floor(self, expr_type: type[TCeilFloor]) -> TCeilFloor:
 9628        args = self._parse_csv(lambda: self._parse_lambda())
 9629
 9630        this = seq_get(args, 0)
 9631        decimals = seq_get(args, 1)
 9632
 9633        return expr_type(
 9634            this=this,
 9635            decimals=decimals,
 9636            to=self._parse_var() if self._match_text_seq("TO") else None,
 9637        )
 9638
 9639    def _parse_star_ops(self) -> exp.Expr | None:
 9640        star_token = self._prev
 9641
 9642        if self._match_text_seq("COLUMNS", "(", advance=False):
 9643            this = self._parse_function()
 9644            if isinstance(this, exp.Columns):
 9645                this.set("unpack", True)
 9646            return this
 9647
 9648        return self.expression(
 9649            exp.Star(
 9650                except_=self._parse_star_op("EXCEPT", "EXCLUDE"),
 9651                replace=self._parse_star_op("REPLACE"),
 9652                rename=self._parse_star_op("RENAME"),
 9653            )
 9654        ).update_positions(star_token)
 9655
 9656    def _parse_grant_privilege(self) -> exp.GrantPrivilege | None:
 9657        privilege_parts = []
 9658
 9659        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
 9660        # (end of privilege list) or L_PAREN (start of column list) are met
 9661        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
 9662            privilege_parts.append(self._curr.text.upper())
 9663            self._advance()
 9664
 9665        this = exp.var(" ".join(privilege_parts))
 9666        expressions = (
 9667            self._parse_wrapped_csv(self._parse_column)
 9668            if self._match(TokenType.L_PAREN, advance=False)
 9669            else None
 9670        )
 9671
 9672        return self.expression(exp.GrantPrivilege(this=this, expressions=expressions))
 9673
 9674    def _parse_grant_principal(self) -> exp.GrantPrincipal | None:
 9675        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
 9676        principal = self._parse_id_var()
 9677
 9678        if not principal:
 9679            return None
 9680
 9681        return self.expression(exp.GrantPrincipal(this=principal, kind=kind))
 9682
 9683    def _parse_grant_revoke_common(
 9684        self,
 9685    ) -> tuple[list | None, str | None, exp.Expr | None]:
 9686        privileges = self._parse_csv(self._parse_grant_privilege)
 9687
 9688        self._match(TokenType.ON)
 9689        kind = self._prev.text.upper() if self._match_set(self.CREATABLES) else None
 9690
 9691        # Attempt to parse the securable e.g. MySQL allows names
 9692        # such as "foo.*", "*.*" which are not easily parseable yet
 9693        securable = self._try_parse(self._parse_table_parts)
 9694
 9695        return privileges, kind, securable
 9696
 9697    def _parse_grant(self) -> exp.Grant | exp.Command:
 9698        start = self._prev
 9699
 9700        privileges, kind, securable = self._parse_grant_revoke_common()
 9701
 9702        if not securable or not self._match_text_seq("TO"):
 9703            return self._parse_as_command(start)
 9704
 9705        principals = self._parse_csv(self._parse_grant_principal)
 9706
 9707        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")
 9708
 9709        if self._curr:
 9710            return self._parse_as_command(start)
 9711
 9712        return self.expression(
 9713            exp.Grant(
 9714                privileges=privileges,
 9715                kind=kind,
 9716                securable=securable,
 9717                principals=principals,
 9718                grant_option=grant_option,
 9719            )
 9720        )
 9721
 9722    def _parse_revoke(self) -> exp.Revoke | exp.Command:
 9723        start = self._prev
 9724
 9725        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")
 9726
 9727        privileges, kind, securable = self._parse_grant_revoke_common()
 9728
 9729        if not securable or not self._match_text_seq("FROM"):
 9730            return self._parse_as_command(start)
 9731
 9732        principals = self._parse_csv(self._parse_grant_principal)
 9733
 9734        cascade = None
 9735        if self._match_texts(("CASCADE", "RESTRICT")):
 9736            cascade = self._prev.text.upper()
 9737
 9738        if self._curr:
 9739            return self._parse_as_command(start)
 9740
 9741        return self.expression(
 9742            exp.Revoke(
 9743                privileges=privileges,
 9744                kind=kind,
 9745                securable=securable,
 9746                principals=principals,
 9747                grant_option=grant_option,
 9748                cascade=cascade,
 9749            )
 9750        )
 9751
 9752    def _parse_overlay(self) -> exp.Overlay:
 9753        def _parse_overlay_arg(text: str) -> exp.Expr | None:
 9754            return (
 9755                self._parse_bitwise()
 9756                if self._match(TokenType.COMMA) or self._match_text_seq(text)
 9757                else None
 9758            )
 9759
 9760        return self.expression(
 9761            exp.Overlay(
 9762                this=self._parse_bitwise(),
 9763                expression=_parse_overlay_arg("PLACING"),
 9764                from_=_parse_overlay_arg("FROM"),
 9765                for_=_parse_overlay_arg("FOR"),
 9766            )
 9767        )
 9768
 9769    def _parse_format_name(self) -> exp.Property:
 9770        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
 9771        # for FILE_FORMAT = <format_name>
 9772        return self.expression(
 9773            exp.Property(
 9774                this=exp.var("FORMAT_NAME"), value=self._parse_string() or self._parse_table_parts()
 9775            )
 9776        )
 9777
 9778    def _parse_max_min_by(self, expr_type: type[exp.AggFunc]) -> exp.AggFunc:
 9779        args: list[exp.Expr] = []
 9780
 9781        if self._match(TokenType.DISTINCT):
 9782            args.append(self.expression(exp.Distinct(expressions=[self._parse_lambda()])))
 9783            self._match(TokenType.COMMA)
 9784
 9785        args.extend(self._parse_function_args())
 9786
 9787        return self.expression(
 9788            expr_type(this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2))
 9789        )
 9790
 9791    def _identifier_expression(
 9792        self, token: Token | None = None, quoted: bool | None = None
 9793    ) -> exp.Identifier:
 9794        token = token or self._prev
 9795        return self.expression(exp.Identifier(this=token.text, quoted=quoted), token)
 9796
 9797    def _build_pipe_cte(
 9798        self,
 9799        query: exp.Query,
 9800        expressions: list[exp.Expr],
 9801        alias_cte: exp.TableAlias | None = None,
 9802    ) -> exp.Select:
 9803        new_cte: str | exp.TableAlias | None
 9804        if alias_cte:
 9805            new_cte = alias_cte
 9806        else:
 9807            self._pipe_cte_counter += 1
 9808            new_cte = f"__tmp{self._pipe_cte_counter}"
 9809
 9810        with_ = query.args.get("with_")
 9811        ctes = with_.pop() if with_ else None
 9812
 9813        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
 9814        if ctes:
 9815            new_select.set("with_", ctes)
 9816
 9817        return new_select.with_(new_cte, as_=query, copy=False)
 9818
 9819    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
 9820        select = self._parse_select(consume_pipe=False)
 9821        if not select:
 9822            return query
 9823
 9824        return self._build_pipe_cte(
 9825            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
 9826        )
 9827
 9828    def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select:
 9829        limit = self._parse_limit()
 9830        offset = self._parse_offset()
 9831        if limit:
 9832            curr_limit = query.args.get("limit", limit)
 9833            if curr_limit.expression.to_py() >= limit.expression.to_py():
 9834                query.limit(limit, copy=False)
 9835        if offset:
 9836            curr_offset = query.args.get("offset")
 9837            curr_offset = curr_offset.expression.to_py() if curr_offset else 0
 9838            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)
 9839
 9840        return query
 9841
 9842    def _parse_pipe_syntax_aggregate_fields(self) -> exp.Expr | None:
 9843        this = self._parse_disjunction()
 9844        if self._match_text_seq("GROUP", "AND", advance=False):
 9845            return this
 9846
 9847        this = self._parse_alias(this)
 9848
 9849        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
 9850            return self._parse_ordered(lambda: this)
 9851
 9852        return this
 9853
 9854    def _parse_pipe_syntax_aggregate_group_order_by(
 9855        self, query: exp.Select, group_by_exists: bool = True
 9856    ) -> exp.Select:
 9857        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
 9858        aggregates_or_groups, orders = [], []
 9859        for element in expr:
 9860            if isinstance(element, exp.Ordered):
 9861                this = element.this
 9862                if isinstance(this, exp.Alias):
 9863                    element.set("this", this.args["alias"])
 9864                orders.append(element)
 9865            else:
 9866                this = element
 9867            aggregates_or_groups.append(this)
 9868
 9869        if group_by_exists:
 9870            query.select(
 9871                *aggregates_or_groups, *query.expressions, append=False, copy=False
 9872            ).group_by(
 9873                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
 9874                copy=False,
 9875            )
 9876        else:
 9877            query.select(*aggregates_or_groups, append=False, copy=False)
 9878
 9879        if orders:
 9880            return query.order_by(*orders, append=False, copy=False)
 9881
 9882        return query
 9883
 9884    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
 9885        self._match_text_seq("AGGREGATE")
 9886        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)
 9887
 9888        if self._match(TokenType.GROUP_BY) or (
 9889            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
 9890        ):
 9891            query = self._parse_pipe_syntax_aggregate_group_order_by(query)
 9892
 9893        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
 9894
 9895    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> exp.Query | None:
 9896        first_setop = self.parse_set_operation(this=query)
 9897        if not first_setop:
 9898            return None
 9899
 9900        def _parse_and_unwrap_query() -> exp.Expr | None:
 9901            expr = self._parse_paren()
 9902            return expr.assert_is(exp.Subquery).unnest() if expr else None
 9903
 9904        first_setop.this.pop()
 9905
 9906        setops = [
 9907            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
 9908            *self._parse_csv(_parse_and_unwrap_query),
 9909        ]
 9910
 9911        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
 9912        with_ = query.args.get("with_")
 9913        ctes = with_.pop() if with_ else None
 9914
 9915        if isinstance(first_setop, exp.Union):
 9916            query = query.union(*setops, copy=False, **first_setop.args)
 9917        elif isinstance(first_setop, exp.Except):
 9918            query = query.except_(*setops, copy=False, **first_setop.args)
 9919        else:
 9920            query = query.intersect(*setops, copy=False, **first_setop.args)
 9921
 9922        query.set("with_", ctes)
 9923
 9924        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
 9925
 9926    def _parse_pipe_syntax_join(self, query: exp.Query) -> exp.Query | None:
 9927        join = self._parse_join()
 9928        if not join:
 9929            return None
 9930
 9931        if isinstance(query, exp.Select):
 9932            return query.join(join, copy=False)
 9933
 9934        return query
 9935
 9936    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
 9937        pivots = self._parse_pivots()
 9938        if not pivots:
 9939            return query
 9940
 9941        from_ = query.args.get("from_")
 9942        if from_:
 9943            from_.this.set("pivots", pivots)
 9944
 9945        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
 9946
 9947    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
 9948        self._match_text_seq("EXTEND")
 9949        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
 9950        return self._build_pipe_cte(query=query, expressions=[exp.Star()])
 9951
 9952    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
 9953        sample = self._parse_table_sample()
 9954
 9955        with_ = query.args.get("with_")
 9956        if with_:
 9957            with_.expressions[-1].this.set("sample", sample)
 9958        else:
 9959            query.set("sample", sample)
 9960
 9961        return query
 9962
 9963    def _parse_pipe_syntax_query(self, query: exp.Query) -> exp.Query | None:
 9964        if isinstance(query, exp.Subquery):
 9965            query = exp.select("*").from_(query, copy=False)
 9966
 9967        if not query.args.get("from_"):
 9968            query = exp.select("*").from_(query.subquery(copy=False), copy=False)
 9969
 9970        while self._match(TokenType.PIPE_GT):
 9971            start_index = self._index
 9972            start_text = self._curr.text.upper()
 9973            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(start_text)
 9974            if not parser:
 9975                # The set operators (UNION, etc) and the JOIN operator have a few common starting
 9976                # keywords, making it tricky to disambiguate them without lookahead. The approach
 9977                # here is to try and parse a set operation and if that fails, then try to parse a
 9978                # join operator. If that fails as well, then the operator is not supported.
 9979                parsed_query = self._parse_pipe_syntax_set_operator(query)
 9980                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
 9981                if not parsed_query:
 9982                    self._retreat(start_index)
 9983                    self.raise_error(f"Unsupported pipe syntax operator: '{start_text}'.")
 9984                    break
 9985                query = parsed_query
 9986            else:
 9987                query = parser(self, query)
 9988
 9989        return query
 9990
 9991    def _parse_declareitem(self) -> exp.DeclareItem | None:
 9992        self._match_texts(("VAR", "VARIABLE"))
 9993
 9994        vars = self._parse_csv(self._parse_id_var)
 9995        if not vars:
 9996            return None
 9997
 9998        self._match(TokenType.ALIAS)
 9999        kind = self._parse_schema() if self._match(TokenType.TABLE) else self._parse_types()
10000        default = (
10001            self._match(TokenType.DEFAULT) or self._match(TokenType.EQ)
10002        ) and self._parse_bitwise()
10003
10004        return self.expression(exp.DeclareItem(this=vars, kind=kind, default=default))
10005
10006    def _parse_declare(self) -> exp.Declare | exp.Command:
10007        start = self._prev
10008        replace = self._match_text_seq("OR", "REPLACE")
10009        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))
10010
10011        if not expressions or self._curr:
10012            return self._parse_as_command(start)
10013
10014        return self.expression(exp.Declare(expressions=expressions, replace=replace))
10015
10016    def build_cast(self, strict: bool, **kwargs) -> exp.Expr:
10017        exp_class = exp.Cast if strict else exp.TryCast
10018
10019        if exp_class == exp.TryCast:
10020            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING
10021
10022        return self.expression(exp_class(**kwargs))
10023
10024    def _parse_json_value(self) -> exp.JSONValue:
10025        this = self._parse_bitwise()
10026        self._match(TokenType.COMMA)
10027        path = self._parse_bitwise()
10028
10029        returning = self._match(TokenType.RETURNING) and self._parse_type()
10030
10031        return self.expression(
10032            exp.JSONValue(
10033                this=this,
10034                path=self.dialect.to_json_path(path),
10035                returning=returning,
10036                on_condition=self._parse_on_condition(),
10037            )
10038        )
10039
10040    def _parse_group_concat(self) -> exp.Expr | None:
10041        def concat_exprs(node: exp.Expr | None, exprs: list[exp.Expr]) -> exp.Expr:
10042            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
10043                concat_exprs = [
10044                    self.expression(
10045                        exp.Concat(
10046                            expressions=node.expressions,
10047                            safe=True,
10048                            coalesce=self.dialect.CONCAT_COALESCE,
10049                        )
10050                    )
10051                ]
10052                node.set("expressions", concat_exprs)
10053                return node
10054            if len(exprs) == 1:
10055                return exprs[0]
10056            return self.expression(
10057                exp.Concat(expressions=args, safe=True, coalesce=self.dialect.CONCAT_COALESCE)
10058            )
10059
10060        args = self._parse_csv(self._parse_lambda)
10061
10062        if args:
10063            order = args[-1] if isinstance(args[-1], exp.Order) else None
10064
10065            if order:
10066                # Order By is the last (or only) expression in the list and has consumed the 'expr' before it,
10067                # remove 'expr' from exp.Order and add it back to args
10068                args[-1] = order.this
10069                order.set("this", concat_exprs(order.this, args))
10070
10071            this = order or concat_exprs(args[0], args)
10072        else:
10073            this = None
10074
10075        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None
10076
10077        return self.expression(exp.GroupConcat(this=this, separator=separator))
10078
10079    def _parse_initcap(self) -> exp.Initcap:
10080        expr = exp.Initcap.from_arg_list(self._parse_function_args())
10081
10082        # attach dialect's default delimiters
10083        if expr.args.get("expression") is None:
10084            expr.set("expression", exp.Literal.string(self.dialect.INITCAP_DEFAULT_DELIMITER_CHARS))
10085
10086        return expr
10087
10088    def _parse_operator(self, this: exp.Expr | None) -> exp.Expr | None:
10089        while True:
10090            if not self._match(TokenType.L_PAREN):
10091                break
10092
10093            op = ""
10094            while self._curr and not self._match(TokenType.R_PAREN):
10095                op += self._curr.text
10096                self._advance()
10097
10098            comments = self._prev_comments
10099            this = self.expression(
10100                exp.Operator(this=this, operator=op, expression=self._parse_bitwise()),
10101                comments=comments,
10102            )
10103
10104            if not self._match(TokenType.OPERATOR):
10105                break
10106
10107        return this

logger = <Logger sqlglot (WARNING)>

OPTIONS_TYPE = dict[str, collections.abc.Sequence[typing.Union[collections.abc.Sequence[str], str]]]

TIME_ZONE_RE: re.Pattern[str] = re.compile(':.*?[a-zA-Z\\+\\-]')

def build_var_map( args: Sequence[typing.Any]) -> sqlglot.expressions.array.StarMap | sqlglot.expressions.array.VarMap: View Source

def build_var_map(args: BuilderArgs) -> exp.StarMap | exp.VarMap:
    """Build a map from alternating key/value arguments.

    A single star argument yields `exp.StarMap`; otherwise even positions become the
    keys and odd positions the values of an `exp.VarMap`.
    """
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys: list[ExpOrStr] = []
    values: list[ExpOrStr] = []
    for key_index in range(0, len(args), 2):
        key, value = args[key_index], args[key_index + 1]
        keys.append(key)
        values.append(value)

    key_array = exp.array(*keys, copy=False)
    value_array = exp.array(*values, copy=False)
    return exp.VarMap(keys=key_array, values=value_array)

def build_like( args: Sequence[typing.Any]) -> sqlglot.expressions.core.Escape | sqlglot.expressions.core.Like: View Source

def build_like(args: BuilderArgs) -> exp.Escape | exp.Like:
    """Build `exp.Like` from `(pattern, subject[, escape])`, wrapping in `exp.Escape` if given."""
    # The second argument is the subject (`this`), the first is the pattern.
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    if len(args) > 2:
        return exp.Escape(this=like, expression=seq_get(args, 2))
    return like

def binary_range_parser( expr_type: type[sqlglot.expressions.core.Expr], reverse_args: bool = False) -> Callable[[Parser, sqlglot.expressions.core.Expr | None], sqlglot.expressions.core.Expr | None]: View Source

def binary_range_parser(
    expr_type: Type[exp.Expr], reverse_args: bool = False
) -> t.Callable[[Parser, exp.Expr | None], exp.Expr | None]:
    """Return a parser callback that builds `expr_type` from `this` and a parsed operand.

    With `reverse_args`, the two operands are swapped before the node is built. The
    result is passed through the parser's `_parse_escape`.
    """

    def _parse_binary_range(self: Parser, this: exp.Expr | None) -> exp.Expr | None:
        right = self._parse_bitwise()
        left = this
        if reverse_args:
            left, right = right, left
        node = self.expression(expr_type(this=left, expression=right))
        return self._parse_escape(node)

    return _parse_binary_range

def build_logarithm( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.core.Func: View Source

def build_logarithm(args: BuilderArgs, dialect: Dialect) -> exp.Func:
    """Build a logarithm node, honoring the dialect's argument order and one-argument default."""
    first = seq_get(args, 0)
    second = seq_get(args, 1)

    if not second:
        # With a single argument the dialect's parser decides between LN and LOG.
        node_class = exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log
        return node_class(this=first)

    # Default argument order is base, expression
    if dialect.LOG_BASE_FIRST:
        return exp.Log(this=first, expression=second)

    return exp.Log(this=second, expression=first)

def build_hex( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.string.Hex | sqlglot.expressions.string.LowerHex: View Source

def build_hex(args: BuilderArgs, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    """Build `exp.LowerHex` when the dialect sets `HEX_LOWERCASE`, otherwise `exp.Hex`."""
    operand = seq_get(args, 0)
    if dialect.HEX_LOWERCASE:
        return exp.LowerHex(this=operand)
    return exp.Hex(this=operand)

def build_lower( args: Sequence[typing.Any]) -> sqlglot.expressions.string.Lower | sqlglot.expressions.string.Hex: View Source

def build_lower(args: BuilderArgs) -> exp.Lower | exp.Hex:
    """Build `exp.Lower`, collapsing a `exp.Hex` operand into `exp.LowerHex`."""
    operand = seq_get(args, 0)
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    if isinstance(operand, exp.Hex):
        return exp.LowerHex(this=operand.this)
    return exp.Lower(this=operand)

def build_upper( args: Sequence[typing.Any]) -> sqlglot.expressions.string.Upper | sqlglot.expressions.string.Hex: View Source

def build_upper(args: BuilderArgs) -> exp.Upper | exp.Hex:
    """Build `exp.Upper`, collapsing a `exp.Hex` operand into a plain `exp.Hex`."""
    operand = seq_get(args, 0)
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    if isinstance(operand, exp.Hex):
        return exp.Hex(this=operand.this)
    return exp.Upper(this=operand)

def build_extract_json_with_path( expr_type: type[~E]) -> Callable[[Sequence[Any], sqlglot.dialects.Dialect], ~E]: View Source

def build_extract_json_with_path(
    expr_type: Type[E],
) -> t.Callable[[BuilderArgs, Dialect], E]:
    """Return a builder for `expr_type` whose second argument is converted to a JSON path.

    Extra arguments are kept only for `exp.JSONExtract`, and `scalar_only` is set from
    the dialect only for `exp.JSONExtractScalar`.
    """

    def _builder(args: BuilderArgs, dialect: Dialect) -> E:
        json_path = dialect.to_json_path(seq_get(args, 1))
        node = expr_type(this=seq_get(args, 0), expression=json_path)

        if expr_type is exp.JSONExtract and len(args) > 2:
            node.set("expressions", args[2:])

        if expr_type is exp.JSONExtractScalar:
            node.set("scalar_only", dialect.JSON_EXTRACT_SCALAR_SCALAR_ONLY)

        return node

    return _builder

def build_mod(args: Sequence[typing.Any]) -> sqlglot.expressions.core.Mod: View Source

def build_mod(args: BuilderArgs) -> exp.Mod:
    """Build `exp.Mod` from two arguments, parenthesizing binary operands."""

    def _parenthesize(operand):
        # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
        if isinstance(operand, exp.Binary):
            return exp.Paren(this=operand)
        return operand

    dividend = _parenthesize(seq_get(args, 0))
    divisor = _parenthesize(seq_get(args, 1))

    return exp.Mod(this=dividend, expression=divisor)

def build_pad(args: Sequence[typing.Any], is_left: bool = True): View Source

def build_pad(args: BuilderArgs, is_left: bool = True):
    """Build `exp.Pad` from `(this, expression[, fill_pattern])` with the given side flag."""
    subject = seq_get(args, 0)
    length = seq_get(args, 1)
    fill = seq_get(args, 2)
    return exp.Pad(this=subject, expression=length, fill_pattern=fill, is_left=is_left)

def build_array_constructor( exp_class: type[~E], args: list[typing.Any], bracket_kind: sqlglot.tokenizer_core.TokenType, dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.core.Expr: View Source

def build_array_constructor(
    exp_class: Type[E], args: list[t.Any], bracket_kind: TokenType, dialect: Dialect
) -> exp.Expr:
    """Build an `exp_class` node from `args`, recording bracket notation where relevant.

    `bracket_notation` is only set for `exp.Array` in dialects with
    `HAS_DISTINCT_ARRAY_CONSTRUCTORS`; it is True when `bracket_kind` is `L_BRACKET`.
    """
    node = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        uses_brackets = bracket_kind == TokenType.L_BRACKET
        node.set("bracket_notation", uses_brackets)

    return node

def build_convert_timezone( args: Sequence[typing.Any], default_source_tz: str | None = None) -> sqlglot.expressions.temporal.ConvertTimezone | sqlglot.expressions.core.Anonymous: View Source

def build_convert_timezone(
    args: BuilderArgs, default_source_tz: str | None = None
) -> exp.ConvertTimezone | exp.Anonymous:
    """Build `exp.ConvertTimezone`, supporting the two-argument `(target_tz, timestamp)` form.

    In the two-argument form the source time zone is `default_source_tz` as a string
    literal when it is truthy, otherwise None. Any other arity goes through `from_arg_list`.
    """
    if len(args) != 2:
        return exp.ConvertTimezone.from_arg_list(args)

    source_tz = None
    if default_source_tz:
        source_tz = exp.Literal.string(default_source_tz)

    return exp.ConvertTimezone(
        source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
    )

def build_trim( args: Sequence[typing.Any], is_left: bool = True, reverse_args: bool = False) -> sqlglot.expressions.string.Trim: View Source

def build_trim(args: BuilderArgs, is_left: bool = True, reverse_args: bool = False) -> exp.Trim:
    """Build a one-sided `exp.Trim` (`LEADING` when `is_left`, else `TRAILING`).

    With `reverse_args`, the two arguments are swapped, but only when a second one exists.
    """
    subject = seq_get(args, 0)
    chars = seq_get(args, 1)

    if chars and reverse_args:
        subject, chars = chars, subject

    position = "LEADING" if is_left else "TRAILING"
    return exp.Trim(this=subject, expression=chars, position=position)

def build_coalesce( args: Sequence[typing.Any], is_nvl: bool | None = None, is_null: bool | None = None) -> sqlglot.expressions.functions.Coalesce: View Source

def build_coalesce(
    args: BuilderArgs, is_nvl: bool | None = None, is_null: bool | None = None
) -> exp.Coalesce:
    """Build `exp.Coalesce` with the first argument as `this` and the rest as `expressions`."""
    first = seq_get(args, 0)
    fallbacks = args[1:]
    return exp.Coalesce(this=first, expressions=fallbacks, is_nvl=is_nvl, is_null=is_null)

def build_locate_strposition(args: Sequence[typing.Any]) -> sqlglot.expressions.string.StrPosition: View Source

def build_locate_strposition(args: BuilderArgs) -> exp.StrPosition:
    """Build `exp.StrPosition` from `(substr, this[, position])` ordered arguments."""
    needle = seq_get(args, 0)
    haystack = seq_get(args, 1)
    start = seq_get(args, 2)
    return exp.StrPosition(this=haystack, substr=needle, position=start)

def build_array_append( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.array.ArrayAppend: View Source

def build_array_append(args: BuilderArgs, dialect: Dialect) -> exp.ArrayAppend:
    """
    Create an ArrayAppend node whose NULL handling follows the dialect configuration.

    In some dialects (Databricks, Spark, Snowflake) a NULL input array yields NULL.
    In others (DuckDB, PostgreSQL) a new single-element array is produced instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayAppend expression carrying the matching null_propagation flag
    """
    array = seq_get(args, 0)
    element = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS
    return exp.ArrayAppend(this=array, expression=element, null_propagation=propagates_nulls)

Builds ArrayAppend with NULL propagation semantics based on the dialect configuration.

Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.

Arguments:

args: Function arguments [array, element]
dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

Returns:

ArrayAppend expression with appropriate null_propagation flag

def build_array_prepend( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.array.ArrayPrepend: View Source

def build_array_prepend(args: BuilderArgs, dialect: Dialect) -> exp.ArrayPrepend:
    """
    Create an ArrayPrepend node whose NULL handling follows the dialect configuration.

    In some dialects (Databricks, Spark, Snowflake) a NULL input array yields NULL.
    In others (DuckDB, PostgreSQL) a new single-element array is produced instead.

    Args:
        args: Function arguments [array, element]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayPrepend expression carrying the matching null_propagation flag
    """
    array = seq_get(args, 0)
    element = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS
    return exp.ArrayPrepend(this=array, expression=element, null_propagation=propagates_nulls)

Builds ArrayPrepend with NULL propagation semantics based on the dialect configuration.

Some dialects (Databricks, Spark, Snowflake) return NULL when the input array is NULL. Others (DuckDB, PostgreSQL) create a new single-element array instead.

Arguments:

args: Function arguments [array, element]
dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

Returns:

ArrayPrepend expression with appropriate null_propagation flag

def build_array_concat( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.array.ArrayConcat: View Source

def build_array_concat(args: BuilderArgs, dialect: Dialect) -> exp.ArrayConcat:
    """
    Create an ArrayConcat node whose NULL handling follows the dialect configuration.

    In some dialects (Redshift, Snowflake) any NULL input array yields NULL.
    In others (DuckDB, PostgreSQL) NULL arrays are skipped and concatenation continues.

    Args:
        args: Function arguments [array1, array2, ...] (variadic)
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayConcat expression carrying the matching null_propagation flag
    """
    first = seq_get(args, 0)
    remaining = args[1:]
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS
    return exp.ArrayConcat(this=first, expressions=remaining, null_propagation=propagates_nulls)

Builds ArrayConcat with NULL propagation semantics based on the dialect configuration.

Some dialects (Redshift, Snowflake) return NULL when any input array is NULL. Others (DuckDB, PostgreSQL) skip NULL arrays and continue concatenation.

Arguments:

args: Function arguments [array1, array2, ...] (variadic)
dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

Returns:

ArrayConcat expression with appropriate null_propagation flag

def build_array_remove( args: Sequence[typing.Any], dialect: sqlglot.dialects.Dialect) -> sqlglot.expressions.array.ArrayRemove: View Source

def build_array_remove(args: BuilderArgs, dialect: Dialect) -> exp.ArrayRemove:
    """
    Create an ArrayRemove node whose NULL handling follows the dialect configuration.

    In some dialects (Snowflake) a NULL removal value yields NULL.
    In others (DuckDB) the result may be an empty array due to NULL comparison semantics.

    Args:
        args: Function arguments [array, value_to_remove]
        dialect: The dialect whose ARRAY_FUNCS_PROPAGATES_NULLS setting is read

    Returns:
        ArrayRemove expression carrying the matching null_propagation flag
    """
    array = seq_get(args, 0)
    value_to_remove = seq_get(args, 1)
    propagates_nulls = dialect.ARRAY_FUNCS_PROPAGATES_NULLS
    return exp.ArrayRemove(this=array, expression=value_to_remove, null_propagation=propagates_nulls)

Builds ArrayRemove with NULL propagation semantics based on the dialect configuration.

Some dialects (Snowflake) return NULL when the removal value is NULL. Others (DuckDB) may return empty array due to NULL comparison semantics.

Arguments:

args: Function arguments [array, value_to_remove]
dialect: The dialect to read ARRAY_FUNCS_PROPAGATES_NULLS from

Returns:

ArrayRemove expression with appropriate null_propagation flag

SENTINEL_NONE: sqlglot.tokenizer_core.Token = <Token token_type: TokenType.SENTINEL, text: SENTINEL, line: 1, col: 1, start: 0, end: 0, comments: []>